21 lines
645 B
Python
21 lines
645 B
Python
|
|
# -*- coding: utf-8 -*-
|
||
|
|
import scrapy
|
||
|
|
|
||
|
|
|
||
|
|
class GrabvideodataSpider(scrapy.Spider):
    """Spider that extracts metadata from a cinematheque.fr video page.

    Crawling starts from the single URL in ``start_urls``; each
    ``div#content`` element of the response is turned into one item.
    """

    name = 'grabVideoData'
    # Scrapy expects bare domain names here, not URLs: an entry such as
    # 'http://www.cinematheque.fr/' is reported and ignored by the offsite
    # filter. 'cinematheque.fr' also matches the 'www' subdomain.
    allowed_domains = ['cinematheque.fr']
    start_urls = ['http://www.cinematheque.fr/video/1219.html']

    def parse(self, response):
        """Yield one dict per ``div#content`` element in ``response``.

        Keys of each yielded dict:
            titre: first text node of the ``h1`` element, or None.
            sous-titre: first text node of ``h1 span``, or None.
            description: list of the ``.biographies p`` elements, serialized
                as markup (the selector has no ``::text``, so tags are kept).
            videoSrcUrl: ``src`` attribute of the first ``iframe``, or None.
            tags: list of text nodes of every ``.tag`` element.
        """
        for page in response.css("div#content"):
            yield {
                'titre': page.css('h1::text').extract_first(),
                'sous-titre': page.css('h1 span::text').extract_first(),
                'description': page.css('.biographies p').extract(),
                'videoSrcUrl': page.css('iframe::attr(src)').extract_first(),
                'tags': page.css('.tag::text').extract(),
            }
|
||
|
|
|
||
|
|
|