CFVideoScraper/CinemScraper/spiders/grabVideoData.py

# -*- coding: utf-8 -*-
import scrapy


class GrabvideodataSpider(scrapy.Spider):
    """Spider that scrapes the metadata of one cinematheque.fr video page.

    The crawl starts from the single URL in ``start_urls``; ``parse`` does
    not schedule any follow-up requests.
    """

    name = 'grabVideoData'
    # Scrapy expects bare domain names here, not URLs: an entry carrying a
    # scheme or a path triggers a warning and does not work as a domain
    # filter. Listing the registered domain also covers the "www" subdomain.
    allowed_domains = ['cinematheque.fr']
    start_urls = ['http://www.cinematheque.fr/video/1219.html']

    def parse(self, response):
        """Yield one item dict per ``div#content`` element of the response.

        Item keys:
            titre: first text node of an ``h1`` (``None`` if nothing matches).
            sous-titre: first text node of a ``span`` inside an ``h1``
                (``None`` if nothing matches).
            description: list of the ``p`` elements found under
                ``.biographies``. The selector has no ``::text``, so each
                entry is the serialized element (markup included), not
                plain text.
            videoSrcUrl: ``src`` attribute of the first ``iframe``
                (``None`` if nothing matches).
            tags: list of the text nodes of every ``.tag`` element.
        """
        for page in response.css('div#content'):
            yield {
                'titre': page.css('h1::text').extract_first(),
                'sous-titre': page.css('h1 span::text').extract_first(),
                'description': page.css('.biographies p').extract(),
                'videoSrcUrl': page.css('iframe::attr(src)').extract_first(),
                'tags': page.css('.tag::text').extract(),
            }
Premier commit 2018-05-10 17:31:59 +00:00			`# -*- coding: utf-8 -*-`
			`import scrapy`


			`class GrabvideodataSpider(scrapy.Spider):`
			`name = 'grabVideoData'`
			`allowed_domains = ['http://www.cinematheque.fr/']`
			`start_urls = ['http://www.cinematheque.fr/video/1219.html']`

			`def parse(self, response):`
			`for page in response.css("div#content"):`
			`yield {`
			`'titre' : page.css('h1::text').extract_first(),`
			`'sous-titre' : page.css('h1 span::text').extract_first(),`
			`'description' : page.css('.biographies p').extract(),`
			`'videoSrcUrl' : page.css('iframe::attr(src)').extract_first(),`
			`'tags' : page.css('.tag::text').extract()`
			`}`