From 1b9ac23e859a34bf5ff36a154418563a4b29f3ca Mon Sep 17 00:00:00 2001
From: Yohann Dedy
Date: Sun, 1 Dec 2019 21:59:56 +0100
Subject: [PATCH] =?UTF-8?q?R=C3=A9cup=C3=A9ration=20infos=20=C3=A9mission?=
 =?UTF-8?q?=20avant=20=C3=A9pisode?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
NOTE(review): line breaks and indentation were restored from a
whitespace-collapsed copy of this patch. The hunk matches its header counts
(23 old / 24 new lines), but the exact indentation is assumed; check it
against the original commit before applying.

 FMScraper/spiders/get_episodes.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/FMScraper/spiders/get_episodes.py b/FMScraper/spiders/get_episodes.py
index 6993255..2227a7d 100644
--- a/FMScraper/spiders/get_episodes.py
+++ b/FMScraper/spiders/get_episodes.py
@@ -24,23 +24,24 @@ class GetEpisodesSpider(scrapy.Spider):
     }
 
     def parse(self, response):
-#        page_emission = scrapy.Selector(response)
-#        show_loader = ItemLoader(item=show_Item(), selector=page_emission)
-#        show_loader.default_output_processor = Join()
-#        for field, xpath in self.show_fields.items():
-#            show_loader.add_xpath(field, xpath)
-#        show_loader.add_value('url_page', response.url)
+        page_emission = scrapy.Selector(response)
+        show_loader = ItemLoader(item=show_Item(), selector=page_emission)
+        show_loader.default_output_processor = Join()
+        for field, xpath in self.show_fields.items():
+            show_loader.add_xpath(field, xpath)
+        show_loader.add_value('url_page', response.url)
 
-#        yield show_loader.load_item()
+        yield show_loader.load_item()
+        yield scrapy.Request(url=response.url, callback = self.parse_episodes)
 
+    def parse_episodes(self, response):
         for sel in response.xpath('//section[@class="emission-diffusions-list"]//a[@class="preview-list-element-link"]/@href'):
             url_episode = response.urljoin(sel.extract())
             next_page = response.xpath('//link[@rel="next"]/@href')
             yield scrapy.Request(url_episode, callback = self.parse_episode)
         if next_page:
             next_url = response.urljoin(next_page.extract_first())
-            yield scrapy.Request(url=next_url)
-
+            yield scrapy.Request(url=next_url,callback = self.parse_episodes)
 
     def parse_episode(self, response):
         page_episode = scrapy.Selector(response)