From 557c76c2ca4ba322dc22b9053952becb66fd9ebc Mon Sep 17 00:00:00 2001 From: Yohann Dedy Date: Tue, 26 Nov 2019 23:33:46 +0100 Subject: [PATCH] Base fonctionnelle --- FMScraper/spiders/get_episodes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/FMScraper/spiders/get_episodes.py b/FMScraper/spiders/get_episodes.py index 527ca44..781e52c 100644 --- a/FMScraper/spiders/get_episodes.py +++ b/FMScraper/spiders/get_episodes.py @@ -20,8 +20,8 @@ class GetEpisodesSpider(scrapy.Spider): } def parse(self, response): - for sel in response.xpath('//section[@class="emission-diffusions-list"]'): - url_episode = response.urljoin(sel.xpath('.//a[@class="preview-list-element-link"]/@href').extract_first()) + for sel in response.xpath('//section[@class="emission-diffusions-list"]//a[@class="preview-list-element-link"]/@href'): + url_episode = response.urljoin(sel.extract()) yield scrapy.Request(url_episode, callback = self.parse_episode) def parse_episode(self, response): @@ -33,4 +33,6 @@ class GetEpisodesSpider(scrapy.Spider): for field, xpath in self.episode_fields.items(): loader.add_xpath(field, xpath) + loader.add_value('url_page', response.url) + yield loader.load_item()