diff --git a/FMScraper/spiders/get_episodes.py b/FMScraper/spiders/get_episodes.py index 527ca44..781e52c 100644 --- a/FMScraper/spiders/get_episodes.py +++ b/FMScraper/spiders/get_episodes.py @@ -20,8 +20,8 @@ class GetEpisodesSpider(scrapy.Spider): } def parse(self, response): - for sel in response.xpath('//section[@class="emission-diffusions-list"]'): - url_episode = response.urljoin(sel.xpath('.//a[@class="preview-list-element-link"]/@href').extract_first()) + for sel in response.xpath('//section[@class="emission-diffusions-list"]//a[@class="preview-list-element-link"]/@href'): + url_episode = response.urljoin(sel.extract()) yield scrapy.Request(url_episode, callback = self.parse_episode) def parse_episode(self, response): @@ -33,4 +33,6 @@ class GetEpisodesSpider(scrapy.Spider): for field, xpath in self.episode_fields.items(): loader.add_xpath(field, xpath) + loader.add_value('url_page', response.url) + yield loader.load_item()