Compare commits
1 Commits
master
...
recuperati
| Author | SHA1 | Date |
|---|---|---|
|
|
20f0f0d3be |
|
|
@ -0,0 +1,33 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import scrapy
|
||||
from scrapy.loader import ItemLoader
|
||||
from FMScraper.items import show_Item
|
||||
|
||||
|
||||
class GetEmissionsSpider(scrapy.Spider):
    """Spider that walks the francemusique.fr show index.

    Starting from the ``/emissions`` listing, it follows the link of every
    show found there and yields one plain ``dict`` per show page.
    """

    name = 'get_emissions'
    allowed_domains = ['francemusique.fr']
    start_urls = ['http://francemusique.fr/emissions']

    def parse(self, response):
        """Schedule one request per show linked from the listing page.

        Args:
            response: the downloaded listing page (one of ``start_urls``).

        Yields:
            scrapy.Request: a request for each show page, handled by
            :meth:`parse_emission`.
        """
        for sel in response.xpath('//h2[@class="emission-title"]/a/@href'):
            # hrefs may be relative, so resolve them against the page URL.
            url_emission = response.urljoin(sel.extract())
            yield scrapy.Request(url_emission, callback=self.parse_emission)

    def parse_emission(self, response):
        """Extract the metadata of a single show page.

        Args:
            response: the downloaded show page.

        Yields:
            dict: one record with the keys ``url_emission``,
            ``nom_emission``, ``url_rss``, ``genre_emission`` and
            ``producteurs_emission``. The single-valued fields come from
            ``extract_first()``; ``producteurs_emission`` comes from
            ``extract()`` and therefore holds every matching ``@title``.
        """
        # The response already exposes ``.xpath()``; there is no need to
        # build a separate ``scrapy.Selector`` around it.
        yield {
            'url_emission': response.url,
            'nom_emission': response.xpath(
                '//h1[@class="cover-emission-content-link-title"]/text()'
            ).extract_first(),
            'url_rss': response.xpath(
                '//div[@class="podcast-container rss"]/a/@href'
            ).extract_first(),
            'genre_emission': response.xpath(
                '//span[@class="cover-emission-content-information-wrapper-more-genre"]/text()'
            ).extract_first(),
            'producteurs_emission': response.xpath(
                '//div[@class="cover-emission-content-information-wrapper-producers"]/a/@title'
            ).extract(),
        }
|
||||
|
||||
|
||||
Loading…
Reference in New Issue