Compare commits
No commits in common. "recuperation-emissions" and "master" have entirely different histories.
recuperation-emissions
...
master
|
|
@ -1,33 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
import scrapy
|
|
||||||
from scrapy.loader import ItemLoader
|
|
||||||
from FMScraper.items import show_Item
|
|
||||||
|
|
||||||
|
|
||||||
class GetEmissionsSpider(scrapy.Spider):
    """Crawl the francemusique.fr show index and emit one record per show.

    The crawl starts on the ``/emissions`` listing page, follows every show
    link found there, and yields a plain dict for each show page visited.
    """

    name = 'get_emissions'
    allowed_domains = ['francemusique.fr']
    start_urls = ['http://francemusique.fr/emissions']

    def parse(self, response):
        """Schedule one request per show link found on the listing page.

        Every ``href`` located under an ``h2`` with class ``emission-title``
        is resolved against the URL of ``response`` and requested with
        :meth:`parse_emission` as its callback.
        """
        show_links = response.xpath('//h2[@class="emission-title"]/a/@href')
        for link in show_links:
            absolute_url = response.urljoin(link.extract())
            yield scrapy.Request(absolute_url, callback=self.parse_emission)

    def parse_emission(self, response):
        """Yield a dict describing the show page held in ``response``.

        Keys of the yielded dict:
            url_emission: URL of the page that was scraped.
            nom_emission: text of the cover title ``h1`` (first match).
            url_rss: ``href`` of the link in the RSS podcast container
                (first match).
            genre_emission: text of the genre ``span`` (first match).
            producteurs_emission: list of the ``title`` attributes of the
                links in the producers ``div`` (all matches).
        """
        selector = scrapy.Selector(response)

        def first_match(query):
            # Single-valued fields keep only the first node matched by the query.
            return selector.xpath(query).extract_first()

        title = first_match('//h1[@class="cover-emission-content-link-title"]/text()')
        rss_link = first_match('//div[@class="podcast-container rss"]/a/@href')
        genre_label = first_match(
            '//span[@class="cover-emission-content-information-wrapper-more-genre"]/text()'
        )
        # Producers are multi-valued, so every matching title is kept.
        producer_names = selector.xpath(
            '//div[@class="cover-emission-content-information-wrapper-producers"]/a/@title'
        ).extract()

        yield {
            'url_emission': response.url,
            'nom_emission': title,
            'url_rss': rss_link,
            'genre_emission': genre_label,
            'producteurs_emission': producer_names,
        }
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue