def clean_file_url(value):
    """Yield the leading part of *value* up to and including ``.mp3``.

    Written as a generator so it can be used as a ``MapCompose`` input
    processor (it is wired to the ``url_file`` field that way).

    Args:
        value: Raw URL string, possibly carrying a query string or other
            trailing text after the ``.mp3`` extension.

    Yields:
        The matched prefix of *value* ending in ``.mp3``. Nothing is
        yielded when *value* contains no ``.mp3`` preceded by at least
        one character, instead of failing with ``AttributeError`` on a
        ``None`` match object.
    """
    # Raw string: "\." in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    # ".+" is greedy, so the match extends to the *last* ".mp3" in value.
    found = re.match(r"(.+\.mp3)", value)
    if found is None:
        # No audio URL to extract; produce no value rather than crash.
        return
    yield found.group(1)