From e6bd517660aea58d4f05b3a6f3632dac704944e8 Mon Sep 17 00:00:00 2001 From: Yohann Dedy Date: Sun, 1 Dec 2019 22:01:20 +0100 Subject: [PATCH] Nettoyage url fichier --- FMScraper/items.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/FMScraper/items.py b/FMScraper/items.py index e0cd611..a824ae0 100644 --- a/FMScraper/items.py +++ b/FMScraper/items.py @@ -22,6 +22,10 @@ def clean_html_script(value): description_full_post = description_re.group(1) + description_re.group(3) yield description_full_post +def clean_file_url(value): + url = re.match("(.+\.mp3)", value) + yield url.group(1) + class show_Item(scrapy.Item): name = scrapy.Field() url_page = scrapy.Field() @@ -38,10 +42,10 @@ class episode_Item(scrapy.Item): input_processor = MapCompose(clean_text) ) description_full = scrapy.Field( + url_file = scrapy.Field( default = 'null', - input_processor = MapCompose(clean_html_script) + input_processor = MapCompose(clean_file_url) ) - url_file = scrapy.Field() url_page = scrapy.Field() date_diffusion = scrapy.Field()