Nettoyage url fichier

master
Yohann Dedy 2019-12-01 22:01:20 +01:00
parent 1b9ac23e85
commit e6bd517660
1 changed files with 6 additions and 2 deletions

View File

@@ -22,6 +22,10 @@ def clean_html_script(value):
description_full_post = description_re.group(1) + description_re.group(3)
yield description_full_post
def clean_file_url(value):
    """Yield the file URL truncated right after its ``.mp3`` extension.

    This is a generator: each call yields either one cleaned URL or
    nothing at all.

    Args:
        value: Raw URL string, possibly carrying extra text after
            ``.mp3`` (query string, tracking suffix, ...).

    Yields:
        The leading part of ``value`` up to and including the last
        ``.mp3`` matched by the pattern. Nothing is yielded when
        ``value`` does not match, instead of failing with
        ``AttributeError`` on a ``None`` match object.
    """
    # Raw string so that ``\.`` reaches the regex engine as an escaped dot
    # rather than being parsed as an (invalid) string escape sequence.
    url = re.match(r"(.+\.mp3)", value)
    # re.match returns None when the pattern does not match at the start
    # of the string; skip such values rather than crash.
    if url is not None:
        yield url.group(1)
class show_Item(scrapy.Item):
name = scrapy.Field()
url_page = scrapy.Field()
@@ -38,10 +42,10 @@ class episode_Item(scrapy.Item):
input_processor = MapCompose(clean_text)
)
description_full = scrapy.Field(
url_file = scrapy.Field(
default = 'null',
input_processor = MapCompose(clean_html_script)
input_processor = MapCompose(clean_file_url)
)
url_file = scrapy.Field()
url_page = scrapy.Field()
date_diffusion = scrapy.Field()