From befa141eb26f9d672ef4f1c8adad4dbc97581489 Mon Sep 17 00:00:00 2001
From: Yohann Dedy
','\\n')
+    yield res
+
+def clean_p_tags(value):
+    text = value.replace('<p>','')
+    text = text.replace('</p>','')
+    yield text
+
 def clean_url(value):
     yield value.split('?')[0]
@@ -39,8 +54,13 @@ class video(scrapy.Item):
         input_processor = MapCompose(clean_text),
         output_processor = Join()
     )
-    secondary_title = scrapy.Field()
-    description = scrapy.Field()
+    secondary_title = scrapy.Field(
+        input_processor = MapCompose(clean_text)
+    )
+    description = scrapy.Field(
+        input_processor = MapCompose(line_breaks, clean_p_tags),
+        output_processor = Join('\n')
+    )
     urlVideo = scrapy.Field(
         input_processor = MapCompose(clean_url)
     )
@@ -49,6 +69,12 @@ class video(scrapy.Item):
     date_event = scrapy.Field(
         input_processor = MapCompose(clean_text, format_date)
     )
-    tags = scrapy.Field()
-    biographies = scrapy.Field()
+    tags = scrapy.Field(
+        input_processor = MapCompose(tags_to_array),
+        output_processor = Join('\n')
+    )
+    biographies = scrapy.Field(
+        input_processor = MapCompose(line_breaks, clean_p_tags),
+        output_processor = Join('\n')
+    )
     pass