Ajout du champ id_video

master
Yohann Dedy 2019-08-28 03:24:20 +02:00
parent 53828ccbce
commit 6a8061a75f
2 changed files with 5 additions and 1 deletion

View File

@ -45,6 +45,7 @@ class video(scrapy.Item):
input_processor = MapCompose(clean_url) input_processor = MapCompose(clean_url)
) )
urlCF = scrapy.Field() urlCF = scrapy.Field()
id_video = scrapy.Field()
date_event = scrapy.Field( date_event = scrapy.Field(
input_processor = MapCompose(clean_text, format_date) input_processor = MapCompose(clean_text, format_date)
) )

View File

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import scrapy import scrapy
import re
from scrapy.loader import ItemLoader from scrapy.loader import ItemLoader
from scrapy.loader.processors import Join, MapCompose, TakeFirst from scrapy.loader.processors import Join, MapCompose, TakeFirst
from w3lib.html import remove_tags from w3lib.html import remove_tags
@ -17,7 +18,7 @@ class GrabvideodataSpider(scrapy.Spider):
'date_event' : '//p[@class="date"]/text()', 'date_event' : '//p[@class="date"]/text()',
'urlVideo' : '//iframe/@src', 'urlVideo' : '//iframe/@src',
'description' : '//div[@class="description"]/p', 'description' : '//div[@class="description"]/p',
'biographies' : '//div[@class="biographies"]', 'biographies' : '//div[@class="biographies"]/p',
'tags' : '//span[contains(@class, "tag")]/text()' 'tags' : '//span[contains(@class, "tag")]/text()'
} }
@ -41,6 +42,8 @@ class GrabvideodataSpider(scrapy.Spider):
for field, xpath in self.item_fields.items(): for field, xpath in self.item_fields.items():
loader.add_xpath(field, xpath) loader.add_xpath(field, xpath)
loader.add_value('urlCF', response.url) loader.add_value('urlCF', response.url)
extract_vid_id = re.compile(r'/(\d+).html')
loader.add_value('id_video', extract_vid_id.findall(response.url)[0])
yield loader.load_item() yield loader.load_item()