Ajout du champ id_video
parent
53828ccbce
commit
6a8061a75f
|
|
@ -45,6 +45,7 @@ class video(scrapy.Item):
|
||||||
input_processor = MapCompose(clean_url)
|
input_processor = MapCompose(clean_url)
|
||||||
)
|
)
|
||||||
urlCF = scrapy.Field()
|
urlCF = scrapy.Field()
|
||||||
|
id_video = scrapy.Field()
|
||||||
date_event = scrapy.Field(
|
date_event = scrapy.Field(
|
||||||
input_processor = MapCompose(clean_text, format_date)
|
input_processor = MapCompose(clean_text, format_date)
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import scrapy
|
import scrapy
|
||||||
|
import re
|
||||||
from scrapy.loader import ItemLoader
|
from scrapy.loader import ItemLoader
|
||||||
from scrapy.loader.processors import Join, MapCompose, TakeFirst
|
from scrapy.loader.processors import Join, MapCompose, TakeFirst
|
||||||
from w3lib.html import remove_tags
|
from w3lib.html import remove_tags
|
||||||
|
|
@ -17,7 +18,7 @@ class GrabvideodataSpider(scrapy.Spider):
|
||||||
'date_event' : '//p[@class="date"]/text()',
|
'date_event' : '//p[@class="date"]/text()',
|
||||||
'urlVideo' : '//iframe/@src',
|
'urlVideo' : '//iframe/@src',
|
||||||
'description' : '//div[@class="description"]/p',
|
'description' : '//div[@class="description"]/p',
|
||||||
'biographies' : '//div[@class="biographies"]',
|
'biographies' : '//div[@class="biographies"]/p',
|
||||||
'tags' : '//span[contains(@class, "tag")]/text()'
|
'tags' : '//span[contains(@class, "tag")]/text()'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -41,6 +42,8 @@ class GrabvideodataSpider(scrapy.Spider):
|
||||||
for field, xpath in self.item_fields.items():
|
for field, xpath in self.item_fields.items():
|
||||||
loader.add_xpath(field, xpath)
|
loader.add_xpath(field, xpath)
|
||||||
loader.add_value('urlCF', response.url)
|
loader.add_value('urlCF', response.url)
|
||||||
|
extract_vid_id = re.compile(r'/(\d+).html')
|
||||||
|
loader.add_value('id_video', extract_vid_id.findall(response.url)[0])
|
||||||
yield loader.load_item()
|
yield loader.load_item()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue