From b0f57b9a378a1ba84093552adfe5a93ae43488f7 Mon Sep 17 00:00:00 2001
From: Yohann Dedy <yohann@dedy.fr>
Date: Sat, 12 May 2018 01:21:56 +0200
Subject: [PATCH] =?UTF-8?q?Scraping=20des=20liens=20et=20r=C3=A9cup=C3=A9r?=
 =?UTF-8?q?ation=20des=20informations=20dans=20le=20m=C3=AAme=20spider?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CinemScraper/spiders/grabVideoData.py |  26 +++++++++++++++-----------
 CinemScraper/spiders/listeVideos.py   |  18 ------------------
 CinemScraper/spiders/listeVideos.pyc  | Bin 929 -> 0 bytes
 3 files changed, 15 insertions(+), 29 deletions(-)
 delete mode 100644 CinemScraper/spiders/listeVideos.py
 delete mode 100644 CinemScraper/spiders/listeVideos.pyc
diff --git a/CinemScraper/spiders/grabVideoData.py b/CinemScraper/spiders/grabVideoData.py
index 05a1a3c..95a2c27 100644
--- a/CinemScraper/spiders/grabVideoData.py
+++ b/CinemScraper/spiders/grabVideoData.py
@@ -4,17 +4,21 @@ import scrapy
 
 class GrabvideodataSpider(scrapy.Spider):
     name = 'grabVideoData'
-    allowed_domains = ['http://www.cinematheque.fr/']
-    start_urls = ['http://www.cinematheque.fr/video/1219.html']
+    allowed_domains = ['cinematheque.fr']
+    start_urls = ['http://www.cinematheque.fr/decouvrir.html']
 
     def parse(self, response):
+        # Follow every anchor whose href contains "video"; hrefs may be relative.
+        for href in response.xpath('//a[contains(@href, "video")]/@href').extract():
+            yield scrapy.Request(response.urljoin(href), callback=self.parse_dir_content)
+
+    def parse_dir_content(self, response):
     	for page in response.css("div#content"):
-    		yield {
-    			'titre' : page.css('h1::text').extract_first(),
-    			'sous-titre' : page.css('h1 span::text').extract_first(),
-    			'description' : page.css('.biographies p').extract(),
-    			'videoSrcUrl' : page.css('iframe::attr(src)').extract_first(),
-    			'tags'		  : page.css('.tag::text').extract()
-    		}
-    	
-        
+            # Emit one item per "div#content" container of the followed page.
+            yield {
+                'titre': page.css('h1::text').extract_first(),
+                'sous-titre': page.css('h1 span::text').extract_first(),
+                'description': page.css('.biographies p').extract(),
+                'videoSrcUrl': page.css('iframe::attr(src)').extract_first(),
+                'tags': page.css('.tag::text').extract(),
+            }
diff --git a/CinemScraper/spiders/listeVideos.py b/CinemScraper/spiders/listeVideos.py
deleted file mode 100644
index e25581b..0000000
--- a/CinemScraper/spiders/listeVideos.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# -*- coding: utf-8 -*-
-import scrapy
-
-
-class ListevideosSpider(scrapy.Spider):
-    name = 'listeVideos'
-    allowed_domains = ['http://www.cinematheque.fr/']
-    start_urls = ['http://www.cinematheque.fr/decouvrir.html']
-
-    def parse(self, response):
-    	for lien in response.xpath('//a/@href[contains(.,"video")]/../..'):
-    		yield {
-    			'adresse' : lien.css('a::attr(href)').extract(),
-    			'dateUpload' : lien.css('::attr(data-sort)').extract(),
-
-    		}
-    	
-        
diff --git a/CinemScraper/spiders/listeVideos.pyc b/CinemScraper/spiders/listeVideos.pyc
deleted file mode 100644
index ca00c6ae0b2e8cb194058c8b16ab2e82e23edeaa..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 929
zcmcIi!EVz)5S_Kt0EJ3$=^0fV90|O)B1H(b5F`$$NHGLqR9P!~LmWA_yEASQshrAR
z@mX+(nRNu^4y*Osnc4B%H?tdlKiL1a{gLLd8zHYJ{Nae9;HjVjNZAO$0xT8G6%-+S
z1-J(Im<E!OL<7z)fATn^2W-A6k?dtz*anl97x$ZA@$L)$@F4>erp(6ZX+A4RLpg}a
z(*aDANYv0>hDr!$p3o~>5Vd2i*Xwnhm$h9PTG)%u#&f3&n>XF1D_y*xRh0+)x6p_}
zQGst6Hm~7I!P1v^tzdHg6utyd25=Qfi7bJmOGeHJ;dmDv+=%YM@l0LZhYzeiiH6L1
zhL0GvHu_}Y?EF*S)MUyUqxkWWZ^hB*T*om_Jm5q#9vdPTN$!YlNu+QV(V{ePw^<l6
zPq1+`qJ56CYu~q3V~V~rdmP)eTsujHIilyW8*(P6K1Iq#Tg~Y<8_eG}HCmErRoXh@
z6+EC^ik>x9x3Z`wr|(Yn<kd{Sl;$S6Gp%)hJLvBf^^Kc}+bxNt);P4H7v)e;hbjyX
zRU}(pMwZoPWwXrJlx3@?=qhnVmF)O9l5v!EuEMEM{ZaZa<xZIyL>I~p4%INY6ZCx(
dAEoXsiP^tmpZ<T8_{KczTk%}pEDGEUe*p)1+h+g(