Ismael Mejia
11 years ago
12 changed files with 158 additions and 20 deletions
Split View
Diff Options
-
4devscripts/gh-pages/add-version.py
-
2devscripts/gh-pages/update-sites.py
-
4devscripts/youtube_genalgo.py
-
11test/test_all_urls.py
-
2youtube_dl/FileDownloader.py
-
5youtube_dl/extractor/__init__.py
-
2youtube_dl/extractor/canalc2.py
-
77youtube_dl/extractor/francetv.py
-
47youtube_dl/extractor/slideshare.py
-
16youtube_dl/extractor/sohu.py
-
6youtube_dl/extractor/youtube.py
-
2youtube_dl/version.py
@ -0,0 +1,77 @@ |
|||
# encoding: utf-8 |
|||
import re |
|||
import xml.etree.ElementTree |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ( |
|||
compat_urlparse, |
|||
) |
|||
|
|||
|
|||
class FranceTVBaseInfoExtractor(InfoExtractor):
    """Common base for extractors that resolve a video through the
    francetvinfo.fr ``getInfosOeuvre.php`` webservice."""

    def _extract_video(self, video_id):
        """Download the XML description of ``video_id`` and build its info dict.

        The returned dict has the keys ``id``, ``ext``, ``url``, ``title``,
        ``thumbnail`` and ``description``.
        """
        config_url = (
            'http://www.francetvinfo.fr/appftv/webservices/video/'
            'getInfosOeuvre.php?id-diffusion='
            + video_id)
        raw_xml = self._download_webpage(
            config_url, video_id, 'Downloading XML config')
        # ElementTree is fed bytes, so the downloaded text is re-encoded first.
        root = xml.etree.ElementTree.fromstring(raw_xml.encode('utf-8'))

        # The service hands back a 'manifest.f4m' URL; it is rewritten to the
        # 'index_2_av.m3u8' variant under '/i/' instead of '/z/'.
        # NOTE(review): presumably this swaps an HDS manifest for an HLS
        # playlist -- confirm against the CDN's URL scheme.
        f4m_url = root.find('videos/video/url').text
        m3u8_url = f4m_url.replace(
            'manifest.f4m', 'index_2_av.m3u8').replace('/z/', '/i/')
        image_path = root.find('image').text

        title = root.find('titre').text
        # The image path is resolved against the pluzz site root.
        thumbnail = compat_urlparse.urljoin(
            'http://pluzz.francetv.fr', image_path)
        description = root.find('synopsis').text

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': m3u8_url,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
        }
|||
|
|||
|
|||
class PluzzIE(FranceTVBaseInfoExtractor):
    """Extractor for video pages hosted on pluzz.francetv.fr."""

    IE_NAME = u'pluzz.francetv.fr'
    _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'

    _TEST = {
        u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
        u'file': u'88439064.mp4',
        u'info_dict': {
            u'title': u'Allô Rufo',
            u'description': u'md5:d909f1ebdf963814b65772aea250400e',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the ``data-diffusion`` id from the page at ``url`` and
        delegate to ``_extract_video``."""
        url_match = re.match(self._VALID_URL, url)
        # The page name from the URL is only used to label the download.
        page_name = url_match.group(1)
        page_html = self._download_webpage(url, page_name)
        diffusion_id = self._search_regex(
            r'data-diffusion="(\d+)"', page_html, 'ID')
        return self._extract_video(diffusion_id)
|||
|
|||
|
|||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    """Extractor for replay pages hosted on www.francetvinfo.fr."""

    IE_NAME = u'francetvinfo.fr'
    # The dot before 'html' is escaped so that only a literal '.html' suffix
    # matches; an unescaped '.' would accept any character in that position.
    _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'

    _TEST = {
        u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
        u'file': u'84981923.mp4',
        u'info_dict': {
            u'title': u'Soir 3',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the ``id-video`` number from the page at ``url`` and
        delegate to ``_extract_video``."""
        mobj = re.match(self._VALID_URL, url)
        # The title segment of the URL is only used to label the download.
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
        return self._extract_video(video_id)
@ -0,0 +1,47 @@ |
|||
import re |
|||
import json |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ( |
|||
compat_urlparse, |
|||
ExtractorError, |
|||
) |
|||
|
|||
|
|||
class SlideshareIE(InfoExtractor):
    """Extractor for video presentations hosted on www.slideshare.net."""

    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'

    _TEST = {
        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
        u'file': u'25665706.mp4',
        u'info_dict': {
            u'title': u'Managing Scale and Complexity',
            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
        },
    }

    def _real_extract(self, url):
        """Parse the page's embedded ``slideshare_object`` JSON and return
        the info dict for its video.

        Raises ExtractorError (marked as expected) when the slideshow type
        is anything other than ``video``.
        """
        url_match = re.match(self._VALID_URL, url)
        slug = url_match.group('title')
        page_html = self._download_webpage(url, slug)

        # The metadata lives in a JavaScript object literal in the page.
        raw_object = self._search_regex(
            r'var slideshare_object = ({.*?}); var user_info =',
            page_html, u'slideshare object')
        info = json.loads(raw_object)

        slideshow = info['slideshow']
        if slideshow['type'] != u'video':
            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % slideshow['type'], expected=True)

        doc = info['doc']
        player = info['jsplayer']
        bucket = player['video_bucket']
        ext = player['video_extension']
        # The media file name is '<doc>-SD.<ext>', resolved against the bucket.
        media_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)

        return {
            '_type': 'video',
            'id': slideshow['id'],
            'title': slideshow['title'],
            'ext': ext,
            'url': media_url,
            'thumbnail': slideshow['pin_image_url'],
            'description': self._og_search_description(page_html),
        }
@ -1,2 +1,2 @@ |
|||
|
|||
__version__ = '2013.09.06.1' |
|||
__version__ = '2013.11.09' |
Write
Preview
Loading…
Cancel
Save