12 changed files with 158 additions and 20 deletions
Unified View
Diff Options
-
4devscripts/gh-pages/add-version.py
-
2devscripts/gh-pages/update-sites.py
-
4devscripts/youtube_genalgo.py
-
11test/test_all_urls.py
-
2youtube_dl/FileDownloader.py
-
5youtube_dl/extractor/__init__.py
-
2youtube_dl/extractor/canalc2.py
-
77youtube_dl/extractor/francetv.py
-
47youtube_dl/extractor/slideshare.py
-
16youtube_dl/extractor/sohu.py
-
6youtube_dl/extractor/youtube.py
-
2youtube_dl/version.py
@ -0,0 +1,77 @@ |
|||||
|
# encoding: utf-8 |
||||
|
import re |
||||
|
import xml.etree.ElementTree |
||||
|
|
||||
|
from .common import InfoExtractor |
||||
|
from ..utils import ( |
||||
|
compat_urlparse, |
||||
|
) |
||||
|
|
||||
|
|
||||
|
class FranceTVBaseInfoExtractor(InfoExtractor):
    """Common base for the francetv extractors defined in this module.

    Subclasses resolve a page URL to a numeric video id and then delegate
    to ``_extract_video`` to build the result dictionary.
    """

    def _extract_video(self, video_id):
        """Return the info dictionary for ``video_id``.

        The XML description is fetched from the ``getInfosOeuvre.php``
        web service, parsed, and its ``videos/video/url``, ``image``,
        ``titre`` and ``synopsis`` elements are read.
        """
        config_url = (
            'http://www.francetvinfo.fr/appftv/webservices/video/'
            'getInfosOeuvre.php?id-diffusion='
            + video_id)
        xml_desc = self._download_webpage(
            config_url, video_id, 'Downloading XML config')
        root = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))

        # The service advertises an f4m manifest; rewrite it to the
        # corresponding m3u8 index under the '/i/' path.
        video_url = (
            root.find('videos/video/url').text
            .replace('manifest.f4m', 'index_2_av.m3u8')
            .replace('/z/', '/i/'))
        thumbnail_path = root.find('image').text

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': root.find('titre').text,
            # The image path is resolved against the pluzz host.
            'thumbnail': compat_urlparse.urljoin(
                'http://pluzz.francetv.fr', thumbnail_path),
            'description': root.find('synopsis').text,
        }
||||
|
|
||||
|
|
||||
|
class PluzzIE(FranceTVBaseInfoExtractor):
    """Extractor for video pages on pluzz.francetv.fr."""

    IE_NAME = u'pluzz.francetv.fr'
    _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'

    _TEST = {
        u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
        u'file': u'88439064.mp4',
        u'info_dict': {
            u'title': u'Allô Rufo',
            u'description': u'md5:d909f1ebdf963814b65772aea250400e',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve a pluzz page URL to its video info dictionary.

        The page is downloaded, the numeric id is read from its
        ``data-diffusion`` attribute, and extraction is delegated to
        ``_extract_video``.
        """
        url_match = re.match(self._VALID_URL, url)
        # The URL segment between '/videos/' and '.html' is only passed
        # along to the page download; the real id comes from the page.
        page_name = url_match.group(1)
        webpage = self._download_webpage(url, page_name)
        diffusion_id = self._search_regex(
            r'data-diffusion="(\d+)"', webpage, 'ID')
        return self._extract_video(diffusion_id)
||||
|
|
||||
|
|
||||
|
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    """Extractor for replay pages on www.francetvinfo.fr."""

    IE_NAME = u'francetvinfo.fr'
    # The dot before 'html' is escaped so that only a literal '.html'
    # suffix is accepted (an unescaped '.' would match any character).
    _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'

    _TEST = {
        u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
        u'file': u'84981923.mp4',
        u'info_dict': {
            u'title': u'Soir 3',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve a francetvinfo replay URL to its video info dictionary.

        The page is downloaded, the numeric id following ``id-video=`` is
        read from it, and extraction is delegated to ``_extract_video``.
        """
        mobj = re.match(self._VALID_URL, url)
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        video_id = self._search_regex(
            r'id-video=(\d+?)"', webpage, u'video id')
        return self._extract_video(video_id)
@ -0,0 +1,47 @@ |
|||||
|
import re |
||||
|
import json |
||||
|
|
||||
|
from .common import InfoExtractor |
||||
|
from ..utils import ( |
||||
|
compat_urlparse, |
||||
|
ExtractorError, |
||||
|
) |
||||
|
|
||||
|
|
||||
|
class SlideshareIE(InfoExtractor):
    """Extractor for video presentations on www.slideshare.net."""

    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'

    _TEST = {
        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
        u'file': u'25665706.mp4',
        u'info_dict': {
            u'title': u'Managing Scale and Complexity',
            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
        },
    }

    def _real_extract(self, url):
        """Resolve a Slideshare URL to its video info dictionary.

        The ``slideshare_object`` JSON embedded in the page is parsed; an
        ``ExtractorError`` (flagged ``expected``) is raised when the
        slideshow type is anything other than ``video``.
        """
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)

        raw_object = self._search_regex(
            r'var slideshare_object = ({.*?}); var user_info =',
            webpage, u'slideshare object')
        info = json.loads(raw_object)

        slideshow = info['slideshow']
        if slideshow['type'] != u'video':
            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % slideshow['type'], expected=True)

        # The media URL is "<doc>-SD.<ext>" resolved against the bucket
        # URL taken from the player configuration.
        doc = info['doc']
        bucket = info['jsplayer']['video_bucket']
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)

        return {
            '_type': 'video',
            'id': slideshow['id'],
            'title': slideshow['title'],
            'ext': ext,
            'url': video_url,
            'thumbnail': slideshow['pin_image_url'],
            'description': self._og_search_description(webpage),
        }
@ -1,2 +1,2 @@ |
|||||
|
|
||||
__version__ = '2013.09.06.1' |
|
||||
|
__version__ = '2013.11.09' |
Write
Preview
Loading…
Cancel
Save