16 changed files with 344 additions and 103 deletions
Unified View
Diff Options
-
4devscripts/youtube_genalgo.py
-
1test/test_all_urls.py
-
79test/test_youtube_sig.py
-
4youtube_dl/YoutubeDL.py
-
4youtube_dl/extractor/__init__.py
-
32youtube_dl/extractor/arte.py
-
2youtube_dl/extractor/collegehumor.py
-
64youtube_dl/extractor/muzu.py
-
18youtube_dl/extractor/myvideo.py
-
52youtube_dl/extractor/ooyala.py
-
100youtube_dl/extractor/rtlnow.py
-
4youtube_dl/extractor/vevo.py
-
49youtube_dl/extractor/videofyme.py
-
28youtube_dl/extractor/youtube.py
-
4youtube_dl/utils.py
-
2youtube_dl/version.py
@ -1,79 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
|
|
||||
import unittest |
|
||||
import sys |
|
||||
|
|
||||
# Allow direct execution |
|
||||
import os |
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
||||
|
|
||||
from youtube_dl.extractor.youtube import YoutubeIE |
|
||||
from helper import FakeYDL |
|
||||
|
|
||||
# One extractor instance, built on the FakeYDL helper, shared by every test.
ie = YoutubeIE(FakeYDL())
# Short aliases for the two bound methods exercised by TestYoutubeSig.
sig = ie._decrypt_signature
sig_age_gate = ie._decrypt_signature_age_gate
|
||||
|
|
||||
class TestYoutubeSig(unittest.TestCase): |
|
||||
def test_92(self): |
|
||||
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8" |
|
||||
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_90(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`" |
|
||||
right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_88(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" |
|
||||
right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_87(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" |
|
||||
right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_86(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" |
|
||||
right = ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_85(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" |
|
||||
right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_84(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<" |
|
||||
right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_83(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" |
|
||||
right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_82(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<" |
|
||||
right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_81(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>." |
|
||||
right = "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_79(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/" |
|
||||
right = "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp" |
|
||||
self.assertEqual(sig(wrong), right) |
|
||||
|
|
||||
def test_86_age_gate(self): |
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" |
|
||||
right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@" |
|
||||
self.assertEqual(sig_age_gate(wrong), right) |
|
||||
|
|
||||
# Run the test cases when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
@ -0,0 +1,64 @@ |
|||||
|
import re |
||||
|
import json |
||||
|
|
||||
|
from .common import InfoExtractor |
||||
|
from ..utils import ( |
||||
|
compat_urllib_parse, |
||||
|
determine_ext, |
||||
|
) |
||||
|
|
||||
|
|
||||
|
class MuzuTVIE(InfoExtractor):
    """Information extractor for video pages on www.muzu.tv."""

    # The dots of the host name are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
    IE_NAME = u'muzu.tv'

    _TEST = {
        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
        u'file': u'1981454.mp4',
        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
        u'info_dict': {
            u'title': u'Cat Walk (Original Mix)',
            u'description': u'md5:90e868994de201b2570e4e5854e19420',
            u'uploader': u'MarcAshken featuring SOS',
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at ``url``.

        Three documents are downloaded in turn: the oembed description of
        the page (title, thumbnail, description, author), the player
        initialisation data (used to pick a quality) and the answer of the
        ``requestVideo`` endpoint, which carries the media URL.

        Returns a dict with the keys ``id``, ``title``, ``url``, ``ext``,
        ``thumbnail``, ``description`` and ``uploader``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_data = compat_urllib_parse.urlencode({'format': 'json',
                                                   'url': url,
                                                   })
        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
                                                 video_id, u'Downloading video info')
        info = json.loads(video_info_page)

        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
                                                  video_id, u'Downloading player info')
        video_info = json.loads(player_info_page)['videos'][0]
        # Probe the qualities from best to worst and stop at the first one
        # whose 'v<quality>' entry is set. If none is set, the loop runs to
        # completion and '360' stays selected.
        for quality in ['1080', '720', '480', '360']:
            if video_info.get('v%s' % quality):
                break

        data = compat_urllib_parse.urlencode({'ai': video_id,
                                              # Even if each time you watch a video the hash changes,
                                              # it seems to work for different videos, and it will work
                                              # even if you use any non empty string as a hash
                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
                                              'device': 'web',
                                              'qv': quality,
                                              })
        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
                                                video_id, u'Downloading video url')
        video_url_info = json.loads(video_url_page)
        video_url = video_url_info['url']

        return {'id': video_id,
                'title': info['title'],
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': info['thumbnail_url'],
                'description': info['description'],
                'uploader': info['author_name'],
                }
@ -0,0 +1,52 @@ |
|||||
|
import re |
||||
|
import json |
||||
|
|
||||
|
from .common import InfoExtractor |
||||
|
from ..utils import unescapeHTML |
||||
|
|
||||
|
class OoyalaIE(InfoExtractor):
    """Information extractor for ooyala.com player URLs carrying an embedCode."""

    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
        u'info_dict': {
            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        },
    }

    def _extract_result(self, info, more_info):
        """Combine one stream entry and its metadata entry into an info dict.

        ``info`` supplies ``embedCode``, ``title`` and ``url``;
        ``more_info`` supplies ``description`` and ``promo`` (used as the
        thumbnail). Title and description are HTML-unescaped.
        """
        return {
            'id': info['embedCode'],
            'ext': 'mp4',
            'title': unescapeHTML(info['title']),
            'url': info['url'],
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
        }

    def _real_extract(self, url):
        """Return a single video result, or a playlist when a lineup is present."""
        embed_code = re.match(self._VALID_URL, url).group('id')

        # The desktop player script only serves to locate the mobile player.
        player_page = self._download_webpage(
            'http://player.ooyala.com/player.js?embedCode=%s' % embed_code,
            embed_code)
        mobile_url = self._search_regex(
            r'mobile_player_url="(.+?)&device="',
            player_page, u'mobile player url')
        mobile_page = self._download_webpage(mobile_url, embed_code)

        # Both JSON payloads sit inside eval("(...)") calls with escaped quotes.
        raw_streams = self._search_regex(
            r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);',
            mobile_page, u'info')
        raw_details = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"',
            mobile_page, u'more info')
        streams = json.loads(raw_streams.replace('\\"', '"'))
        details = json.loads(raw_details.replace('\\"', '"'))

        if not details.get('lineup'):
            return self._extract_result(streams[0], details)

        entries = [
            self._extract_result(stream, extra)
            for stream, extra in zip(streams, details['lineup'])
        ]
        return {
            '_type': 'playlist',
            'id': embed_code,
            'title': unescapeHTML(details['title']),
            'entries': entries,
        }
||||
|
|
@ -0,0 +1,100 @@ |
|||||
|
# encoding: utf-8 |
||||
|
import re |
||||
|
|
||||
|
from .common import InfoExtractor |
||||
|
from ..utils import ExtractorError |
||||
|
|
||||
|
class RTLnowIE(InfoExtractor):
    """Information Extractor for RTLnow, RTL2now and VOXnow"""
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [
        {
            u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
            u'file': u'90419.flv',
            u'info_dict': {
                u'upload_date': u'20070416',
                u'title': u'Ahornallee - Folge 1 - Der Einzug',
                u'description': u'Folge 1 - Der Einzug',
            },
            u'params': {
                u'skip_download': True,
            },
        },
        {
            u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
            u'file': u'69756.flv',
            u'info_dict': {
                u'upload_date': u'20120519',
                u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
                u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
                u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
            },
            u'params': {
                u'skip_download': True,
            },
        },
        {
            u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
            u'file': u'13883.flv',
            u'info_dict': {
                u'upload_date': u'20090627',
                u'title': u'Voxtours - Südafrika-Reporter II',
                u'description': u'Südafrika-Reporter II',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for ``url``.

        The video page yields the title, the optional thumbnail and the
        address of the player data; the player data yields the description,
        the upload date and the rtmpe media location. A missing description
        or date only triggers a warning, a missing media URL raises
        ExtractorError.
        """
        url_match = re.match(self._VALID_URL, url)

        # The scheme is optional in _VALID_URL, so it is always re-added here.
        page_address = u'http://' + url_match.group('url')
        site_root = u'http://' + url_match.group('base_url')
        video_id = url_match.group(u'video_id')

        webpage = self._download_webpage(page_address, video_id)
        title = self._html_search_regex(
            r'<title>(?P<title>[^<]+)</title>',
            webpage, u'title')
        playerdata_url = self._html_search_regex(
            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
            webpage, u'playerdata_url')

        playerdata = self._download_webpage(playerdata_url, video_id)
        meta = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
        if meta is None:
            description = None
            upload_date = None
            self._downloader.report_warning(u'Unable to extract description and upload date')
        else:
            description = meta.group(u'description')
            # A two-digit year is expanded with the prefix '20'.
            year = meta.group('upload_date_Y') or u'20' + meta.group('upload_date_y')
            upload_date = year + meta.group('upload_date_m') + meta.group('upload_date_d')

        # Thumbnail: not every video has a thumbnail
        thumb = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
        thumbnail = thumb.group(u'thumbnail') if thumb else None

        media = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
        if media is None:
            raise ExtractorError(u'Unable to extract media URL')

        return [{
            'id': video_id,
            'url': media.group(u'url'),
            'play_path': u'mp4:' + media.group(u'play_path'),
            'page_url': site_root,
            'player_url': site_root + u'includes/vodplayer.swf',
            'ext': 'flv',
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'thumbnail': thumbnail,
        }]
@ -0,0 +1,49 @@ |
|||||
|
import re |
||||
|
import xml.etree.ElementTree |
||||
|
|
||||
|
from .common import InfoExtractor |
||||
|
from ..utils import ( |
||||
|
find_xpath_attr, |
||||
|
determine_ext, |
||||
|
) |
||||
|
|
||||
|
class VideofyMeIE(InfoExtractor):
    """Information extractor for www.videofy.me and p.videofy.me video URLs."""

    # The dots of both host names are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = u'videofy.me'

    _TEST = {
        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
        u'file': u'1100701.mp4',
        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
        u'info_dict': {
            u'title': u'This is VideofyMe',
            u'description': None,
            u'uploader': u'VideofyMe',
            u'uploader_id': u'thisisvideofyme',
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at ``url``.

        The XML configuration served by sunshine.videofy.me is downloaded
        and parsed; the media URL comes from the source with id 'HQ on',
        falling back to 'HQ off' when the former is absent.

        Returns a dict with the keys ``id``, ``title``, ``url``, ``ext``,
        ``thumbnail``, ``description``, ``uploader``, ``uploader_id`` and
        ``view_count`` (the first run of digits of the views text, kept as
        a string).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
                                            video_id)
        # The parser is handed UTF-8 encoded bytes rather than the text.
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video = config.find('video')
        sources = video.find('sources')
        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
        if url_node is None:
            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
        video_url = url_node.find('url').text

        return {'id': video_id,
                'title': video.find('title').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': video.find('thumb').text,
                'description': video.find('description').text,
                'uploader': config.find('blog/name').text,
                'uploader_id': video.find('identifier').text,
                'view_count': re.search(r'\d+', video.find('views').text).group(),
                }
@ -1,2 +1,2 @@ |
|||||
|
|
||||
__version__ = '2013.07.31' |
|
||||
|
__version__ = '2013.08.17' |
Write
Preview
Loading…
Cancel
Save