16 changed files with 344 additions and 103 deletions
Split View
Diff Options
-
4devscripts/youtube_genalgo.py
-
1test/test_all_urls.py
-
79test/test_youtube_sig.py
-
4youtube_dl/YoutubeDL.py
-
4youtube_dl/extractor/__init__.py
-
32youtube_dl/extractor/arte.py
-
2youtube_dl/extractor/collegehumor.py
-
64youtube_dl/extractor/muzu.py
-
18youtube_dl/extractor/myvideo.py
-
52youtube_dl/extractor/ooyala.py
-
100youtube_dl/extractor/rtlnow.py
-
4youtube_dl/extractor/vevo.py
-
49youtube_dl/extractor/videofyme.py
-
28youtube_dl/extractor/youtube.py
-
4youtube_dl/utils.py
-
2youtube_dl/version.py
@ -1,79 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
|
|||
import unittest |
|||
import sys |
|||
|
|||
# Allow direct execution |
|||
import os |
|||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|||
|
|||
from youtube_dl.extractor.youtube import YoutubeIE |
|||
from helper import FakeYDL |
|||
|
|||
# Build a single YoutubeIE on top of FakeYDL (from the local test helper
# module) and alias its two signature routines, so the test methods can call
# them by a short name.
ie = YoutubeIE(FakeYDL())
sig = ie._decrypt_signature
sig_age_gate = ie._decrypt_signature_age_gate
|||
|
|||
class TestYoutubeSig(unittest.TestCase):
    """Fixed input/output pairs for the YouTube signature routines.

    Every test feeds one scrambled signature string to the module-level
    ``sig`` (or ``sig_age_gate``) callable and compares the result against
    the expected string recorded here.
    """

    def test_92(self):
        self.assertEqual(
            sig("F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"),
            "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7")

    def test_90(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"),
            "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|")

    def test_88(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"),
            "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej")

    def test_87(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"),
            "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>")

    def test_86(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
            ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre")

    def test_85(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"),
            "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c")

    def test_84(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
            "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1")

    def test_83(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"),
            "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<")

    def test_82(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"),
            "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9")

    def test_81(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."),
            "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp")

    def test_79(self):
        self.assertEqual(
            sig("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/"),
            "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp")

    def test_86_age_gate(self):
        # Same input as test_86, but run through the age-gate routine.
        self.assertEqual(
            sig_age_gate("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
            "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@")
|||
|
|||
# Run the test cases when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
@ -0,0 +1,64 @@ |
|||
import re |
|||
import json |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ( |
|||
compat_urllib_parse, |
|||
determine_ext, |
|||
) |
|||
|
|||
|
|||
class MuzuTVIE(InfoExtractor):
    """Information extractor for videos on muzu.tv.

    Metadata is read from the site's oEmbed endpoint; the media URL is
    obtained from the player endpoints (``playerInit`` to find the available
    qualities, ``requestVideo`` to get the actual URL).
    """

    # The dots of the host name are escaped so that they match a literal '.'
    # only; unescaped they would accept any character in that position.
    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
    IE_NAME = u'muzu.tv'

    _TEST = {
        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
        u'file': u'1981454.mp4',
        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
        u'info_dict': {
            u'title': u'Cat Walk (Original Mix)',
            u'description': u'md5:90e868994de201b2570e4e5854e19420',
            u'uploader': u'MarcAshken featuring SOS',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the muzu.tv video at *url*.

        Raises KeyError if a response lacks a mandatory field ('title',
        'videos', 'url'); optional metadata that is absent is reported as
        None instead of aborting the extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_data = compat_urllib_parse.urlencode({
            'format': 'json',
            'url': url,
        })
        video_info_page = self._download_webpage(
            'http://www.muzu.tv/api/oembed/?%s' % info_data,
            video_id, u'Downloading video info')
        info = json.loads(video_info_page)

        player_info_page = self._download_webpage(
            'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
            video_id, u'Downloading player info')
        video_info = json.loads(player_info_page)['videos'][0]
        # Pick the best quality the player reports as available.  If none of
        # the 'v<quality>' flags is set the loop simply runs out and the
        # lowest quality ('360') is requested.
        for quality in ['1080', '720', '480', '360']:
            if video_info.get('v%s' % quality):
                break

        data = compat_urllib_parse.urlencode({
            'ai': video_id,
            # Even if each time you watch a video the hash changes,
            # it seems to work for different videos, and it will work
            # even if you use any non empty string as a hash
            'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
            'device': 'web',
            'qv': quality,
        })
        video_url_page = self._download_webpage(
            'http://player.muzu.tv/player/requestVideo?%s' % data,
            video_id, u'Downloading video url')
        video_url_info = json.loads(video_url_page)
        video_url = video_url_info['url']

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'ext': determine_ext(video_url),
            # Optional metadata: a missing key must not abort the download.
            'thumbnail': info.get('thumbnail_url'),
            'description': info.get('description'),
            'uploader': info.get('author_name'),
        }
@ -0,0 +1,52 @@ |
|||
import re |
|||
import json |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import unescapeHTML |
|||
|
|||
class OoyalaIE(InfoExtractor):
    """Information extractor for Ooyala player URLs carrying an embedCode."""

    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
        u'info_dict': {
            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        },
    }

    def _extract_result(self, info, more_info):
        """Combine one stream dict and its metadata dict into an info dict."""
        result = {
            'id': info['embedCode'],
            'ext': 'mp4',
            'title': unescapeHTML(info['title']),
            'url': info['url'],
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
        }
        return result

    def _real_extract(self, url):
        """Return a single video dict, or a playlist dict when a lineup exists."""
        embed_code = re.match(self._VALID_URL, url).group('id')

        player_js = self._download_webpage(
            'http://player.ooyala.com/player.js?embedCode=%s' % embed_code,
            embed_code)
        mobile_url = self._search_regex(
            r'mobile_player_url="(.+?)&device="', player_js, u'mobile player url')
        mobile_player = self._download_webpage(mobile_url, embed_code)

        # Both JSON payloads sit inside eval("...") calls with escaped quotes.
        raw_streams = self._search_regex(
            r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info')
        raw_details = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info')
        streams = json.loads(raw_streams.replace('\\"', '"'))
        details = json.loads(raw_details.replace('\\"', '"'))

        if not details.get('lineup'):
            # No lineup: the page describes exactly one video.
            return self._extract_result(streams[0], details)

        entries = [
            self._extract_result(stream, item)
            for stream, item in zip(streams, details['lineup'])
        ]
        return {
            '_type': 'playlist',
            'id': embed_code,
            'title': unescapeHTML(details['title']),
            'entries': entries,
        }
|||
|
@ -0,0 +1,100 @@ |
|||
# encoding: utf-8 |
|||
import re |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ExtractorError |
|||
|
|||
class RTLnowIE(InfoExtractor):
    """Information Extractor for RTLnow, RTL2now and VOXnow"""

    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [
        {
            u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
            u'file': u'90419.flv',
            u'info_dict': {
                u'upload_date': u'20070416',
                u'title': u'Ahornallee - Folge 1 - Der Einzug',
                u'description': u'Folge 1 - Der Einzug',
            },
            u'params': {
                u'skip_download': True,
            },
        },
        {
            u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
            u'file': u'69756.flv',
            u'info_dict': {
                u'upload_date': u'20120519',
                u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
                u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
                u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
            },
            u'params': {
                u'skip_download': True,
            },
        },
        {
            u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
            u'file': u'13883.flv',
            u'info_dict': {
                u'upload_date': u'20090627',
                u'title': u'Voxtours - Südafrika-Reporter II',
                u'description': u'Südafrika-Reporter II',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at *url*.

        Raises ExtractorError when the player data holds no rtmpe media URL.
        """
        url_match = re.match(self._VALID_URL, url)

        # The scheme is optional in _VALID_URL, so it is always re-added here.
        webpage_url = u'http://' + url_match.group('url')
        video_page_url = u'http://' + url_match.group('base_url')
        video_id = url_match.group(u'video_id')

        webpage = self._download_webpage(webpage_url, video_id)
        video_title = self._html_search_regex(
            r'<title>(?P<title>[^<]+)</title>', webpage, u'title')
        playerdata_url = self._html_search_regex(
            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
            webpage, u'playerdata_url')

        playerdata = self._download_webpage(playerdata_url, video_id)

        # Description and broadcast date share one <title> element.
        date_match = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
        if date_match is None:
            video_description = None
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract description and upload date')
        else:
            video_description = date_match.group(u'description')
            # A two-digit year is expanded with a '20' prefix.
            year = date_match.group('upload_date_Y') or (u'20' + date_match.group('upload_date_y'))
            video_upload_date = year + date_match.group('upload_date_m') + date_match.group('upload_date_d')

        # Thumbnail: not every video has a thumbnail
        thumb_match = re.search(
            r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
        video_thumbnail = thumb_match.group(u'thumbnail') if thumb_match else None

        media_match = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
        if media_match is None:
            raise ExtractorError(u'Unable to extract media URL')

        entry = {
            'id': video_id,
            'url': media_match.group(u'url'),
            'play_path': u'mp4:' + media_match.group(u'play_path'),
            'page_url': video_page_url,
            'player_url': video_page_url + u'includes/vodplayer.swf',
            'ext': 'flv',
            'title': video_title,
            'description': video_description,
            'upload_date': video_upload_date,
            'thumbnail': video_thumbnail,
        }
        return [entry]
@ -0,0 +1,49 @@ |
|||
import re |
|||
import xml.etree.ElementTree |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ( |
|||
find_xpath_attr, |
|||
determine_ext, |
|||
) |
|||
|
|||
class VideofyMeIE(InfoExtractor):
    """Information extractor for videofy.me videos.

    The metadata and media URLs come from an XML configuration document
    downloaded from sunshine.videofy.me for the numeric video id.
    """

    # Host-name dots are escaped so they only match a literal '.'.
    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = u'videofy.me'

    _TEST = {
        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
        u'file': u'1100701.mp4',
        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
        u'info_dict': {
            u'title': u'This is VideofyMe',
            u'description': None,
            u'uploader': u'VideofyMe',
            u'uploader_id': u'thisisvideofyme',
        },

    }

    def _real_extract(self, url):
        """Return the info dict for the videofy.me video at *url*.

        The 'HQ on' source is preferred and 'HQ off' is the fallback.
        'view_count' is an int, or None when the configuration carries no
        number for it.

        Raises ExtractorError when neither source is present.
        """
        # Local import: only the missing-source error path needs this name.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        config_xml = self._download_webpage(
            'http://sunshine.videofy.me/?videoId=%s' % video_id,
            video_id)
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video = config.find('video')
        sources = video.find('sources')
        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
        if url_node is None:
            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
        if url_node is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ExtractorError(u'Unable to find a video source')
        video_url = url_node.find('url').text

        # findtext() yields None for a missing element; normalise to '' so
        # the search below cannot fail on a non-string.
        views_match = re.search(r'\d+', video.findtext('views') or '')
        view_count = int(views_match.group()) if views_match else None

        return {
            'id': video_id,
            'title': video.find('title').text,
            'url': video_url,
            'ext': determine_ext(video_url),
            'thumbnail': video.find('thumb').text,
            'description': video.find('description').text,
            'uploader': config.find('blog/name').text,
            'uploader_id': video.find('identifier').text,
            'view_count': view_count,
        }
@ -1,2 +1,2 @@ |
|||
|
|||
# NOTE(review): these two assignments look like the removed and the added
# side of a single diff hunk (2013.07.31 -> 2013.08.17); if both are kept,
# the second value is the one that takes effect.
__version__ = '2013.07.31'
__version__ = '2013.08.17'
Write
Preview
Loading…
Cancel
Save