Browse Source

Merge remote-tracking branch 'upstream/master'

master
Allan Zhou 11 years ago
parent
commit
b4e60dac23
7 changed files with 153 additions and 9 deletions
  1. 21
      devscripts/youtube_genalgo.py
  2. 1
      youtube_dl/__init__.py
  3. 4
      youtube_dl/extractor/__init__.py
  4. 47
      youtube_dl/extractor/jeuxvideo.py
  5. 34
      youtube_dl/extractor/pbs.py
  6. 23
      youtube_dl/extractor/slashdot.py
  7. 32
      youtube_dl/extractor/unistra.py

21
devscripts/youtube_genalgo.py

@ -11,24 +11,27 @@ tests = [
# 90 # 90
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
# 89
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
"/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
# 88 # 88
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
# 87 - vflART1Nf 2013/07/24
# 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
# 86 - vflm_D8eE 2013/07/31
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
# 86
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"),
# 85 - vflSAFCP9 2013/07/19
"yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
# 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
# 84 # 84
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
# 83 - vflTWC9KW 2013/08/01
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
"qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
# 82 # 82
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),

1
youtube_dl/__init__.py

@ -27,6 +27,7 @@ __authors__ = (
'Johny Mo Swag', 'Johny Mo Swag',
'Axel Noack', 'Axel Noack',
'Albert Kim', 'Albert Kim',
'Pierre Rudloff',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'

4
youtube_dl/extractor/__init__.py

@ -36,6 +36,7 @@ from .ign import IGNIE, OneUPIE
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE from .jukebox import JukeboxIE
from .justintv import JustinTVIE from .justintv import JustinTVIE
from .kankan import KankanIE from .kankan import KankanIE
@ -50,6 +51,7 @@ from .myspass import MySpassIE
from .myvideo import MyVideoIE from .myvideo import MyVideoIE
from .nba import NBAIE from .nba import NBAIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
@ -58,6 +60,7 @@ from .ringtv import RingTVIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .sina import SinaIE from .sina import SinaIE
from .slashdot import SlashdotIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
@ -72,6 +75,7 @@ from .tudou import TudouIE
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tutv import TutvIE from .tutv import TutvIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .unistra import UnistraIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veoh import VeohIE from .veoh import VeohIE
from .vevo import VevoIE from .vevo import VevoIE

47
youtube_dl/extractor/jeuxvideo.py

@ -0,0 +1,47 @@
# coding: utf-8
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
class JeuxVideoIE(InfoExtractor):
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
_TEST = {
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
u'file': u'5182.mp4',
u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
u'info_dict': {
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, title)
m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
xml_link = m_download.group(1)
id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
xml_config = self._download_webpage(xml_link, title,
'Downloading XML config')
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
info = re.search(r'<format\.json>(.*?)</format\.json>',
xml_config, re.MULTILINE|re.DOTALL).group(1)
info = json.loads(info)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file']
return {'id': id,
'title' : config.find('titre_video').text,
'ext' : 'mp4',
'url' : video_url,
'description': self._og_search_description(webpage),
'thumbnail': config.find('image').text,
}

34
youtube_dl/extractor/pbs.py

@ -0,0 +1,34 @@
import re
import json
from .common import InfoExtractor
class PBSIE(InfoExtractor):
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
_TEST = {
u'url': u'http://video.pbs.org/video/2365006249/',
u'file': u'2365006249.mp4',
u'md5': 'ce1888486f0908d555a8093cac9a7362',
u'info_dict': {
u'title': u'A More Perfect Union',
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
u'duration': 3190,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info_page = self._download_webpage(info_url, video_id)
info =json.loads(info_page)
return {'id': video_id,
'title': info['title'],
'url': info['alternate_encoding']['url'],
'ext': 'mp4',
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
}

23
youtube_dl/extractor/slashdot.py

@ -0,0 +1,23 @@
import re
from .common import InfoExtractor
class SlashdotIE(InfoExtractor):
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
_TEST = {
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
u'info_dict': {
u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
return self.url_result(ooyala_url, 'Ooyala')

32
youtube_dl/extractor/unistra.py

@ -0,0 +1,32 @@
import re
from .common import InfoExtractor
class UnistraIE(InfoExtractor):
_VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
_TEST = {
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
u'file': u'154.mp4',
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
u'info_dict': {
u'title': u'M!ss Yella',
u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
},
}
def _real_extract(self, url):
id = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, id)
file = re.search(r'file: "(.*?)",', webpage).group(1)
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
return {'id': id,
'title': title,
'ext': 'mp4',
'url': video_url,
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
}
Loading…
Cancel
Save