Browse Source

Merge branch 'master' of github.com:rg3/youtube-dl

master
Philipp Hagemeister 9 years ago
parent
commit
e14ced7918
4 changed files with 148 additions and 29 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 49
      youtube_dl/extractor/blinkx.py
  3. 27
      youtube_dl/extractor/ted.py
  4. 100
      youtube_dl/extractor/tv4.py

1
youtube_dl/extractor/__init__.py

@ -490,6 +490,7 @@ from .tumblr import TumblrIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
from .tv4 import TV4IE
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE

49
youtube_dl/extractor/blinkx.py

@ -1,40 +1,35 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import remove_start
from ..utils import (
remove_start,
int_or_none,
)
class BlinkxIE(InfoExtractor):
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
IE_NAME = 'blinkx'
_TEST = {
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
'md5': '2e9a07364af40163a908edbf10bb2492',
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
'md5': '337cf7a344663ec79bf93a526a2e06c7',
'info_dict': {
'id': '8aQUy7GV',
'id': 'Da0Gw3xc',
'ext': 'mp4',
'title': 'Police Car Rolls Away',
'uploader': 'stupidvideos.com',
'upload_date': '20131215',
'timestamp': 1387068000,
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
'duration': 14.886,
'thumbnails': [{
'width': 100,
'height': 76,
'resolution': '100x76',
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
}],
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
'uploader': 'IGN News',
'upload_date': '20150217',
'timestamp': 1424215740,
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
'duration': 47.743333,
},
}
def _real_extract(self, rl):
m = re.match(self._VALID_URL, rl)
video_id = m.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
display_id = video_id[:8]
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor):
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
tbr = vbr + abr if vbr and abr else None
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
formats.append({
'format_id': format_id,
'url': m['link'],
'vcodec': vcodec,
'acodec': acodec,
'abr': int(m['abr']) // 1000,
'vbr': int(m['vbr']) // 1000,
'abr': abr,
'vbr': vbr,
'tbr': tbr,
'width': int(m['w']),
'height': int(m['h']),
'width': int_or_none(m.get('w')),
'height': int_or_none(m.get('h')),
})
self._sort_formats(formats)

27
youtube_dl/extractor/ted.py

@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor):
'params': {
'skip_download': True,
},
}, {
# YouTube video
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
'add_ie': ['Youtube'],
'info_dict': {
'id': 'aFBIPO-P7LM',
'ext': 'mp4',
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
'uploader': 'TEDx Talks',
'uploader_id': 'TEDxTalks',
'upload_date': '20111216',
},
'params': {
'skip_download': True,
},
}]
_NATIVE_FORMATS = {
@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor):
talk_info = self._extract_info(webpage)['talks'][0]
if talk_info.get('external') is not None:
self.to_screen('Found video from %s' % talk_info['external']['service'])
external = talk_info.get('external')
if external:
service = external['service']
self.to_screen('Found video from %s' % service)
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return {
'_type': 'url',
'url': talk_info['external']['uri'],
'url': ext_url or external['uri'],
}
formats = [{

100
youtube_dl/extractor/tv4.py

@ -0,0 +1,100 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_iso8601,
)
class TV4IE(InfoExtractor):
IE_DESC = 'tv4.se and tv4play.se'
_VALID_URL = r'''(?x)https?://(?:www\.)?
(?:
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
tv4play\.se/
(?:
(?:program|barn)/(?:[^\?]+)\?video_id=|
iframe/video/|
film/|
sport/|
)
)(?P<id>[0-9]+)'''
_TESTS = [
{
'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
'md5': '909d6454b87b10a25aa04c4bdd416a9b',
'info_dict': {
'id': '2491650',
'ext': 'mp4',
'title': 'Kalla Fakta 5 (english subtitles)',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': int,
'upload_date': '20131125',
},
},
{
'url': 'http://www.tv4play.se/iframe/video/3054113',
'md5': '77f851c55139ffe0ebd41b6a5552489b',
'info_dict': {
'id': '3054113',
'ext': 'mp4',
'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
'timestamp': int,
'upload_date': '20150130',
},
},
{
'url': 'http://www.tv4play.se/sport/3060959',
'only_matching': True,
},
{
'url': 'http://www.tv4play.se/film/2378136',
'only_matching': True,
},
{
'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
'only_matching': True,
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_json(
'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
# If is_geo_restricted is true, it doesn't neceserally mean we can't download it
if info['is_geo_restricted']:
self.report_warning('This content might not be available in your country due to licensing restrictions.')
if info['requires_subscription']:
raise ExtractorError('This content requires subscription.', expected=True)
sources_data = self._download_json(
'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
sources = sources_data['playback']
formats = []
for item in sources.get('items', {}).get('item', []):
ext, bitrate = item['mediaFormat'], item['bitrate']
formats.append({
'format_id': '%s_%s' % (ext, bitrate),
'tbr': bitrate,
'ext': ext,
'url': item['url'],
})
self._sort_formats(formats)
return {
'id': video_id,
'title': info['title'],
'formats': formats,
'description': info.get('description'),
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': info.get('duration'),
'thumbnail': info.get('image'),
'is_live': sources.get('live'),
}
Loading…
Cancel
Save