Browse Source

Merge remote-tracking branch 'pulpe/tagesschau'

master
Philipp Hagemeister 10 years ago
parent
commit
ebdb64d605
2 changed files with 83 additions and 0 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 82
      youtube_dl/extractor/tagesschau.py

1
youtube_dl/extractor/__init__.py

@ -266,6 +266,7 @@ from .streamcz import StreamCZIE
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE

82
youtube_dl/extractor/tagesschau.py

@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class TagesschauIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?\d+)\.html'
_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
'md5': 'bcdeac2194fb296d599ce7929dfa4009',
'info_dict': {
'id': '1399128',
'ext': 'mp4',
'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
'description': 'md5:69da3c61275b426426d711bde96463ab',
'thumbnail': 're:^http:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/video/video-196.html',
'md5': '8aaa8bf3ae1ca2652309718c03019128',
'info_dict': {
'id': '196',
'ext': 'mp4',
'title': 'Ukraine-Konflikt: Klitschko in Kiew als B\xfcrgermeister vereidigt',
'description': 'md5:f22e4af75821d174fa6c977349682691',
'thumbnail': 're:http://.*\.jpg',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if video_id.startswith('-'):
display_id = video_id.strip('-')
else:
display_id = video_id
webpage = self._download_webpage(url, display_id)
playerpage = self._download_webpage(
'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, display_id, 'Downloading player page')
medias = re.findall(r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', playerpage)
formats = []
for url, ext, res in medias:
if res == 's':
res = 'small'
quality = 0
elif res == 'm':
res = 'medium'
quality = 1
elif res == 'l':
res = 'large'
quality = 2
else:
quality = 0
formats.append({
'format_id': res+'_'+ext,
'url': url,
'quality': quality,
'ext': ext,
})
self._sort_formats(formats)
thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
return {
'id': display_id,
'title': self._og_search_title(webpage).strip(),
'thumbnail': 'http://www.tagesschau.de'+thumbnail,
'formats': formats,
'description': self._og_search_description(webpage).strip(),
}
Loading…
Cancel
Save