|
|
@ -0,0 +1,65 @@ |
|
|
|
import re |
|
|
|
import xml.etree.ElementTree |
|
|
|
import json |
|
|
|
|
|
|
|
from .common import InfoExtractor |
|
|
|
from ..utils import ( |
|
|
|
compat_urlparse, |
|
|
|
ExtractorError, |
|
|
|
find_xpath_attr, |
|
|
|
) |
|
|
|
|
|
|
|
class ORFIE(InfoExtractor): |
|
|
|
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)' |
|
|
|
|
|
|
|
_TEST = { |
|
|
|
u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter', |
|
|
|
u'file': u'6566957.flv', |
|
|
|
u'info_dict': { |
|
|
|
u'title': u'Wetter', |
|
|
|
u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at', |
|
|
|
}, |
|
|
|
u'params': { |
|
|
|
# It uses rtmp |
|
|
|
u'skip_download': True, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
|
playlist_id = mobj.group('id') |
|
|
|
webpage = self._download_webpage(url, playlist_id) |
|
|
|
|
|
|
|
flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml') |
|
|
|
flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0] |
|
|
|
flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8')) |
|
|
|
playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"') |
|
|
|
playlist = json.loads(playlist_json) |
|
|
|
|
|
|
|
videos = [] |
|
|
|
ns = '{http://tempuri.org/XMLSchema.xsd}' |
|
|
|
xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns} |
|
|
|
webpage_description = self._og_search_description(webpage) |
|
|
|
for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1): |
|
|
|
# Get best quality url |
|
|
|
rtmp_url = None |
|
|
|
for q in ['Q6A', 'Q4A', 'Q1A']: |
|
|
|
video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q) |
|
|
|
if video_url is not None: |
|
|
|
rtmp_url = video_url.text |
|
|
|
break |
|
|
|
if rtmp_url is None: |
|
|
|
raise ExtractorError(u'Couldn\'t get video url: %s' % info['id']) |
|
|
|
description = self._html_search_regex( |
|
|
|
r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage, |
|
|
|
u'description', default=webpage_description, flags=re.DOTALL) |
|
|
|
videos.append({ |
|
|
|
'_type': 'video', |
|
|
|
'id': info['id'], |
|
|
|
'title': info['title'], |
|
|
|
'url': rtmp_url, |
|
|
|
'ext': 'flv', |
|
|
|
'description': description, |
|
|
|
}) |
|
|
|
|
|
|
|
return videos |