|
|
@ -6,7 +6,10 @@ import re |
|
|
|
from .common import InfoExtractor |
|
|
|
|
|
|
|
from ..compat import compat_str |
|
|
|
from ..utils import int_or_none |
|
|
|
from ..utils import ( |
|
|
|
int_or_none, |
|
|
|
try_get, |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
class TEDIE(InfoExtractor): |
|
|
@ -113,8 +116,9 @@ class TEDIE(InfoExtractor): |
|
|
|
} |
|
|
|
|
|
|
|
def _extract_info(self, webpage): |
|
|
|
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', |
|
|
|
webpage, 'info json') |
|
|
|
info_json = self._search_regex( |
|
|
|
r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>', |
|
|
|
webpage, 'info json') |
|
|
|
return json.loads(info_json) |
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
@ -136,11 +140,16 @@ class TEDIE(InfoExtractor): |
|
|
|
webpage = self._download_webpage(url, name, |
|
|
|
'Downloading playlist webpage') |
|
|
|
info = self._extract_info(webpage) |
|
|
|
playlist_info = info['playlist'] |
|
|
|
|
|
|
|
playlist_info = try_get( |
|
|
|
info, lambda x: x['__INITIAL_DATA__']['playlist'], |
|
|
|
dict) or info['playlist'] |
|
|
|
|
|
|
|
playlist_entries = [ |
|
|
|
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key()) |
|
|
|
for talk in info['talks'] |
|
|
|
for talk in try_get( |
|
|
|
info, lambda x: x['__INITIAL_DATA__']['talks'], |
|
|
|
dict) or info['talks'] |
|
|
|
] |
|
|
|
return self.playlist_result( |
|
|
|
playlist_entries, |
|
|
@ -149,9 +158,14 @@ class TEDIE(InfoExtractor): |
|
|
|
|
|
|
|
def _talk_info(self, url, video_name): |
|
|
|
webpage = self._download_webpage(url, video_name) |
|
|
|
self.report_extraction(video_name) |
|
|
|
|
|
|
|
talk_info = self._extract_info(webpage)['talks'][0] |
|
|
|
info = self._extract_info(webpage) |
|
|
|
|
|
|
|
talk_info = try_get( |
|
|
|
info, lambda x: x['__INITIAL_DATA__']['talks'][0], |
|
|
|
dict) or info['talks'][0] |
|
|
|
|
|
|
|
title = talk_info['title'].strip() |
|
|
|
|
|
|
|
external = talk_info.get('external') |
|
|
|
if external: |
|
|
@ -165,19 +179,27 @@ class TEDIE(InfoExtractor): |
|
|
|
'url': ext_url or external['uri'], |
|
|
|
} |
|
|
|
|
|
|
|
native_downloads = try_get( |
|
|
|
talk_info, lambda x: x['downloads']['nativeDownloads'], |
|
|
|
dict) or talk_info['nativeDownloads'] |
|
|
|
|
|
|
|
formats = [{ |
|
|
|
'url': format_url, |
|
|
|
'format_id': format_id, |
|
|
|
'format': format_id, |
|
|
|
} for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None] |
|
|
|
} for (format_id, format_url) in native_downloads.items() if format_url is not None] |
|
|
|
if formats: |
|
|
|
for f in formats: |
|
|
|
finfo = self._NATIVE_FORMATS.get(f['format_id']) |
|
|
|
if finfo: |
|
|
|
f.update(finfo) |
|
|
|
|
|
|
|
player_talk = talk_info['player_talks'][0] |
|
|
|
|
|
|
|
resources_ = player_talk.get('resources') or talk_info.get('resources') |
|
|
|
|
|
|
|
http_url = None |
|
|
|
for format_id, resources in talk_info['resources'].items(): |
|
|
|
for format_id, resources in resources_.items(): |
|
|
|
if format_id == 'h264': |
|
|
|
for resource in resources: |
|
|
|
h264_url = resource.get('file') |
|
|
@ -237,14 +259,11 @@ class TEDIE(InfoExtractor): |
|
|
|
|
|
|
|
video_id = compat_str(talk_info['id']) |
|
|
|
|
|
|
|
thumbnail = talk_info['thumb'] |
|
|
|
if not thumbnail.startswith('http'): |
|
|
|
thumbnail = 'http://' + thumbnail |
|
|
|
return { |
|
|
|
'id': video_id, |
|
|
|
'title': talk_info['title'].strip(), |
|
|
|
'uploader': talk_info['speaker'], |
|
|
|
'thumbnail': thumbnail, |
|
|
|
'title': title, |
|
|
|
'uploader': player_talk.get('speaker') or talk_info.get('speaker'), |
|
|
|
'thumbnail': player_talk.get('thumb>an class="s1">') or talk_info.get('thumb'), |
|
|
|
'description': self._og_search_description(webpage), |
|
|
|
'subtitles': self._get_subtitles(video_id, talk_info), |
|
|
|
'formats': formats, |
|
|
|