|
@ -212,12 +212,15 @@ class UdemyIE(InfoExtractor): |
|
|
thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl') |
|
|
thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl') |
|
|
duration = float_or_none(asset.get('data', {}).get('duration')) |
|
|
duration = float_or_none(asset.get('data', {}).get('duration')) |
|
|
|
|
|
|
|
|
|
|
|
subtitles = {} |
|
|
|
|
|
automatic_captions = {} |
|
|
|
|
|
|
|
|
formats = [] |
|
|
formats = [] |
|
|
|
|
|
|
|
|
def extract_output_format(src): |
|
|
|
|
|
|
|
|
def extract_output_format(src, f_id): |
|
|
return { |
|
|
return { |
|
|
'url': src['url'], |
|
|
'url': src['url'], |
|
|
'format_id': '%sp' % (src.get('height') or format_id), |
|
|
|
|
|
|
|
|
'format_id': '%sp' % (src.get('height') or f_id), |
|
|
'width': int_or_none(src.get('width')), |
|
|
'width': int_or_none(src.get('width')), |
|
|
'height': int_or_none(src.get('height')), |
|
|
'height': int_or_none(src.get('height')), |
|
|
'vbr': int_or_none(src.get('video_bitrate_in_kbps')), |
|
|
'vbr': int_or_none(src.get('video_bitrate_in_kbps')), |
|
@ -237,30 +240,33 @@ class UdemyIE(InfoExtractor): |
|
|
def add_output_format_meta(f, key): |
|
|
def add_output_format_meta(f, key): |
|
|
output = outputs.get(key) |
|
|
output = outputs.get(key) |
|
|
if isinstance(output, dict): |
|
|
if isinstance(output, dict): |
|
|
output_format = extract_output_format(output) |
|
|
|
|
|
|
|
|
output_format = extract_output_format(output, key) |
|
|
output_format.update(f) |
|
|
output_format.update(f) |
|
|
return output_format |
|
|
return output_format |
|
|
return f |
|
|
return f |
|
|
|
|
|
|
|
|
|
|
|
def extract_formats(source_list): |
|
|
|
|
|
if not isinstance(source_list, list): |
|
|
|
|
|
return |
|
|
|
|
|
for source in source_list: |
|
|
|
|
|
video_url = source.get('file') or source.get('src') |
|
|
|
|
|
if not video_url or not isinstance(video_url, compat_str): |
|
|
|
|
|
continue |
|
|
|
|
|
format_id = source.get('label') |
|
|
|
|
|
f = { |
|
|
|
|
|
'url': video_url, |
|
|
|
|
|
'format_id': '%sp' % format_id, |
|
|
|
|
|
'height': int_or_none(format_id), |
|
|
|
|
|
} |
|
|
|
|
|
if format_id: |
|
|
|
|
|
# Some videos contain additional metadata (e.g. |
|
|
|
|
|
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) |
|
|
|
|
|
f = add_output_format_meta(f, format_id) |
|
|
|
|
|
formats.append(f) |
|
|
|
|
|
|
|
|
download_urls = asset.get('download_urls') |
|
|
download_urls = asset.get('download_urls') |
|
|
if isinstance(download_urls, dict): |
|
|
if isinstance(download_urls, dict): |
|
|
video = download_urls.get('Video') |
|
|
|
|
|
if isinstance(video, list): |
|
|
|
|
|
for format_ in video: |
|
|
|
|
|
video_url = format_.get('file') |
|
|
|
|
|
if not video_url: |
|
|
|
|
|
continue |
|
|
|
|
|
format_id = format_.get('label') |
|
|
|
|
|
f = { |
|
|
|
|
|
'url': format_['file'], |
|
|
|
|
|
'format_id': '%sp' % format_id, |
|
|
|
|
|
'height': int_or_none(format_id), |
|
|
|
|
|
} |
|
|
|
|
|
if format_id: |
|
|
|
|
|
# Some videos contain additional metadata (e.g. |
|
|
|
|
|
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) |
|
|
|
|
|
f = add_output_format_meta(f, format_id) |
|
|
|
|
|
formats.append(f) |
|
|
|
|
|
|
|
|
extract_formats(download_urls.get('Video')) |
|
|
|
|
|
|
|
|
view_html = lecture.get('view_html') |
|
|
view_html = lecture.get('view_html') |
|
|
if view_html: |
|
|
if view_html: |
|
@ -294,6 +300,35 @@ class UdemyIE(InfoExtractor): |
|
|
'height': height, |
|
|
'height': height, |
|
|
}, res)) |
|
|
}, res)) |
|
|
|
|
|
|
|
|
|
|
|
# react rendition since 2017.04.15 (see |
|
|
|
|
|
# https://github.com/rg3/youtube-dl/issues/12744) |
|
|
|
|
|
data = self._parse_json( |
|
|
|
|
|
self._search_regex( |
|
|
|
|
|
r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html, |
|
|
|
|
|
'setup data', default='{}', group='data'), video_id, |
|
|
|
|
|
transform_source=unescapeHTML, fatal=False) |
|
|
|
|
|
if data and isinstance(data, dict): |
|
|
|
|
|
extract_formats(data.get('sources')) |
|
|
|
|
|
if not duration: |
|
|
|
|
|
duration = int_or_none(data.get('duration')) |
|
|
|
|
|
tracks = data.get('tracks') |
|
|
|
|
|
if isinstance(tracks, list): |
|
|
|
|
|
for track in tracks: |
|
|
|
|
|
if not isinstance(track, dict): |
|
|
|
|
|
continue |
|
|
|
|
|
if track.get('kind') != 'captions': |
|
|
|
|
|
continue |
|
|
|
|
|
src = track.get('src') |
|
|
|
|
|
if not src or not isinstance(src, compat_str): |
|
|
|
|
|
continue |
|
|
|
|
|
lang = track.get('language') or track.get( |
|
|
|
|
|
'srclang') or track.get('label') |
|
|
|
|
|
sub_dict = automatic_captions if track.get( |
|
|
|
|
|
'autogenerated') is True else subtitles |
|
|
|
|
|
sub_dict.setdefault(lang, []).append({ |
|
|
|
|
|
'url': src, |
|
|
|
|
|
}) |
|
|
|
|
|
|
|
|
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) |
|
|
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) |
|
|
|
|
|
|
|
|
return { |
|
|
return { |
|
@ -302,7 +337,9 @@ class UdemyIE(InfoExtractor): |
|
|
'description': description, |
|
|
'description': description, |
|
|
'thumbnail': thumbnail, |
|
|
'thumbnail': thumbnail, |
|
|
'duration': duration, |
|
|
'duration': duration, |
|
|
'formats': formats |
|
|
|
|
|
|
|
|
'formats': formats, |
|
|
|
|
|
'subtitles': subtitles, |
|
|
|
|
|
'automatic_captions': automatic_captions, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|