|
@ -716,6 +716,8 @@ class BBCIE(BBCCoUkIE): |
|
|
r'itemprop="datePublished"[^>]+datetime="([^"]+)"'], |
|
|
r'itemprop="datePublished"[^>]+datetime="([^"]+)"'], |
|
|
webpage, 'date', default=None)) |
|
|
webpage, 'date', default=None)) |
|
|
|
|
|
|
|
|
|
|
|
entries = [] |
|
|
|
|
|
|
|
|
# article with multiple videos embedded with playlist.sxml (e.g. |
|
|
# article with multiple videos embedded with playlist.sxml (e.g. |
|
|
# http://www.bbc.com/sport/0/football/34475836) |
|
|
# http://www.bbc.com/sport/0/football/34475836) |
|
|
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage) |
|
|
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage) |
|
@ -723,44 +725,57 @@ class BBCIE(BBCCoUkIE): |
|
|
entries = [ |
|
|
entries = [ |
|
|
self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp) |
|
|
self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp) |
|
|
for playlist_url in playlists] |
|
|
for playlist_url in playlists] |
|
|
playlist_title = self._og_search_title(webpage) |
|
|
|
|
|
playlist_description = self._og_search_description(webpage, default=None) |
|
|
|
|
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) |
|
|
|
|
|
|
|
|
|
|
|
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) |
|
|
# news article with multiple videos embedded with data-playable |
|
|
programme_id = self._search_regex( |
|
|
data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage) |
|
|
[r'data-video-player-vpid="([\da-z]{8})"', |
|
|
if data_playables: |
|
|
r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'], |
|
|
for _, data_playable_json in data_playables: |
|
|
webpage, 'vpid', default=None) |
|
|
data_playable = self._parse_json( |
|
|
|
|
|
unescapeHTML(data_playable_json), playlist_id, fatal=False) |
|
|
duration = None |
|
|
if not data_playable: |
|
|
if not programme_id: |
|
|
continue |
|
|
# single video in news article embedded with data-playable (e.g. |
|
|
|
|
|
# http://www.bbc.com/news/world-us-canada-34473351) |
|
|
|
|
|
data_playable = self._parse_json( |
|
|
|
|
|
unescapeHTML(self._search_regex( |
|
|
|
|
|
r'data-playable=(["\'])(?P<json>{.+?})\1', webpage, |
|
|
|
|
|
'data playable', default='{}', group='json')), |
|
|
|
|
|
programme_id, fatal=False) |
|
|
|
|
|
if data_playable: |
|
|
|
|
|
# data-playable has video vpid in settings.playlistObject.items (e.g. |
|
|
|
|
|
# http://www.bbc.com/news/world-us-canada-34473351) |
|
|
|
|
|
settings = data_playable.get('settings', {}) |
|
|
settings = data_playable.get('settings', {}) |
|
|
if settings: |
|
|
if settings: |
|
|
|
|
|
# data-playable with video vpid in settings.playlistObject.items (e.g. |
|
|
|
|
|
# http://www.bbc.com/news/world-us-canada-34473351) |
|
|
playlist_object = settings.get('playlistObject', {}) |
|
|
playlist_object = settings.get('playlistObject', {}) |
|
|
if playlist_object: |
|
|
if playlist_object: |
|
|
items = playlist_object.get('items') |
|
|
items = playlist_object.get('items') |
|
|
if items and isinstance(items, list): |
|
|
if items and isinstance(items, list): |
|
|
|
|
|
title = playlist_object['title'] |
|
|
|
|
|
description = playlist_object.get('summary') |
|
|
duration = int_or_none(items[0].get('duration')) |
|
|
duration = int_or_none(items[0].get('duration')) |
|
|
programme_id = items[0].get('vpid') |
|
|
programme_id = items[0].get('vpid') |
|
|
if not programme_id: |
|
|
formats, subtitles = self._download_media_selector(programme_id) |
|
|
# data-playable has no vpid but has a playlist.sxml URLs |
|
|
self._sort_formats(formats) |
|
|
# in otherSettings.playlist (e.g. |
|
|
entries.append({ |
|
|
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani) |
|
|
'id': programme_id, |
|
|
playlist = data_playable.get('otherSettings', {}).get('playlist', {}) |
|
|
'title': title, |
|
|
if playlist: |
|
|
'description': description, |
|
|
return self._extract_from_playlist_sxml( |
|
|
'timestamp': timestamp, |
|
|
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp) |
|
|
'duration': duration, |
|
|
|
|
|
'formats': formats, |
|
|
|
|
|
'subtitles': subtitles, |
|
|
|
|
|
}) |
|
|
|
|
|
else: |
|
|
|
|
|
# data-playable without vpid but with a playlist.sxml URLs |
|
|
|
|
|
# in otherSettings.playlist (e.g. |
|
|
|
|
|
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani) |
|
|
|
|
|
playlist = data_playable.get('otherSettings', {}).get('playlist', {}) |
|
|
|
|
|
if playlist: |
|
|
|
|
|
entries.append(self._extract_from_playlist_sxml( |
|
|
|
|
|
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp)) |
|
|
|
|
|
|
|
|
|
|
|
if entries: |
|
|
|
|
|
playlist_title = self._og_search_title(webpage) |
|
|
|
|
|
playlist_description = self._og_search_description(webpage, default=None) |
|
|
|
|
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) |
|
|
|
|
|
|
|
|
|
|
|
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) |
|
|
|
|
|
programme_id = self._search_regex( |
|
|
|
|
|
[r'data-video-player-vpid="([\da-z]{8})"', |
|
|
|
|
|
r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'], |
|
|
|
|
|
webpage, 'vpid', default=None) |
|
|
|
|
|
|
|
|
if programme_id: |
|
|
if programme_id: |
|
|
formats, subtitles = self._download_media_selector(programme_id) |
|
|
formats, subtitles = self._download_media_selector(programme_id) |
|
|
xxxxxxxxxx