|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals |
|
|
|
import re |
|
|
|
|
|
|
|
from .common import InfoExtractor |
|
|
|
from ..compat import compat_str |
|
|
|
from ..utils import ( |
|
|
|
ExtractorError, |
|
|
|
determine_ext, |
|
|
@ -375,6 +376,35 @@ class PBSIE(InfoExtractor): |
|
|
|
}, |
|
|
|
'expected_warnings': ['HTTP Error 403: Forbidden'], |
|
|
|
}, |
|
|
|
{ |
|
|
|
'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/', |
|
|
|
'info_dict': { |
|
|
|
'id': '3007193718', |
|
|
|
'ext': 'mp4', |
|
|
|
'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster", |
|
|
|
'description': 'md5:37efbac85e0c09b009586523ec143652', |
|
|
|
'duration': 6292, |
|
|
|
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
|
}, |
|
|
|
'expected_warnings': ['HTTP Error 403: Forbidden'], |
|
|
|
}, |
|
|
|
{ |
|
|
|
'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/', |
|
|
|
'info_dict': { |
|
|
|
'id': '3011407934', |
|
|
|
'ext': 'mp4', |
|
|
|
'title': 'Stories from the Stage - Road Trip', |
|
|
|
'duration': 1619, |
|
|
|
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', |
|
|
|
}, |
|
|
|
'params': { |
|
|
|
'skip_download': True, |
|
|
|
}, |
|
|
|
'expected_warnings': ['HTTP Error 403: Forbidden'], |
|
|
|
}, |
|
|
|
{ |
|
|
|
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', |
|
|
|
'only_matching': True, |
|
|
@ -438,6 +468,7 @@ class PBSIE(InfoExtractor): |
|
|
|
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer |
|
|
|
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',", |
|
|
|
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/ |
|
|
|
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/ |
|
|
|
] |
|
|
|
|
|
|
|
media_id = self._search_regex( |
|
|
@ -472,7 +503,8 @@ class PBSIE(InfoExtractor): |
|
|
|
if not url: |
|
|
|
url = self._og_search_url(webpage) |
|
|
|
|
|
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
|
mobj = re.match( |
|
|
|
self._VALID_URL, self._proto_relative_url(url.strip())) |
|
|
|
|
|
|
|
player_id = mobj.group('player_id') |
|
|
|
if not display_id: |
|
|
@ -482,13 +514,27 @@ class PBSIE(InfoExtractor): |
|
|
|
url, display_id, note='Downloading player page', |
|
|
|
errnote='Could not download player page') |
|
|
|
video_id = self._search_regex( |
|
|
|
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID') |
|
|
|
r'<div\s+id=["\']video_(\d+)', player_page, 'video ID', |
|
|
|
default=None) |
|
|
|
if not video_id: |
|
|
|
video_info = self._extract_video_data( |
|
|
|
player_page, 'video data', display_id) |
|
|
|
video_id = compat_str( |
|
|
|
video_info.get('id') or video_info['contentID']) |
|
|
|
else: |
|
|
|
video_id = mobj.group('id') |
|
|
|
display_id = video_id |
|
|
|
|
|
|
|
return video_id, display_id, None, description |
|
|
|
|
|
|
|
def _extract_video_data(self, string, name, video_id, fatal=True):
    """Locate the embedded player data object in *string* and parse it.

    Two JavaScript assignments are searched for, in this order:
    ``PBS.videoData = {...};`` followed by a newline, and
    ``window.videoBridge = {...};``.  The captured object literal is
    run through ``js_to_json`` and handed to ``self._parse_json``.

    string   -- text to search (callers in this file pass a player page)
    name     -- label forwarded to ``self._search_regex`` for the field
    video_id -- identifier forwarded to ``self._parse_json``
    fatal    -- forwarded unchanged to ``self._parse_json``

    Returns whatever ``self._parse_json`` returns.
    """
    data_patterns = [
        r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
        r'window\.videoBridge\s*=\s*({.+?});',
    ]
    # The '{}' default is what gets parsed when neither pattern matches;
    # presumably that yields an empty dict instead of a search failure
    # (depends on _search_regex's handling of `default` -- confirm).
    raw_object = self._search_regex(
        data_patterns, string, name, default='{}')
    return self._parse_json(
        raw_object, video_id, transform_source=js_to_json, fatal=fatal)
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
video_id, display_id, upload_date, description = self._extract_webpage(url) |
|
|
|
|
|
|
@ -519,11 +565,8 @@ class PBSIE(InfoExtractor): |
|
|
|
'http://player.pbs.org/%s/%s' % (page, video_id), |
|
|
|
display_id, 'Downloading %s page' % page, fatal=False) |
|
|
|
if player: |
|
|
|
video_info = self._parse_json( |
|
|
|
self._search_regex( |
|
|
|
[r'(?s)PBS\.videoData\s*=\s*({.+?});\n', r'window\.videoBridge\s*=\s*({.+?});'], |
|
|
|
player, '%s video data' % page, default='{}'), |
|
|
|
display_id, transform_source=js_to_json, fatal=False) |
|
|
|
video_info = self._extract_video_data( |
|
|
|
player, '%s video data' % page, display_id, fatal=False) |
|
|
|
if video_info: |
|
|
|
extract_redirect_urls(video_info) |
|
|
|
if not info: |
|
|
|