|
@ -10,6 +10,7 @@ from ..utils import ( |
|
|
int_or_none, |
|
|
int_or_none, |
|
|
js_to_json, |
|
|
js_to_json, |
|
|
strip_jsonp, |
|
|
strip_jsonp, |
|
|
|
|
|
strip_or_none, |
|
|
unified_strdate, |
|
|
unified_strdate, |
|
|
US_RATINGS, |
|
|
US_RATINGS, |
|
|
) |
|
|
) |
|
@ -200,7 +201,7 @@ class PBSIE(InfoExtractor): |
|
|
'id': '2365006249', |
|
|
'id': '2365006249', |
|
|
'ext': 'mp4', |
|
|
'ext': 'mp4', |
|
|
'title': 'Constitution USA with Peter Sagal - A More Perfect Union', |
|
|
'title': 'Constitution USA with Peter Sagal - A More Perfect Union', |
|
|
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071', |
|
|
|
|
|
|
|
|
'description': 'md5:31b664af3c65fd07fa460d306b837d00', |
|
|
'duration': 3190, |
|
|
'duration': 3190, |
|
|
}, |
|
|
}, |
|
|
}, |
|
|
}, |
|
@ -211,7 +212,7 @@ class PBSIE(InfoExtractor): |
|
|
'id': '2365297690', |
|
|
'id': '2365297690', |
|
|
'ext': 'mp4', |
|
|
'ext': 'mp4', |
|
|
'title': 'FRONTLINE - Losing Iraq', |
|
|
'title': 'FRONTLINE - Losing Iraq', |
|
|
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9', |
|
|
|
|
|
|
|
|
'description': 'md5:5979a4d069b157f622d02bff62fbe654', |
|
|
'duration': 5050, |
|
|
'duration': 5050, |
|
|
}, |
|
|
}, |
|
|
}, |
|
|
}, |
|
@ -222,7 +223,7 @@ class PBSIE(InfoExtractor): |
|
|
'id': '2201174722', |
|
|
'id': '2201174722', |
|
|
'ext': 'mp4', |
|
|
'ext': 'mp4', |
|
|
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist', |
|
|
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist', |
|
|
'description': 'md5:95a19f568689d09a166dff9edada3301', |
|
|
|
|
|
|
|
|
'description': 'md5:86ab9a3d04458b876147b355788b8781', |
|
|
'duration': 801, |
|
|
'duration': 801, |
|
|
}, |
|
|
}, |
|
|
}, |
|
|
}, |
|
@ -267,7 +268,7 @@ class PBSIE(InfoExtractor): |
|
|
'display_id': 'player', |
|
|
'display_id': 'player', |
|
|
'ext': 'mp4', |
|
|
'ext': 'mp4', |
|
|
'title': 'American Experience - Death and the Civil War, Chapter 1', |
|
|
'title': 'American Experience - Death and the Civil War, Chapter 1', |
|
|
'description': 'md5:1b80a74e0380ed2a4fb335026de1600d', |
|
|
|
|
|
|
|
|
'description': 'md5:67fa89a9402e2ee7d08f53b920674c18', |
|
|
'duration': 682, |
|
|
'duration': 682, |
|
|
'thumbnail': 're:^https?://.*\.jpg$', |
|
|
'thumbnail': 're:^https?://.*\.jpg$', |
|
|
}, |
|
|
}, |
|
@ -293,13 +294,13 @@ class PBSIE(InfoExtractor): |
|
|
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see |
|
|
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see |
|
|
# https://github.com/rg3/youtube-dl/issues/7059) |
|
|
# https://github.com/rg3/youtube-dl/issues/7059) |
|
|
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/', |
|
|
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/', |
|
|
'md5': '84ced42850d78f1d4650297356e95e6f', |
|
|
|
|
|
|
|
|
'md5': '59b0ef5009f9ac8a319cc5efebcd865e', |
|
|
'info_dict': { |
|
|
'info_dict': { |
|
|
'id': '2365546844', |
|
|
'id': '2365546844', |
|
|
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business', |
|
|
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business', |
|
|
'ext': 'mp4', |
|
|
'ext': 'mp4', |
|
|
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business", |
|
|
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business", |
|
|
'description': 'md5:54033c6baa1f9623607c6e2ed245888b', |
|
|
|
|
|
|
|
|
'description': 'md5:c0ff7475a4b70261c7e58f493c2792a5', |
|
|
'duration': 1480, |
|
|
'duration': 1480, |
|
|
'thumbnail': 're:^https?://.*\.jpg$', |
|
|
'thumbnail': 're:^https?://.*\.jpg$', |
|
|
}, |
|
|
}, |
|
@ -312,7 +313,7 @@ class PBSIE(InfoExtractor): |
|
|
'display_id': 'the-atomic-artists', |
|
|
'display_id': 'the-atomic-artists', |
|
|
'ext': 'mp4', |
|
|
'ext': 'mp4', |
|
|
'title': 'FRONTLINE - The Atomic Artists', |
|
|
'title': 'FRONTLINE - The Atomic Artists', |
|
|
'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1', |
|
|
|
|
|
|
|
|
'description': 'md5:f677e4520cfacb4a5ce1471e31b57800', |
|
|
'duration': 723, |
|
|
'duration': 723, |
|
|
'thumbnail': 're:^https?://.*\.jpg$', |
|
|
'thumbnail': 're:^https?://.*\.jpg$', |
|
|
}, |
|
|
}, |
|
@ -323,7 +324,7 @@ class PBSIE(InfoExtractor): |
|
|
{ |
|
|
{ |
|
|
# Serves hd only via widget/partnerplayer page |
|
|
# Serves hd only via widget/partnerplayer page |
|
|
'url': 'http://www.pbs.org/video/2365641075/', |
|
|
'url': 'http://www.pbs.org/video/2365641075/', |
|
|
'md5': 'acfd4c400b48149a44861cb16dd305cf', |
|
|
|
|
|
|
|
|
'md5': 'fdf907851eab57211dd589cf12006666', |
|
|
'info_dict': { |
|
|
'info_dict': { |
|
|
'id': '2365641075', |
|
|
'id': '2365641075', |
|
|
'ext': 'mp4', |
|
|
'ext': 'mp4', |
|
@ -333,16 +334,6 @@ class PBSIE(InfoExtractor): |
|
|
'formats': 'mincount:8', |
|
|
'formats': 'mincount:8', |
|
|
}, |
|
|
}, |
|
|
}, |
|
|
}, |
|
|
{ |
|
|
|
|
|
# has undocumented http formats(4500k and 6500k) |
|
|
|
|
|
'url': 'http://www.pbs.org/video/2365815229/', |
|
|
|
|
|
'md5': '94635cd06b7133688e23f4b94e6637a5', |
|
|
|
|
|
'info_dict': { |
|
|
|
|
|
'id': '2365815229', |
|
|
|
|
|
'ext': 'mp4', |
|
|
|
|
|
'title': 'FRONTLINE - Mosquito Hunter', |
|
|
|
|
|
}, |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
{ |
|
|
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', |
|
|
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', |
|
|
'only_matching': True, |
|
|
'only_matching': True, |
|
@ -362,11 +353,16 @@ class PBSIE(InfoExtractor): |
|
|
def _extract_webpage(self, url): |
|
|
def _extract_webpage(self, url): |
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
|
|
|
|
|
|
|
|
|
description = None |
|
|
|
|
|
|
|
|
presumptive_id = mobj.group('presumptive_id') |
|
|
presumptive_id = mobj.group('presumptive_id') |
|
|
display_id = presumptive_id |
|
|
display_id = presumptive_id |
|
|
if presumptive_id: |
|
|
if presumptive_id: |
|
|
webpage = self._download_webpage(url, display_id) |
|
|
webpage = self._download_webpage(url, display_id) |
|
|
|
|
|
|
|
|
|
|
|
description = strip_or_none(self._og_search_description( |
|
|
|
|
|
webpage, default=None) or self._html_search_meta( |
|
|
|
|
|
'description', webpage, default=None)) |
|
|
upload_date = unified_strdate(self._search_regex( |
|
|
upload_date = unified_strdate(self._search_regex( |
|
|
r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"', |
|
|
r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"', |
|
|
webpage, 'upload date', default=None)) |
|
|
webpage, 'upload date', default=None)) |
|
@ -379,7 +375,7 @@ class PBSIE(InfoExtractor): |
|
|
for p in MULTI_PART_REGEXES: |
|
|
for p in MULTI_PART_REGEXES: |
|
|
tabbed_videos = re.findall(p, webpage) |
|
|
tabbed_videos = re.findall(p, webpage) |
|
|
if tabbed_videos: |
|
|
if tabbed_videos: |
|
|
return tabbed_videos, presumptive_id, upload_date |
|
|
|
|
|
|
|
|
return tabbed_videos, presumptive_id, upload_date, description |
|
|
|
|
|
|
|
|
MEDIA_ID_REGEXES = [ |
|
|
MEDIA_ID_REGEXES = [ |
|
|
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed |
|
|
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed |
|
@ -391,7 +387,7 @@ class PBSIE(InfoExtractor): |
|
|
media_id = self._search_regex( |
|
|
media_id = self._search_regex( |
|
|
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None) |
|
|
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None) |
|
|
if media_id: |
|
|
if media_id: |
|
|
return media_id, presumptive_id, upload_date |
|
|
|
|
|
|
|
|
return media_id, presumptive_id, upload_date, description |
|
|
|
|
|
|
|
|
# Frontline video embedded via flp |
|
|
# Frontline video embedded via flp |
|
|
video_id = self._search_regex( |
|
|
video_id = self._search_regex( |
|
@ -408,7 +404,7 @@ class PBSIE(InfoExtractor): |
|
|
'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id, |
|
|
'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id, |
|
|
presumptive_id, 'Downloading getdir JSON', |
|
|
presumptive_id, 'Downloading getdir JSON', |
|
|
transform_source=strip_jsonp) |
|
|
transform_source=strip_jsonp) |
|
|
return getdir['mid'], presumptive_id, upload_date |
|
|
|
|
|
|
|
|
return getdir['mid'], presumptive_id, upload_date, description |
|
|
|
|
|
|
|
|
for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage): |
|
|
for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage): |
|
|
url = self._search_regex( |
|
|
url = self._search_regex( |
|
@ -432,10 +428,10 @@ class PBSIE(InfoExtractor): |
|
|
video_id = mobj.group('id') |
|
|
video_id = mobj.group('id') |
|
|
display_id = video_id |
|
|
display_id = video_id |
|
|
|
|
|
|
|
|
return video_id, display_id, None |
|
|
|
|
|
|
|
|
return video_id, display_id, None, description |
|
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
def _real_extract(self, url): |
|
|
video_id, display_id, upload_date = self._extract_webpage(url) |
|
|
|
|
|
|
|
|
video_id, display_id, upload_date, description = self._extract_webpage(url) |
|
|
|
|
|
|
|
|
if isinstance(video_id, list): |
|
|
if isinstance(video_id, list): |
|
|
entries = [self.url_result( |
|
|
entries = [self.url_result( |
|
@ -564,11 +560,14 @@ class PBSIE(InfoExtractor): |
|
|
if alt_title: |
|
|
if alt_title: |
|
|
info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title']) |
|
|
info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title']) |
|
|
|
|
|
|
|
|
|
|
|
description = info.get('description') or info.get( |
|
|
|
|
|
'program', {}).get('description') or description |
|
|
|
|
|
|
|
|
return { |
|
|
return { |
|
|
'id': video_id, |
|
|
'id': video_id, |
|
|
'display_id': display_id, |
|
|
'display_id': display_id, |
|
|
'title': info['title'], |
|
|
'title': info['title'], |
|
|
'description': info.get('description') or info.get('program', {}).get('description'), |
|
|
|
|
|
|
|
|
'description': description, |
|
|
'thumbnail': info.get('image_url'), |
|
|
'thumbnail': info.get('image_url'), |
|
|
'duration': int_or_none(info.get('duration')), |
|
|
'duration': int_or_none(info.get('duration')), |
|
|
'age_limit': age_limit, |
|
|
'age_limit': age_limit, |
|
|