Browse Source
[kinja] add support for Kinja embeds
[kinja] add support for Kinja embeds
closes #5756 closes #11282 closes #22237 closes #22384 master
4 changed files with 241 additions and 52 deletions
Unified View
Diff Options
-
1youtube_dl/extractor/extractors.py
-
17youtube_dl/extractor/generic.py
-
221youtube_dl/extractor/kinja.py
-
54youtube_dl/extractor/onionstudios.py
@ -0,0 +1,221 @@ |
|||||
|
# coding: utf-8 |
||||
|
from __future__ import unicode_literals |
||||
|
|
||||
|
import re |
||||
|
|
||||
|
from .common import InfoExtractor |
||||
|
from ..compat import ( |
||||
|
compat_str, |
||||
|
compat_urllib_parse_unquote, |
||||
|
) |
||||
|
from ..utils import ( |
||||
|
int_or_none, |
||||
|
parse_iso8601, |
||||
|
strip_or_none, |
||||
|
try_get, |
||||
|
unescapeHTML, |
||||
|
urljoin, |
||||
|
) |
||||
|
|
||||
|
|
||||
|
class KinjaEmbedIE(InfoExtractor):
    """Extractor for media embedded through Kinja iframe "insets".

    A Kinja embed URL carries an ``id`` query parameter of the form
    ``<type>-<identifier>``.  Most types are thin wrappers around a
    third-party service and are delegated to the matching extractor via
    ``_PROVIDER_MAP``; ``kinjavideo`` and ``mcp`` are resolved here.
    """

    # The framework reads ``IE_NAME``; the original ``IENAME`` spelling was
    # never picked up, so the intended name was silently ignored.
    IE_NAME = 'kinja:embed'
    # Legacy misspelling kept as an alias in case anything references it.
    IENAME = IE_NAME

    # NOTE: the two fragments below are only valid inside a verbose ``(?x)``
    # pattern, since they contain layout whitespace.
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    _VALID_URL = r'''(?x)https?://%s%s
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            ooyala|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]

    # Each provider entry is ``(scheme-less URL prefix or template,
    # extractor key)``.  Entries containing ``%s`` are templates filled in
    # by ``_kinja_provider_result``; the rest are plain prefixes.
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    @staticmethod
    def _extract_urls(webpage, url):
        """Return the Kinja embed iframe URLs found in ``webpage``.

        ``src`` values are HTML-unescaped and resolved against ``url``, so
        relative and protocol-relative sources are passed through
        ``urljoin`` with the page URL as base.
        """
        # ``\1`` refers to the opening quote (group ``q``), so the URL runs
        # up to the matching closing quote.
        return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
            r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
            webpage)]

    def _real_extract(self, url):
        """Resolve a Kinja embed URL to a delegated result or an info dict."""
        video_type, video_id = re.match(self._VALID_URL, url).groups()

        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            return self._kinja_provider_result(video_type, video_id, provider)

        if video_type == 'kinjavideo':
            return self._kinja_native_video(video_id)

        # Every remaining type accepted by _VALID_URL is ``mcp``.
        return self._univision_mcp_video(video_id)

    def _kinja_provider_result(self, video_type, video_id, provider):
        """Build a ``url_result`` handing the embed to a third-party extractor."""
        url_part, ie_key = provider
        video_id = compat_urllib_parse_unquote(video_id)

        if video_type == 'tumblr-post':
            # The id is ``<post id>-<blog name>``.
            video_id, blog = video_id.split('-', 1)
            result_url = url_part % (blog, video_id)
        elif video_type == 'youtube-list':
            # The id is ``<video id>/<playlist id>``.
            video_id, playlist_id = video_id.split('/')
            result_url = url_part % (video_id, playlist_id)
        else:
            if video_type == 'ooyala':
                # Only the part before the first '/' is used as embed code.
                video_id = video_id.split('/')[0]
            result_url = url_part + video_id

        return self.url_result('http://' + result_url, ie_key)

    def _kinja_native_video(self, video_id):
        """Extract a ``kinjavideo`` embed through the Kinja video API."""
        data = self._download_json(
            'https://kinja.com/api/core/video/views/videoById',
            video_id, query={'videoId': video_id})['data']
        title = data['title']

        formats = []
        for key in ('signedPlaylist', 'streaming'):
            m3u8_url = data.get(key + 'Url')
            if m3u8_url:
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        thumbnail = None
        poster = data.get('poster') or {}
        poster_id = poster.get('id')
        if poster_id:
            thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (
                poster_id, poster.get('format') or 'jpg')

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'formats': formats,
            'tags': data.get('tags'),
            # Scaled down by 1000; the key name suggests milliseconds.
            'timestamp': int_or_none(try_get(
                data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
            'thumbnail': thumbnail,
            'uploader': data.get('network'),
        }

    def _univision_mcp_video(self, video_id):
        """Extract an ``mcp`` embed through the Univision metadata/auth APIs."""
        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
        data = self._download_json(
            tvss_domain + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]
        formats = []

        rendition_url = data.get('renditionUrl')
        if rendition_url:
            formats = self._extract_m3u8_formats(
                rendition_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)

        fallback_rendition_url = data.get('fallbackRenditionUrl')
        if fallback_rendition_url:
            formats.append({
                'format_id': 'fallback',
                # Number taken from a trailing ``_<digits>.mp4`` in the URL,
                # when present.
                'tbr': int_or_none(self._search_regex(
                    r'_(\d+)\.mp4', fallback_rendition_url,
                    'bitrate', default=None)),
                'url': fallback_rendition_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }
Write
Preview
Loading…
Cancel
Save