Browse Source
[kinja] add support for Kinja embeds
[kinja] add support for Kinja embeds
closes #5756 closes #11282 closes #22237 closes #22384
master
Remita Amine
5 years ago
4 changed files with 241 additions and 52 deletions
Split View
Diff Options
-
1youtube_dl/extractor/extractors.py
-
17youtube_dl/extractor/generic.py
-
221youtube_dl/extractor/kinja.py
-
54youtube_dl/extractor/onionstudios.py
@ -0,0 +1,221 @@ |
|||
# coding: utf-8 |
|||
from __future__ import unicode_literals |
|||
|
|||
import re |
|||
|
|||
from .common import InfoExtractor |
|||
from ..compat import ( |
|||
compat_str, |
|||
compat_urllib_parse_unquote, |
|||
) |
|||
from ..utils import ( |
|||
int_or_none, |
|||
parse_iso8601, |
|||
strip_or_none, |
|||
try_get, |
|||
unescapeHTML, |
|||
urljoin, |
|||
) |
|||
|
|||
|
|||
class KinjaEmbedIE(InfoExtractor):
    """Extractor for Kinja iframe embeds.

    Handles ``/ajax/inset/iframe?id=<type>-<id>`` and
    ``/embed/video/iframe?id=<type>-<id>`` URLs on the domains listed in
    ``_DOMAIN_REGEX``.  Depending on ``<type>`` the request is either handed
    to another extractor (see ``_PROVIDER_MAP``) or resolved here: the
    ``kinjavideo`` type through the Kinja video API, and the remaining type
    accepted by ``_VALID_URL`` (``mcp``) through the Univision metadata and
    URL-signature APIs.
    """

    # Extractor name.  The original declaration was spelled ``IENAME``;
    # NOTE(review): the base class is expected to look up ``IE_NAME``, so the
    # misspelled attribute was never picked up - confirm against InfoExtractor.
    IE_NAME = 'kinja:embed'
    # Original (misspelled) attribute kept as an alias for any code that
    # may have referenced it.
    IENAME = IE_NAME

    # Both fragments below are written for verbose (?x) regex mode, so the
    # embedded newlines and indentation are not part of the pattern.
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    # Captures exactly two groups: the embed ``type`` and the raw ``id``
    # (everything after the type prefix up to the next ``&``).
    _VALID_URL = r'''(?x)https?://%s%s
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            ooyala|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    # Embed type -> (scheme-less URL prefix or %-template, extractor key
    # passed to url_result).  Types absent from this map ('kinjavideo' and
    # 'mcp') are resolved by this extractor itself.
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    @staticmethod
    def _extract_urls(webpage, url):
        """Return the absolute URLs of all Kinja embed iframes in ``webpage``.

        Every ``<iframe ... src=...>`` whose ``src`` matches the Kinja embed
        path (optionally preceded by a scheme/host from ``_DOMAIN_REGEX``) is
        HTML-unescaped and resolved against ``url`` with ``urljoin``.
        """
        return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
            r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
            webpage)]

    def _real_extract(self, url):
        """Dispatch on the embed type encoded in ``url``.

        Returns a ``url_result`` for types listed in ``_PROVIDER_MAP``;
        otherwise returns an info dict built from the Kinja video API
        (``kinjavideo``) or from the Univision APIs (any other type, which
        per ``_VALID_URL`` is ``mcp``).
        """
        video_type, video_id = re.match(self._VALID_URL, url).groups()

        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            return self._provider_result(provider, video_type, video_id)

        if video_type == 'kinjavideo':
            return self._extract_kinja_video(video_id)
        return self._extract_mcp_video(video_id)

    def _provider_result(self, provider, video_type, video_id):
        # Build the third-party URL for a mapped embed type and delegate to its extractor.
        url_template, ie_key = provider
        video_id = compat_urllib_parse_unquote(video_id)
        if video_type == 'tumblr-post':
            # The id is '<post id>-<blog name>' while the URL template
            # expects (blog name, post id).
            post_id, blog = video_id.split('-', 1)
            result_url = url_template % (blog, post_id)
        elif video_type == 'youtube-list':
            # The id is '<video id>/<playlist id>'.
            video_id, playlist_id = video_id.split('/')
            result_url = url_template % (video_id, playlist_id)
        else:
            if video_type == 'ooyala':
                # Only the part before the first '/' is used as embed code.
                video_id = video_id.split('/')[0]
            result_url = url_template + video_id
        return self.url_result('http://' + result_url, ie_key)

    def _extract_kinja_video(self, video_id):
        # Build the info dict for a native Kinja video from the videoById API.
        data = self._download_json(
            'https://kinja.com/api/core/video/views/videoById',
            video_id, query={'videoId': video_id})['data']
        title = data['title']

        formats = []
        for k in ('signedPlaylist', 'streaming'):
            m3u8_url = data.get(k + 'Url')
            if m3u8_url:
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        thumbnail = None
        poster = data.get('poster') or {}
        poster_id = poster.get('id')
        if poster_id:
            thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'formats': formats,
            'tags': data.get('tags'),
            # publishTimeMillis is passed through int_or_none with 1000 as
            # the second positional argument (milliseconds -> seconds).
            'timestamp': int_or_none(try_get(
                data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
            'thumbnail': thumbnail,
            'uploader': data.get('network'),
        }

    def _extract_mcp_video(self, video_id):
        # Build the info dict for an 'mcp' video from the Univision metadata and signature APIs.
        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        # The metadata may name its own auth host; otherwise use the default.
        tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
        data = self._download_json(
            tvss_domain + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]
        formats = []

        rendition_url = data.get('renditionUrl')
        if rendition_url:
            formats = self._extract_m3u8_formats(
                rendition_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)

        fallback_rendition_url = data.get('fallbackRenditionUrl')
        if fallback_rendition_url:
            formats.append({
                'format_id': 'fallback',
                # Bitrate is taken from a trailing '_<digits>.mp4' in the
                # URL when present.
                'tbr': int_or_none(self._search_regex(
                    r'_(\d+)\.mp4', fallback_rendition_url,
                    'bitrate', default=None)),
                'url': fallback_rendition_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }
Write
Preview
Loading…
Cancel
Save