@ -1,25 +1,119 @@
from __future__ import unicode_literals
from __future__ import unicode_literals
import functools
import re
import re
from .common import InfoExtractor
from .common import InfoExtractor
from ..compat import compat_str
from ..compat import compat_str
from ..utils import (
from ..utils import (
parse_duration ,
unified_strdate ,
str_to_int ,
int_or_none ,
float_or_none ,
float_or_none ,
int_or_none ,
ISO639Utils ,
ISO639Utils ,
determine_ext ,
OnDemandPagedList ,
parse_duration ,
str_or_none ,
str_to_int ,
unified_strdate ,
)
)
class AdobeTVBaseIE ( InfoExtractor ) :
class AdobeTVBaseIE ( InfoExtractor ) :
_API_BASE_URL = ' http://tv.adobe.com/api/v4/ '
def _call_api ( self , path , video_id , query , note = None ) :
return self . _download_json (
' http://tv.adobe.com/api/v4/ ' + path ,
video_id , note , query = query ) [ ' data ' ]
def _parse_subtitles ( self , video_data , url_key ) :
subtitles = { }
for translation in video_data . get ( ' translations ' , [ ] ) :
vtt_path = translation . get ( url_key )
if not vtt_path :
continue
lang = translation . get ( ' language_w3c ' ) or ISO639Utils . long2short ( translation [ ' language_medium ' ] )
subtitles . setdefault ( lang , [ ] ) . append ( {
' ext ' : ' vtt ' ,
' url ' : vtt_path ,
} )
return subtitles
def _parse_video_data(self, video_data):
    """Turn one API episode object into an info dict.

    ``id`` and ``title`` are mandatory (a missing key raises ``KeyError``);
    all other fields are read with ``.get``.  Each entry of ``videos``
    that has a ``url`` becomes a format.  The first source whose
    ``original_filename`` is an ``s3://`` location additionally yields an
    ``original`` format pointing at the matching
    ``https://s3.amazonaws.com/`` URL.
    """
    video_id = compat_str(video_data['id'])
    title = video_data['title']

    # Only one "original" format is emitted, however many sources name one.
    s3_extracted = False
    formats = []
    # ``or []`` also covers an explicit null value for ``videos``.
    for source in video_data.get('videos') or []:
        source_url = source.get('url')
        if not source_url:
            continue
        f = {
            'format_id': source.get('quality_level'),
            'fps': int_or_none(source.get('frame_rate')),
            'height': int_or_none(source.get('height')),
            'tbr': int_or_none(source.get('video_data_rate')),
            'width': int_or_none(source.get('width')),
            'url': source_url,
        }
        original_filename = source.get('original_filename')
        if original_filename:
            if not (f.get('height') and f.get('width')):
                # Fall back to a "_<width>x<height>" marker in the file name.
                mobj = re.search(r'_(\d+)x(\d+)', original_filename)
                if mobj:
                    f.update({
                        'height': int(mobj.group(2)),
                        'width': int(mobj.group(1)),
                    })
            if original_filename.startswith('s3://') and not s3_extracted:
                formats.append({
                    'format_id': 'original',
                    'preference': 1,
                    'url': original_filename.replace(
                        's3://', 'https://s3.amazonaws.com/'),
                })
                s3_extracted = True
        formats.append(f)
    self._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': video_data.get('description'),
        'thumbnail': video_data.get('thumbnail'),
        'upload_date': unified_strdate(video_data.get('start_date')),
        'duration': parse_duration(video_data.get('duration')),
        'view_count': str_to_int(video_data.get('playcount')),
        'formats': formats,
        'subtitles': self._parse_subtitles(video_data, 'vtt'),
    }
class AdobeTVEmbedIE(AdobeTVBaseIE):
    """Extractor for ``tv.adobe.com/embed/<n>/<id>`` player URLs."""

    IE_NAME = 'adobetv:embed'
    _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tv.adobe.com/embed/22/4153',
        'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
        'info_dict': {
            'id': '4153',
            'ext': 'flv',
            'title': 'Creating Graphics Optimized for BlackBerry',
            'description': 'md5:eac6e8dced38bdaae51cd94447927459',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20091109',
            'duration': 377,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look the episode up by the numeric id in ``url`` and parse it."""
        video_id = self._match_id(url)

        # The API answers with a list; the first element is the episode.
        video_data = self._call_api(
            'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
        return self._parse_video_data(video_data)
class AdobeTVIE ( AdobeTVBaseIE ) :
class AdobeTVIE ( AdobeTVBaseIE ) :
IE_NAME = ' adobetv '
_VALID_URL = r ' https?://tv \ .adobe \ .com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+) '
_VALID_URL = r ' https?://tv \ .adobe \ .com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+) '
_TEST = {
_TEST = {
@ -42,45 +136,33 @@ class AdobeTVIE(AdobeTVBaseIE):
if not language :
if not language :
language = ' en '
language = ' en '
video_data = self . _download_json (
self . _API_BASE_URL + ' episode/get/?language= %s &show_urlname= %s &urlname= %s &disclosure=standard ' % ( language , show_urlname , urlname ) ,
urlname ) [ ' data ' ] [ 0 ]
formats = [ {
' url ' : source [ ' url ' ] ,
' format_id ' : source . get ( ' quality_level ' ) or source [ ' url ' ] . split ( ' - ' ) [ - 1 ] . split ( ' . ' ) [ 0 ] or None ,
' width ' : int_or_none ( source . get ( ' width ' ) ) ,
' height ' : int_or_none ( source . get ( ' height ' ) ) ,
' tbr ' : int_or_none ( source . get ( ' video_data_rate ' ) ) ,
} for source in video_data [ ' videos ' ] ]
self . _sort_formats ( formats )
return {
' id ' : compat_str ( video_data [ ' id ' ] ) ,
' title ' : video_data [ ' title ' ] ,
' description ' : video_data . get ( ' description ' ) ,
' thumbnail ' : video_data . get ( ' thumbnail ' ) ,
' upload_date ' : unified_strdate ( video_data . get ( ' start_date ' ) ) ,
' duration ' : parse_duration ( video_data . get ( ' duration ' ) ) ,
' view_count ' : str_to_int ( video_data . get ( ' playcount ' ) ) ,
' formats ' : formats ,
}
video_data = self . _call_api (
' episode/get ' , urlname , {
' disclosure ' : ' standard ' ,
' language ' : language ,
' show_urlname ' : show_urlname ,
' urlname ' : urlname ,
} ) [ 0 ]
return self . _parse_video_data ( video_data )
class AdobeTVPlaylistBaseIE ( AdobeTVBaseIE ) :
class AdobeTVPlaylistBaseIE ( AdobeTVBaseIE ) :
def _parse_page_data ( self , page_data ) :
return [ self . url_result ( self . _get_element_url ( element_data ) ) for element_data in page_data ]
_PAGE_SIZE = 25
def _fetch_page ( self , display_id , query , page ) :
page + = 1
query [ ' page ' ] = page
for element_data in self . _call_api (
self . _RESOURCE , display_id , query , ' Download Page %d ' % page ) :
yield self . _process_data ( element_data )
def _extract_playlist_entries ( self , url , display_id ) :
page = self . _download_json ( url , display_id )
entries = self . _parse_page_data ( page [ ' data ' ] )
for page_num in range ( 2 , page [ ' paging ' ] [ ' pages ' ] + 1 ) :
entries . extend ( self . _parse_page_data (
self . _download_json ( url + ' &page= %d ' % page_num , display_id ) [ ' data ' ] ) )
return entries
def _extract_playlist_entries(self, display_id, query):
    """Return an ``OnDemandPagedList`` over the pages of this playlist.

    Pages are produced by ``self._fetch_page`` with ``display_id`` and
    ``query`` pre-bound; the page size is ``self._PAGE_SIZE``.
    """
    fetch_page = functools.partial(self._fetch_page, display_id, query)
    return OnDemandPagedList(fetch_page, self._PAGE_SIZE)
class AdobeTVShowIE ( AdobeTVPlaylistBaseIE ) :
class AdobeTVShowIE ( AdobeTVPlaylistBaseIE ) :
IE_NAME = ' adobetv:show '
_VALID_URL = r ' https?://tv \ .adobe \ .com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+) '
_VALID_URL = r ' https?://tv \ .adobe \ .com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+) '
_TEST = {
_TEST = {
@ -92,26 +174,31 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
} ,
} ,
' playlist_mincount ' : 136 ,
' playlist_mincount ' : 136 ,
}
}
def _get_element_url ( self , element_data ) :
return element_data [ ' urls ' ] [ 0 ]
_RESOURCE = ' episode '
_process_data = AdobeTVBaseIE . _parse_video_data
def _real_extract(self, url):
    """Return a playlist of all episodes of the show addressed by ``url``.

    The language defaults to ``en`` when the URL has no language prefix.
    Show metadata comes from the ``show/get`` API call; the entries are
    produced by ``self._extract_playlist_entries`` from the same query.
    """
    language, show_urlname = re.match(self._VALID_URL, url).groups()
    if not language:
        language = 'en'
    query = {
        'disclosure': 'standard',
        'language': language,
        'show_urlname': show_urlname,
    }

    # The API answers with a list; the first element describes the show.
    show_data = self._call_api(
        'show/get', show_urlname, query)[0]

    return self.playlist_result(
        self._extract_playlist_entries(show_urlname, query),
        str_or_none(show_data.get('id')),
        show_data.get('show_name'),
        show_data.get('show_description'))
class AdobeTVChannelIE ( AdobeTVPlaylistBaseIE ) :
class AdobeTVChannelIE ( AdobeTVPlaylistBaseIE ) :
IE_NAME = ' adobetv:channel '
_VALID_URL = r ' https?://tv \ .adobe \ .com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))? '
_VALID_URL = r ' https?://tv \ .adobe \ .com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))? '
_TEST = {
_TEST = {
@ -121,24 +208,30 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
} ,
} ,
' playlist_mincount ' : 96 ,
' playlist_mincount ' : 96 ,
}
}
_RESOURCE = ' show '
def _get_element_url ( self , element_data ) :
return element_data [ ' url ' ]
def _process_data(self, show_data):
    """Wrap one show object as a ``url_result`` for the ``AdobeTVShow`` extractor.

    ``show_data['url']`` is required; ``id`` is optional and passed
    through ``str_or_none``.
    """
    return self.url_result(
        show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
language , channel_urlname , category_urlname = re . match ( self . _VALID_URL , url ) . groups ( )
language , channel_urlname , category_urlname = re . match ( self . _VALID_URL , url ) . groups ( )
if not language :
if not language :
language = ' en '
language = ' en '
query = ' language= %s &channel_urlname= %s ' % ( language , channel_urlname )
query = {
' channel_urlname ' : channel_urlname ,
' language ' : language ,
}
if category_urlname :
if category_urlname :
query + = ' &category_urlname= %s ' % category_urlname
query [ ' category_urlname ' ] = category_urlname
return self . playlist_result (
return self . playlist_result (
self . _extract_playlist_entries ( self . _API_BASE_URL + ' show/? %s ' % query , channel_urlname ) ,
self . _extract_playlist_entries ( channel_urlname , query ) ,
channel_urlname )
channel_urlname )
class AdobeTVVideoIE ( InfoExtractor ) :
class AdobeTVVideoIE ( AdobeTVBaseIE ) :
IE_NAME = ' adobetv:video '
_VALID_URL = r ' https?://video \ .tv \ .adobe \ .com/v/(?P<id> \ d+) '
_VALID_URL = r ' https?://video \ .tv \ .adobe \ .com/v/(?P<id> \ d+) '
_TEST = {
_TEST = {
@ -160,38 +253,36 @@ class AdobeTVVideoIE(InfoExtractor):
video_data = self . _parse_json ( self . _search_regex (
video_data = self . _parse_json ( self . _search_regex (
r ' var \ s+bridge \ s*= \ s*([^;]+); ' , webpage , ' bridged data ' ) , video_id )
r ' var \ s+bridge \ s*= \ s*([^;]+); ' , webpage , ' bridged data ' ) , video_id )
formats = [ {
' format_id ' : ' %s - %s ' % ( determine_ext ( source [ ' src ' ] ) , source . get ( ' height ' ) ) ,
' url ' : source [ ' src ' ] ,
' width ' : int_or_none ( source . get ( ' width ' ) ) ,
' height ' : int_or_none ( source . get ( ' height ' ) ) ,
' tbr ' : int_or_none ( source . get ( ' bitrate ' ) ) ,
} for source in video_data [ ' sources ' ] ]
title = video_data [ ' title ' ]
formats = [ ]
sources = video_data . get ( ' sources ' ) or [ ]
for source in sources :
source_src = source . get ( ' src ' )
if not source_src :
continue
formats . append ( {
' filesize ' : int_or_none ( source . get ( ' kilobytes ' ) or None , invscale = 1000 ) ,
' format_id ' : ' - ' . join ( filter ( None , [ source . get ( ' format ' ) , source . get ( ' label ' ) ] ) ) ,
' height ' : int_or_none ( source . get ( ' height ' ) or None ) ,
' tbr ' : int_or_none ( source . get ( ' bitrate ' ) or None ) ,
' width ' : int_or_none ( source . get ( ' width ' ) or None ) ,
' url ' : source_src ,
} )
self . _sort_formats ( formats )
self . _sort_formats ( formats )
# For both metadata and downloaded files the duration varies among
# For both metadata and downloaded files the duration varies among
# formats. I just pick the max one
# formats. I just pick the max one
duration = max ( filter ( None , [
duration = max ( filter ( None , [
float_or_none ( source . get ( ' duration ' ) , scale = 1000 )
float_or_none ( source . get ( ' duration ' ) , scale = 1000 )
for source in video_data [ ' sources ' ] ] ) )
subtitles = { }
for translation in video_data . get ( ' translations ' , [ ] ) :
lang_id = translation . get ( ' language_w3c ' ) or ISO639Utils . long2short ( translation [ ' language_medium ' ] )
if lang_id not in subtitles :
subtitles [ lang_id ] = [ ]
subtitles [ lang_id ] . append ( {
' url ' : translation [ ' vttPath ' ] ,
' ext ' : ' vtt ' ,
} )
for source in sources ] ) )
return {
return {
' id ' : video_id ,
' id ' : video_id ,
' formats ' : formats ,
' formats ' : formats ,
' title ' : video_data [ ' title' ] ,
' title ' : title ,
' description ' : video_data . get ( ' description ' ) ,
' description ' : video_data . get ( ' description ' ) ,
' thumbnail ' : video_data [ ' video ' ] . get ( ' poster ' ) ,
' thumbnail ' : video_data . get ( ' video ' , { } ) . get ( ' poster ' ) ,
' duration ' : duration ,
' duration ' : duration ,
' subtitles ' : subtitles ,
' subtitles ' : self . _parse_subtitles ( video_data , ' vttPath ' ) ,
}
}