@ -4,37 +4,64 @@ from __future__ import unicode_literals
import re
import re
from .common import InfoExtractor
from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
from ..utils import (
decode_packed_codes ,
decode_packed_codes ,
determine_ext ,
determine_ext ,
ExtractorError ,
ExtractorError ,
int_or_none ,
int_or_none ,
NO_DEFAULT ,
js_to_json ,
urlencode_postdata ,
urlencode_postdata ,
)
)
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
def aa_decode(aa_code):
    """Decode emoticon-obfuscated ("aa"-encoded) text into plain characters.

    The input is split on the emoticon delimiter. In every piece the
    emoticon digit expressions are replaced by the digits they stand for,
    and the piece is then read either as an octal character code or as a
    ``u``-prefixed hexadecimal one. Pieces matching neither form contribute
    nothing to the result.

    :param aa_code: obfuscated text
    :return: the decoded characters joined into a single string
    """
    # Order matters: compound expressions must be substituted before the
    # simple ones they contain (e.g. the "4" pattern is a substring of the
    # "7" and "5" patterns).
    symbol_table = (
        ('7', '((゚ー゚) + (o^_^o))'),
        ('6', '((o^_^o) +(o^_^o))'),
        ('5', '((゚ー゚) + (゚Θ゚))'),
        ('2', '((o^_^o) - (゚Θ゚))'),
        ('4', '(゚ー゚)'),
        ('3', '(o^_^o)'),
        ('1', '(゚Θ゚)'),
        ('0', '(c^_^o)'),
    )
    delim = '(゚Д゚)[゚ε゚]+'

    # Loop-invariant patterns, compiled once.
    octal_re = re.compile(r'^\d+')
    hex_re = re.compile(r'^u([\da-f]+)')

    # Collect characters and join once instead of growing a string with +=.
    decoded = []
    for aa_char in aa_code.split(delim):
        for val, pat in symbol_table:
            aa_char = aa_char.replace(pat, val)
        # Drop the "+ " joiners left between consecutive digits.
        aa_char = aa_char.replace('+ ', '')

        m = octal_re.match(aa_char)
        if m:
            decoded.append(compat_chr(int(m.group(0), 8)))
            continue

        m = hex_re.match(aa_char)
        if m:
            decoded.append(compat_chr(int(m.group(1), 16)))
    return ''.join(decoded)
class XFileShareIE ( InfoExtractor ) :
class XFileShareIE ( InfoExtractor ) :
_SITES = (
_SITES = (
( r ' daclips \ .(?:in|com) ' , ' DaClips ' ) ,
( r ' filehoot \ .com ' , ' FileHoot ' ) ,
( r ' gorillavid \ .(?:in|com) ' , ' GorillaVid ' ) ,
( r ' movpod \ .in ' , ' MovPod ' ) ,
( r ' powerwatch \ .pw ' , ' PowerWatch ' ) ,
( r ' rapidvideo \ .ws ' , ' Rapidvideo.ws ' ) ,
( r ' clipwatching \ .com ' , ' ClipWatching ' ) ,
( r ' gounlimited \ .to ' , ' GoUnlimited ' ) ,
( r ' govid \ .me ' , ' GoVid ' ) ,
( r ' holavid \ .com ' , ' HolaVid ' ) ,
( r ' streamty \ .com ' , ' Streamty ' ) ,
( r ' thevideobee \ .to ' , ' TheVideoBee ' ) ,
( r ' thevideobee \ .to ' , ' TheVideoBee ' ) ,
( r ' vidto \ .(?:me|se) ' , ' Vidto ' ) ,
( r ' streamin \ .to ' , ' Streamin.To ' ) ,
( r ' xvidstage \ .com ' , ' XVIDSTAGE ' ) ,
( r ' vidabc \ .com ' , ' Vid ABC ' ) ,
( r ' uqload \ .com ' , ' Uqload ' ) ,
( r ' vidbom \ .com ' , ' VidBom ' ) ,
( r ' vidbom \ .com ' , ' VidBom ' ) ,
( r ' vidlo \ .us ' , ' vidlo ' ) ,
( r ' vidlo \ .us ' , ' vidlo ' ) ,
( r ' rapidvideo \ .(?:cool|org) ' , ' RapidVideo.TV ' ) ,
( r ' fastvideo \ .me ' , ' FastVideo.me ' ) ,
( r ' vidlocker \ .xyz ' , ' VidLocker ' ) ,
( r ' vidshare \ .tv ' , ' VidShare ' ) ,
( r ' vup \ .to ' , ' VUp ' ) ,
( r ' xvideosharing \ .com ' , ' XVideoSharing ' ) ,
)
)
IE_DESC = ' XFileShare based sites: %s ' % ' , ' . join ( list ( zip ( * _SITES ) ) [ 1 ] )
IE_DESC = ' XFileShare based sites: %s ' % ' , ' . join ( list ( zip ( * _SITES ) ) [ 1 ] )
_VALID_URL = ( r ' https?://(?P<host>(?:www \ .)?(?: %s ))/(?:embed-)?(?P<id>[0-9a-zA-Z]+) '
_VALID_URL = ( r ' https?://(?:www \ .)?(?P<host> %s )/(?:embed-)?(?P<id>[0-9a-zA-Z]+) '
% ' | ' . join ( site for site in list ( zip ( * _SITES ) ) [ 0 ] ) )
% ' | ' . join ( site for site in list ( zip ( * _SITES ) ) [ 0 ] ) )
_FILE_NOT_FOUND_REGEXES = (
_FILE_NOT_FOUND_REGEXES = (
@ -43,82 +70,14 @@ class XFileShareIE(InfoExtractor):
)
)
_TESTS = [ {
_TESTS = [ {
' url ' : ' http://gorillavid.in/06y9juieqpmi ' ,
' md5 ' : ' 5ae4a3580620380619678ee4875893ba ' ,
' info_dict ' : {
' id ' : ' 06y9juieqpmi ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ ' ,
' thumbnail ' : r ' re:http://.* \ .jpg ' ,
} ,
} , {
' url ' : ' http://gorillavid.in/embed-z08zf8le23c6-960x480.html ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://daclips.in/3rso4kdn6f9m ' ,
' md5 ' : ' 1ad8fd39bb976eeb66004d3a4895f106 ' ,
' url ' : ' http://xvideosharing.com/fq65f94nd2ve ' ,
' md5 ' : ' 4181f63957e8fe90ac836fa58dc3c8a6 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 3rso4kdn6f9m ' ,
' id ' : ' fq65f94nd2ve ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc ' ,
' title ' : ' sample ' ,
' thumbnail ' : r ' re:http://.* \ .jpg ' ,
' thumbnail ' : r ' re:http://.* \ .jpg ' ,
}
} , {
' url ' : ' http://movpod.in/0wguyyxi1yca ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://filehoot.com/3ivfabn7573c.html ' ,
' info_dict ' : {
' id ' : ' 3ivfabn7573c ' ,
' ext ' : ' mp4 ' ,
' title ' : ' youtube-dl test video \' äBaW_jenozKc.mp4.mp4 ' ,
' thumbnail ' : r ' re:http://.* \ .jpg ' ,
} ,
' skip ' : ' Video removed ' ,
} , {
' url ' : ' http://vidto.me/ku5glz52nqe1.html ' ,
' info_dict ' : {
' id ' : ' ku5glz52nqe1 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' test '
}
} , {
' url ' : ' http://powerwatch.pw/duecjibvicbu ' ,
' info_dict ' : {
' id ' : ' duecjibvicbu ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Big Buck Bunny trailer ' ,
} ,
} , {
' url ' : ' http://xvidstage.com/e0qcnl03co6z ' ,
' info_dict ' : {
' id ' : ' e0qcnl03co6z ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Chucky Prank 2015.mp4 ' ,
} ,
} , {
# removed by administrator
' url ' : ' http://xvidstage.com/amfy7atlkx25 ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://vidabc.com/i8ybqscrphfv ' ,
' info_dict ' : {
' id ' : ' i8ybqscrphfv ' ,
' ext ' : ' mp4 ' ,
' title ' : ' re:Beauty and the Beast 2017 ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
} , {
' url ' : ' http://www.rapidvideo.cool/b667kprndr8w ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://vidto.se/1tx1pf6t12cg.html ' ,
' only_matching ' : True ,
} ]
} ]
@staticmethod
@staticmethod
@ -131,10 +90,9 @@ class XFileShareIE(InfoExtractor):
webpage ) ]
webpage ) ]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
video_id = mobj . group ( ' id ' )
host , video_id = re . match ( self . _VALID_URL , url ) . groups ( )
url = ' http:// %s / %s ' % ( mobj . group ( ' host ' ) , video_id )
url = ' https :// %s / ' % host + ( ' embed- %s .html ' % video_id if host in ( ' govid.me ' , ' vidlo.us ' ) else video_id )
webpage = self . _download_webpage ( url , video_id )
webpage = self . _download_webpage ( url , video_id )
if any ( re . search ( p , webpage ) for p in self . _FILE_NOT_FOUND_REGEXES ) :
if any ( re . search ( p , webpage ) for p in self . _FILE_NOT_FOUND_REGEXES ) :
@ -142,7 +100,7 @@ class XFileShareIE(InfoExtractor):
fields = self . _hidden_inputs ( webpage )
fields = self . _hidden_inputs ( webpage )
if fields [ ' op ' ] == ' download1 ' :
if fields . get ( ' op ' ) == ' download1 ' :
countdown = int_or_none ( self . _search_regex (
countdown = int_or_none ( self . _search_regex (
r ' <span id= " countdown_str " >(?:[Ww]ait)? \ s*<span id= " cxc " >( \ d+)</span> \ s*(?:seconds?)?</span> ' ,
r ' <span id= " countdown_str " >(?:[Ww]ait)? \ s*<span id= " cxc " >( \ d+)</span> \ s*(?:seconds?)?</span> ' ,
webpage , ' countdown ' , default = None ) )
webpage , ' countdown ' , default = None ) )
@ -160,13 +118,37 @@ class XFileShareIE(InfoExtractor):
( r ' style= " z-index: [0-9]+; " >([^<]+)</span> ' ,
( r ' style= " z-index: [0-9]+; " >([^<]+)</span> ' ,
r ' <td nowrap>([^<]+)</td> ' ,
r ' <td nowrap>([^<]+)</td> ' ,
r ' h4-fine[^>]*>([^<]+)< ' ,
r ' h4-fine[^>]*>([^<]+)< ' ,
r ' >Watch (.+) ' ,
r ' >Watch (.+)[ <] ' ,
r ' <h2 class= " video-page-head " >([^<]+)</h2> ' ,
r ' <h2 class= " video-page-head " >([^<]+)</h2> ' ,
r ' <h2 style= " [^ " ]*color:#403f3d[^ " ]* " [^>]*>([^<]+)< ' ) , # streamin.to
r ' <h2 style= " [^ " ]*color:#403f3d[^ " ]* " [^>]*>([^<]+)< ' , # streamin.to
r ' title \ s*: \ s* " ([^ " ]+) " ' ) , # govid.me
webpage , ' title ' , default = None ) or self . _og_search_title (
webpage , ' title ' , default = None ) or self . _og_search_title (
webpage , default = None ) or video_id ) . strip ( )
webpage , default = None ) or video_id ) . strip ( )
def extract_formats ( default = NO_DEFAULT ) :
for regex , func in (
( r ' (eval \ (function \ (p,a,c,k,e,d \ ){.+) ' , decode_packed_codes ) ,
( r ' (゚.+) ' , aa_decode ) ) :
obf_code = self . _search_regex ( regex , webpage , ' obfuscated code ' , default = None )
if obf_code :
webpage = webpage . replace ( obf_code , func ( obf_code ) )
formats = [ ]
jwplayer_data = self . _search_regex (
[
r ' jwplayer \ ( " [^ " ]+ " \ ) \ .load \ ( \ [({.+?}) \ ] \ ); ' ,
r ' jwplayer \ ( " [^ " ]+ " \ ) \ .setup \ (({.+?}) \ ); ' ,
] , webpage ,
' jwplayer data ' , default = None )
if jwplayer_data :
jwplayer_data = self . _parse_json (
jwplayer_data . replace ( r " \ ' " , " ' " ) , video_id , js_to_json )
if jwplayer_data :
formats = self . _parse_jwplayer_data (
jwplayer_data , video_id , False ,
m3u8_id = ' hls ' , mpd_id = ' dash ' ) [ ' formats ' ]
if not formats :
urls = [ ]
urls = [ ]
for regex in (
for regex in (
r ' (?:file|src) \ s*: \ s*([ " \' ])(?P<url>http(?:(?! \ 1).)+ \ .(?:m3u8|mp4|flv)(?:(?! \ 1).)*) \ 1 ' ,
r ' (?:file|src) \ s*: \ s*([ " \' ])(?P<url>http(?:(?! \ 1).)+ \ .(?:m3u8|mp4|flv)(?:(?! \ 1).)*) \ 1 ' ,
@ -177,6 +159,12 @@ class XFileShareIE(InfoExtractor):
video_url = mobj . group ( ' url ' )
video_url = mobj . group ( ' url ' )
if video_url not in urls :
if video_url not in urls :
urls . append ( video_url )
urls . append ( video_url )
sources = self . _search_regex (
r ' sources \ s*: \ s*( \ [(?!{)[^ \ ]]+ \ ]) ' , webpage , ' sources ' , default = None )
if sources :
urls . extend ( self . _parse_json ( sources , video_id ) )
formats = [ ]
formats = [ ]
for video_url in urls :
for video_url in urls :
if determine_ext ( video_url ) == ' m3u8 ' :
if determine_ext ( video_url ) == ' m3u8 ' :
@ -189,21 +177,13 @@ class XFileShareIE(InfoExtractor):
' url ' : video_url ,
' url ' : video_url ,
' format_id ' : ' sd ' ,
' format_id ' : ' sd ' ,
} )
} )
if not formats and default is not NO_DEFAULT :
return default
self . _sort_formats ( formats )
return formats
formats = extract_formats ( default = None )
if not formats :
webpage = decode_packed_codes ( self . _search_regex (
r " (} \ ( ' (.+) ' ,( \ d+),( \ d+), ' [^ ' ]* \ b(?:file|embed) \ b[^ ' ]* ' \ .split \ ( ' \ | ' \ )) " ,
webpage , ' packed code ' ) )
formats = extract_formats ( )
self . _sort_formats ( formats )
thumbnail = self . _search_regex (
thumbnail = self . _search_regex (
r ' image \ s*: \ s*[ " \' ](http[^ " \' ]+)[ " \' ], ' , webpage , ' thumbnail ' , default = None )
[
r ' <video[^>]+poster= " ([^ " ]+) " ' ,
r ' (?:image|poster) \ s*: \ s*[ " \' ](http[^ " \' ]+)[ " \' ], ' ,
] , webpage , ' thumbnail ' , default = None )
return {
return {
' id ' : video_id ,
' id ' : video_id ,