|
|
@@ -1,3 +1,5 @@
|
|
|
from __future__ import unicode_literals |
|
|
|
|
|
|
|
import base64 |
|
|
|
import hashlib |
|
|
|
import json |
|
|
@@ -202,17 +204,17 @@ class InfoExtractor(object):
|
|
|
self.report_download_webpage(video_id) |
|
|
|
elif note is not False: |
|
|
|
if video_id is None: |
|
|
|
self.to_screen(u'%s' % (note,)) |
|
|
|
self.to_screen('%s' % (note,)) |
|
|
|
else: |
|
|
|
self.to_screen(u'%s: %s' % (video_id, note)) |
|
|
|
self.to_screen('%s: %s' % (video_id, note)) |
|
|
|
try: |
|
|
|
return self._downloader.urlopen(url_or_request) |
|
|
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |
|
|
|
if errnote is False: |
|
|
|
return False |
|
|
|
if errnote is None: |
|
|
|
errnote = u'Unable to download webpage' |
|
|
|
errmsg = u'%s: %s' % (errnote, compat_str(err)) |
|
|
|
errnote = 'Unable to download webpage' |
|
|
|
errmsg = '%s: %s' % (errnote, compat_str(err)) |
|
|
|
if fatal: |
|
|
|
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) |
|
|
|
else: |
|
|
@@ -249,7 +251,7 @@ class InfoExtractor(object):
|
|
|
url = url_or_request.get_full_url() |
|
|
|
except AttributeError: |
|
|
|
url = url_or_request |
|
|
|
self.to_screen(u'Dumping request to ' + url) |
|
|
|
self.to_screen('Dumping request to ' + url) |
|
|
|
dump = base64.b64encode(webpage_bytes).decode('ascii') |
|
|
|
self._downloader.to_screen(dump) |
|
|
|
if self._downloader.params.get('write_pages', False): |
|
|
@@ -259,11 +261,11 @@ class InfoExtractor(object):
|
|
|
url = url_or_request |
|
|
|
basen = '%s_%s' % (video_id, url) |
|
|
|
if len(basen) > 240: |
|
|
|
h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest() |
|
|
|
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() |
|
|
|
basen = basen[:240 - len(h)] + h |
|
|
|
raw_filename = basen + '.dump' |
|
|
|
filename = sanitize_filename(raw_filename, restricted=True) |
|
|
|
self.to_screen(u'Saving request to ' + filename) |
|
|
|
self.to_screen('Saving request to ' + filename) |
|
|
|
with open(filename, 'wb') as outf: |
|
|
|
outf.write(webpage_bytes) |
|
|
|
|
|
|
@@ -272,14 +274,14 @@ class InfoExtractor(object):
|
|
|
except LookupError: |
|
|
|
content = webpage_bytes.decode('utf-8', 'replace') |
|
|
|
|
|
|
|
if (u'<title>Access to this site is blocked</title>' in content and |
|
|
|
u'Websense' in content[:512]): |
|
|
|
msg = u'Access to this webpage has been blocked by Websense filtering software in your network.' |
|
|
|
if ('<title>Access to this site is blocked</title>' in content and |
|
|
|
'Websense' in content[:512]): |
|
|
|
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' |
|
|
|
blocked_iframe = self._html_search_regex( |
|
|
|
r'<iframe src="([^"]+)"', content, |
|
|
|
u'Websense information URL', default=None) |
|
|
|
'Websense information URL', default=None) |
|
|
|
if blocked_iframe: |
|
|
|
msg += u' Visit %s for more details' % blocked_iframe |
|
|
|
msg += ' Visit %s for more details' % blocked_iframe |
|
|
|
raise ExtractorError(msg, expected=True) |
|
|
|
|
|
|
|
return (content, urlh) |
|
|
@@ -294,7 +296,7 @@ class InfoExtractor(object):
|
|
|
return content |
|
|
|
|
|
|
|
def _download_xml(self, url_or_request, video_id, |
|
|
|
note=u'Downloading XML', errnote=u'Unable to download XML', |
|
|
|
note='Downloading XML', errnote='Unable to download XML', |
|
|
|
transform_source=None, fatal=True): |
|
|
|
"""Return the xml as an xml.etree.ElementTree.Element""" |
|
|
|
xml_string = self._download_webpage( |
|
|
@@ -306,8 +308,8 @@ class InfoExtractor(object):
|
|
|
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) |
|
|
|
|
|
|
|
def _download_json(self, url_or_request, video_id, |
|
|
|
note=u'Downloading JSON metadata', |
|
|
|
errnote=u'Unable to download JSON metadata', |
|
|
|
note='Downloading JSON metadata', |
|
|
|
errnote='Unable to download JSON metadata', |
|
|
|
transform_source=None, |
|
|
|
fatal=True): |
|
|
|
json_string = self._download_webpage( |
|
|
@@ -322,29 +324,29 @@ class InfoExtractor(object):
|
|
|
raise ExtractorError('Failed to download JSON', cause=ve) |
|
|
|
|
|
|
|
def report_warning(self, msg, video_id=None):
    """Emit a warning via the downloader, prefixed with '[IE_NAME]'.

    If video_id is given, it is inserted between the IE name and the
    message ('[ie] video_id: msg'); otherwise just '[ie] msg'.
    """
    # Diff artifact collapsed: keep only the post-unicode_literals form
    # (the u'' prefix is redundant under `from __future__ import unicode_literals`).
    idstr = '' if video_id is None else '%s: ' % video_id
    self._downloader.report_warning(
        '[%s] %s%s' % (self.IE_NAME, idstr, msg))
|
|
|
|
|
|
|
def to_screen(self, msg):
    """Print msg to screen, prefixing it with '[ie_name]'."""
    # Duplicate pre-migration u'' line removed; single call remains.
    self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
|
|
|
|
|
|
|
def report_extraction(self, id_or_name):
    """Report information extraction."""
    # Duplicate pre-migration u'' line removed; single call remains.
    self.to_screen('%s: Extracting information' % id_or_name)
|
|
|
|
|
|
|
def report_download_webpage(self, video_id):
    """Report webpage download."""
    # Duplicate pre-migration u'' line removed; single call remains.
    self.to_screen('%s: Downloading webpage' % video_id)
|
|
|
|
|
|
|
def report_age_confirmation(self):
    """Report attempt to confirm age."""
    # Duplicate pre-migration u'' line removed; single call remains.
    self.to_screen('Confirming age')
|
|
|
|
|
|
|
def report_login(self):
    """Report attempt to log in."""
    # Duplicate pre-migration u'' line removed; single call remains.
    self.to_screen('Logging in')
|
|
|
|
|
|
|
#Methods for following #608 |
|
|
|
@staticmethod |
|
|
@@ -384,7 +386,7 @@ class InfoExtractor(object):
|
|
|
break |
|
|
|
|
|
|
|
if os.name != 'nt' and sys.stderr.isatty(): |
|
|
|
_name = u'\033[0;34m%s\033[0m' % name |
|
|
|
_name = '\033[0;34m%s\033[0m' % name |
|
|
|
else: |
|
|
|
_name = name |
|
|
|
|
|
|
@@ -394,10 +396,10 @@ class InfoExtractor(object):
|
|
|
elif default is not _NO_DEFAULT: |
|
|
|
return default |
|
|
|
elif fatal: |
|
|
|
raise RegexNotFoundError(u'Unable to extract %s' % _name) |
|
|
|
raise RegexNotFoundError('Unable to extract %s' % _name) |
|
|
|
else: |
|
|
|
self._downloader.report_warning(u'unable to extract %s; ' |
|
|
|
u'please report this issue on http://yt-dl.org/bug' % _name) |
|
|
|
self._downloader.report_warning('unable to extract %s; ' |
|
|
|
'please report this issue on http://yt-dl.org/bug' % _name) |
|
|
|
return None |
|
|
|
|
|
|
|
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): |
|
|
@@ -436,7 +438,7 @@ class InfoExtractor(object):
|
|
|
else: |
|
|
|
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) |
|
|
|
except (IOError, netrc.NetrcParseError) as err: |
|
|
|
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) |
|
|
|
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err)) |
|
|
|
|
|
|
|
return (username, password) |
|
|
|
|
|
|
@@ -476,7 +478,7 @@ class InfoExtractor(object):
|
|
|
return unescapeHTML(escaped) |
|
|
|
|
|
|
|
def _og_search_thumbnail(self, html, **kargs): |
|
|
|
return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs) |
|
|
|
return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs) |
|
|
|
|
|
|
|
def _og_search_description(self, html, **kargs):
    # fatal=False: a missing og:description is tolerated rather than raised.
    return self._og_search_property('description', html, fatal=False, **kargs)
|
|
@@ -535,7 +537,7 @@ class InfoExtractor(object):
|
|
|
|
|
|
|
def _sort_formats(self, formats): |
|
|
|
if not formats: |
|
|
|
raise ExtractorError(u'No video formats found') |
|
|
|
raise ExtractorError('No video formats found') |
|
|
|
|
|
|
|
def _formats_key(f): |
|
|
|
# TODO remove the following workaround |
|
|
@@ -555,9 +557,9 @@ class InfoExtractor(object):
|
|
|
|
|
|
|
if f.get('vcodec') == 'none': # audio only |
|
|
|
if self._downloader.params.get('prefer_free_formats'): |
|
|
|
ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] |
|
|
|
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus'] |
|
|
|
else: |
|
|
|
ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'] |
|
|
|
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a'] |
|
|
|
ext_preference = 0 |
|
|
|
try: |
|
|
|
audio_ext_preference = ORDER.index(f['ext']) |
|
|
@@ -565,9 +567,9 @@ class InfoExtractor(object):
|
|
|
audio_ext_preference = -1 |
|
|
|
else: |
|
|
|
if self._downloader.params.get('prefer_free_formats'): |
|
|
|
ORDER = [u'flv', u'mp4', u'webm'] |
|
|
|
ORDER = ['flv', 'mp4', 'webm'] |
|
|
|
else: |
|
|
|
ORDER = [u'webm', u'flv', u'mp4'] |
|
|
|
ORDER = ['webm', 'flv', 'mp4'] |
|
|
|
try: |
|
|
|
ext_preference = ORDER.index(f['ext']) |
|
|
|
except ValueError: |
|
|
@@ -609,7 +611,7 @@ class InfoExtractor(object):
|
|
|
|
|
|
|
def _sleep(self, timeout, video_id, msg_template=None): |
|
|
|
if msg_template is None: |
|
|
|
msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' |
|
|
|
msg_template = '%(video_id)s: Waiting for %(timeout)s seconds' |
|
|
|
msg = msg_template % {'video_id': video_id, 'timeout': timeout} |
|
|
|
self.to_screen(msg) |
|
|
|
time.sleep(timeout) |
|
|
@@ -704,7 +706,7 @@ class SearchInfoExtractor(InfoExtractor):
|
|
|
def _real_extract(self, query): |
|
|
|
mobj = re.match(self._make_valid_url(), query) |
|
|
|
if mobj is None: |
|
|
|
raise ExtractorError(u'Invalid search query "%s"' % query) |
|
|
|
raise ExtractorError('Invalid search query "%s"' % query) |
|
|
|
|
|
|
|
prefix = mobj.group('prefix') |
|
|
|
query = mobj.group('query') |
|
|
@@ -715,9 +717,9 @@ class SearchInfoExtractor(InfoExtractor):
|
|
|
else: |
|
|
|
n = int(prefix) |
|
|
|
if n <= 0: |
|
|
|
raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query)) |
|
|
|
raise ExtractorError('invalid download number %s for query "%s"' % (n, query)) |
|
|
|
elif n > self._MAX_RESULTS: |
|
|
|
self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) |
|
|
|
self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) |
|
|
|
n = self._MAX_RESULTS |
|
|
|
return self._get_n_results(query, n) |
|
|
|
|
|
|
|