|
|
@ -58,7 +58,7 @@ def preferredencoding(): |
|
|
|
|
|
|
|
def htmlentity_transform(matchobj): |
|
|
|
"""Transforms an HTML entity to a Unicode character. |
|
|
|
|
|
|
|
|
|
|
|
This function receives a match object and is intended to be used with |
|
|
|
the re.sub() function. |
|
|
|
""" |
|
|
@ -115,7 +115,7 @@ def sanitize_open(filename, open_mode): |
|
|
|
|
|
|
|
class DownloadError(Exception): |
|
|
|
"""Download Error exception. |
|
|
|
|
|
|
|
|
|
|
|
This exception may be thrown by FileDownloader objects if they are not |
|
|
|
configured to continue on errors. They will contain the appropriate |
|
|
|
error message. |
|
|
@ -227,7 +227,7 @@ class FileDownloader(object): |
|
|
|
self._num_downloads = 0 |
|
|
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] |
|
|
|
self.params = params |
|
|
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def pmkdir(filename): |
|
|
|
"""Create directory components in filename. Similar to Unix "mkdir -p".""" |
|
|
@ -313,12 +313,12 @@ class FileDownloader(object): |
|
|
|
"""Add an InfoExtractor object to the end of the list.""" |
|
|
|
self._ies.append(ie) |
|
|
|
ie.set_downloader(self) |
|
|
|
|
|
|
|
|
|
|
|
def add_post_processor(self, pp): |
|
|
|
"""Add a PostProcessor object to the end of the chain.""" |
|
|
|
self._pps.append(pp) |
|
|
|
pp.set_downloader(self) |
|
|
|
|
|
|
|
|
|
|
|
def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False): |
|
|
|
"""Print message to stdout if not in quiet mode.""" |
|
|
|
try: |
|
|
@ -329,11 +329,11 @@ class FileDownloader(object): |
|
|
|
except (UnicodeEncodeError), err: |
|
|
|
if not ignore_encoding_errors: |
|
|
|
raise |
|
|
|
|
|
|
|
|
|
|
|
def to_stderr(self, message): |
|
|
|
"""Print message to stderr.""" |
|
|
|
print >>sys.stderr, message.encode(preferredencoding()) |
|
|
|
|
|
|
|
|
|
|
|
def to_cons_title(self, message): |
|
|
|
"""Set console/terminal window title to message.""" |
|
|
|
if not self.params.get('consoletitle', False): |
|
|
@ -386,7 +386,7 @@ class FileDownloader(object): |
|
|
|
def report_destination(self, filename): |
|
|
|
"""Report destination filename.""" |
|
|
|
self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) |
|
|
|
|
|
|
|
|
|
|
|
def report_progress(self, percent_str, data_len_str, speed_str, eta_str): |
|
|
|
"""Report download progress.""" |
|
|
|
if self.params.get('noprogress', False): |
|
|
@ -399,29 +399,29 @@ class FileDownloader(object): |
|
|
|
def report_resuming_byte(self, resume_len): |
|
|
|
"""Report attempt to resume at given byte.""" |
|
|
|
self.to_screen(u'[download] Resuming download at byte %s' % resume_len) |
|
|
|
|
|
|
|
|
|
|
|
def report_retry(self, count, retries): |
|
|
|
"""Report retry in case of HTTP error 5xx""" |
|
|
|
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)) |
|
|
|
|
|
|
|
|
|
|
|
def report_file_already_downloaded(self, file_name): |
|
|
|
"""Report file has already been fully downloaded.""" |
|
|
|
try: |
|
|
|
self.to_screen(u'[download] %s has already been downloaded' % file_name) |
|
|
|
except (UnicodeEncodeError), err: |
|
|
|
self.to_screen(u'[download] The file has already been downloaded') |
|
|
|
|
|
|
|
|
|
|
|
def report_unable_to_resume(self): |
|
|
|
"""Report it was impossible to resume download.""" |
|
|
|
self.to_screen(u'[download] Unable to resume') |
|
|
|
|
|
|
|
|
|
|
|
def report_finish(self): |
|
|
|
"""Report download finished.""" |
|
|
|
if self.params.get('noprogress', False): |
|
|
|
self.to_screen(u'[download] Download completed') |
|
|
|
else: |
|
|
|
self.to_screen(u'') |
|
|
|
|
|
|
|
|
|
|
|
def increment_downloads(self): |
|
|
|
"""Increment the ordinal that assigns a number to each file.""" |
|
|
|
self._num_downloads += 1 |
|
|
@ -441,7 +441,7 @@ class FileDownloader(object): |
|
|
|
print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') |
|
|
|
|
|
|
|
return |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
template_dict = dict(info_dict) |
|
|
|
template_dict['epoch'] = unicode(long(time.time())) |
|
|
@ -512,7 +512,7 @@ class FileDownloader(object): |
|
|
|
info = pp.run(info) |
|
|
|
if info is None: |
|
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
def _download_with_rtmpdump(self, filename, url, player_url): |
|
|
|
self.report_destination(filename) |
|
|
|
tmpfilename = self.temp_name(filename) |
|
|
@ -730,7 +730,7 @@ class InfoExtractor(object): |
|
|
|
def set_downloader(self, downloader): |
|
|
|
"""Sets the downloader for this IE.""" |
|
|
|
self._downloader = downloader |
|
|
|
|
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
"""Real initialization process. Redefine in subclasses.""" |
|
|
|
pass |
|
|
@ -771,31 +771,31 @@ class YoutubeIE(InfoExtractor): |
|
|
|
def report_login(self): |
|
|
|
"""Report attempt to log in.""" |
|
|
|
self._downloader.to_screen(u'[youtube] Logging in') |
|
|
|
|
|
|
|
|
|
|
|
def report_age_confirmation(self): |
|
|
|
"""Report attempt to confirm age.""" |
|
|
|
self._downloader.to_screen(u'[youtube] Confirming age') |
|
|
|
|
|
|
|
|
|
|
|
def report_video_webpage_download(self, video_id): |
|
|
|
"""Report attempt to download video webpage.""" |
|
|
|
self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id) |
|
|
|
|
|
|
|
|
|
|
|
def report_video_info_webpage_download(self, video_id): |
|
|
|
"""Report attempt to download video info webpage.""" |
|
|
|
self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id) |
|
|
|
|
|
|
|
|
|
|
|
def report_information_extraction(self, video_id): |
|
|
|
"""Report attempt to extract video information.""" |
|
|
|
self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id) |
|
|
|
|
|
|
|
|
|
|
|
def report_unavailable_format(self, video_id, format): |
|
|
|
"""Report extracted video URL.""" |
|
|
|
self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format)) |
|
|
|
|
|
|
|
|
|
|
|
def report_rtmp_download(self): |
|
|
|
"""Indicate the download will use the RTMP protocol.""" |
|
|
|
self._downloader.to_screen(u'[youtube] RTMP download detected') |
|
|
|
|
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
if self._downloader is None: |
|
|
|
return |
|
|
@ -851,7 +851,7 @@ class YoutubeIE(InfoExtractor): |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) |
|
|
|
return |
|
|
|
|
|
|
|
|
|
|
|
# Confirm age |
|
|
|
age_form = { |
|
|
|
'next_url': '/', |
|
|
@ -1043,11 +1043,11 @@ class MetacafeIE(InfoExtractor): |
|
|
|
def report_age_confirmation(self): |
|
|
|
"""Report attempt to confirm age.""" |
|
|
|
self._downloader.to_screen(u'[metacafe] Confirming age') |
|
|
|
|
|
|
|
|
|
|
|
def report_download_webpage(self, video_id): |
|
|
|
"""Report webpage download.""" |
|
|
|
self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id) |
|
|
|
|
|
|
|
|
|
|
|
def report_extraction(self, video_id): |
|
|
|
"""Report information extraction.""" |
|
|
|
self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id) |
|
|
@ -1074,7 +1074,7 @@ class MetacafeIE(InfoExtractor): |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) |
|
|
|
return |
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
# Extract id and simplified title from URL |
|
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
@ -1110,7 +1110,7 @@ class MetacafeIE(InfoExtractor): |
|
|
|
if mobj is not None: |
|
|
|
mediaURL = urllib.unquote(mobj.group(1)) |
|
|
|
video_extension = mediaURL[-3:] |
|
|
|
|
|
|
|
|
|
|
|
# Extract gdaKey if available |
|
|
|
mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) |
|
|
|
if mobj is None: |
|
|
@ -1180,7 +1180,7 @@ class DailymotionIE(InfoExtractor): |
|
|
|
def report_download_webpage(self, video_id): |
|
|
|
"""Report webpage download.""" |
|
|
|
self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id) |
|
|
|
|
|
|
|
|
|
|
|
def report_extraction(self, video_id): |
|
|
|
"""Report information extraction.""" |
|
|
|
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) |
|
|
@ -1717,7 +1717,7 @@ class YoutubeSearchIE(InfoExtractor): |
|
|
|
def __init__(self, youtube_ie, downloader=None): |
|
|
|
InfoExtractor.__init__(self, downloader) |
|
|
|
self._youtube_ie = youtube_ie |
|
|
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def suitable(url): |
|
|
|
return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) |
|
|
@ -1729,7 +1729,7 @@ class YoutubeSearchIE(InfoExtractor): |
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
self._youtube_ie.initialize() |
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, query): |
|
|
|
mobj = re.match(self._VALID_QUERY, query) |
|
|
|
if mobj is None: |
|
|
@ -1808,7 +1808,7 @@ class GoogleSearchIE(InfoExtractor): |
|
|
|
def __init__(self, google_ie, downloader=None): |
|
|
|
InfoExtractor.__init__(self, downloader) |
|
|
|
self._google_ie = google_ie |
|
|
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def suitable(url): |
|
|
|
return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) |
|
|
@ -1820,7 +1820,7 @@ class GoogleSearchIE(InfoExtractor): |
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
self._google_ie.initialize() |
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, query): |
|
|
|
mobj = re.match(self._VALID_QUERY, query) |
|
|
|
if mobj is None: |
|
|
@ -1899,7 +1899,7 @@ class YahooSearchIE(InfoExtractor): |
|
|
|
def __init__(self, yahoo_ie, downloader=None): |
|
|
|
InfoExtractor.__init__(self, downloader) |
|
|
|
self._yahoo_ie = yahoo_ie |
|
|
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def suitable(url): |
|
|
|
return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) |
|
|
@ -1911,7 +1911,7 @@ class YahooSearchIE(InfoExtractor): |
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
self._yahoo_ie.initialize() |
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, query): |
|
|
|
mobj = re.match(self._VALID_QUERY, query) |
|
|
|
if mobj is None: |
|
|
@ -1990,7 +1990,7 @@ class YoutubePlaylistIE(InfoExtractor): |
|
|
|
def __init__(self, youtube_ie, downloader=None): |
|
|
|
InfoExtractor.__init__(self, downloader) |
|
|
|
self._youtube_ie = youtube_ie |
|
|
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def suitable(url): |
|
|
|
return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) |
|
|
@ -2001,7 +2001,7 @@ class YoutubePlaylistIE(InfoExtractor): |
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
self._youtube_ie.initialize() |
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
# Extract playlist id |
|
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
@ -2053,7 +2053,7 @@ class YoutubeUserIE(InfoExtractor): |
|
|
|
def __init__(self, youtube_ie, downloader=None): |
|
|
|
InfoExtractor.__init__(self, downloader) |
|
|
|
self._youtube_ie = youtube_ie |
|
|
|
|
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def suitable(url): |
|
|
|
return (re.match(YoutubeUserIE._VALID_URL, url) is not None) |
|
|
@ -2064,7 +2064,7 @@ class YoutubeUserIE(InfoExtractor): |
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
self._youtube_ie.initialize() |
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
# Extract username |
|
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
@ -2205,7 +2205,7 @@ class PostProcessor(object): |
|
|
|
def set_downloader(self, downloader): |
|
|
|
"""Sets the downloader for this PP.""" |
|
|
|
self._downloader = downloader |
|
|
|
|
|
|
|
|
|
|
|
def run(self, information): |
|
|
|
"""Run the PostProcessor. |
|
|
|
|
|
|
@ -2225,7 +2225,7 @@ class PostProcessor(object): |
|
|
|
it was called from. |
|
|
|
""" |
|
|
|
return information # by default, do nothing |
|
|
|
|
|
|
|
|
|
|
|
### MAIN PROGRAM ### |
|
|
|
if __name__ == '__main__': |
|
|
|
try: |
|
|
|