|
|
@ -419,15 +419,6 @@ class InfoExtractor(object): |
|
|
|
"""Sets the downloader for this IE.""" |
|
|
|
self._downloader = downloader |
|
|
|
|
|
|
|
def to_stdout(self, message):
    """Print message to stdout if downloader is not in quiet mode.

    The message is always printed when no downloader is attached
    (self._downloader is None). Otherwise it is printed only when the
    downloader's 'quiet' parameter is missing or false.
    """
    downloader = self._downloader
    if downloader is not None and downloader.params.get('quiet', False):
        return
    # Single-argument parenthesised form: parses as the print statement
    # on Python 2 and as the print function on Python 3.
    print(message)
|
|
|
|
|
|
|
def to_stderr(self, message):
    """Print message to stderr.

    The message is written unconditionally; no downloader parameter is
    consulted here.
    """
    print >>sys.stderr, message
|
|
|
|
|
|
|
def _real_initialize(self):
    """Real initialization process. Redefine in subclasses.

    This default implementation does nothing and returns None.
    """
|
|
@ -474,27 +465,27 @@ class YoutubeIE(InfoExtractor): |
|
|
|
|
|
|
|
def report_lang(self):
    """Report attempt to set language.

    The status line is forwarded once to self._downloader.to_stdout();
    a downloader must therefore be attached before this is called.
    """
    self._downloader.to_stdout(u'[youtube] Setting language')
|
|
|
|
|
|
|
def report_login(self):
    """Report attempt to log in.

    The status line is forwarded once to self._downloader.to_stdout();
    a downloader must therefore be attached before this is called.
    """
    self._downloader.to_stdout(u'[youtube] Logging in')
|
|
|
|
|
|
|
def report_age_confirmation(self):
    """Report attempt to confirm age.

    The status line is forwarded once to self._downloader.to_stdout();
    a downloader must therefore be attached before this is called.
    """
    self._downloader.to_stdout(u'[youtube] Confirming age')
|
|
|
|
|
|
|
def report_webpage_download(self, video_id):
    """Report attempt to download webpage.

    video_id is interpolated into the status line, which is forwarded
    once to self._downloader.to_stdout(); a downloader must therefore
    be attached before this is called.
    """
    status = u'[youtube] %s: Downloading video webpage' % video_id
    self._downloader.to_stdout(status)
|
|
|
|
|
|
|
def report_information_extraction(self, video_id):
    """Report attempt to extract video information.

    video_id is interpolated into the status line, which is forwarded
    once to self._downloader.to_stdout(); a downloader must therefore
    be attached before this is called.
    """
    status = u'[youtube] %s: Extracting video information' % video_id
    self._downloader.to_stdout(status)
|
|
|
|
|
|
|
def report_video_url(self, video_id, video_real_url):
    """Report extracted video URL.

    Both video_id and video_real_url are interpolated into the status
    line, which is forwarded once to self._downloader.to_stdout(); a
    downloader must therefore be attached before this is called.
    """
    status = u'[youtube] %s: URL: %s' % (video_id, video_real_url)
    self._downloader.to_stdout(status)
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
if self._downloader is None: |
|
|
@ -517,7 +508,7 @@ class YoutubeIE(InfoExtractor): |
|
|
|
else: |
|
|
|
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) |
|
|
|
except (IOError, netrc.NetrcParseError), err: |
|
|
|
self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) |
|
|
|
return |
|
|
|
|
|
|
|
# Set language |
|
|
@ -526,7 +517,7 @@ class YoutubeIE(InfoExtractor): |
|
|
|
self.report_lang() |
|
|
|
urllib2.urlopen(request).read() |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'WARNING: unable to set language: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) |
|
|
|
return |
|
|
|
|
|
|
|
# No authentication to be performed |
|
|
@ -546,10 +537,10 @@ class YoutubeIE(InfoExtractor): |
|
|
|
self.report_login() |
|
|
|
login_results = urllib2.urlopen(request).read() |
|
|
|
if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None: |
|
|
|
self.to_stderr(u'WARNING: unable to log in: bad username or password') |
|
|
|
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') |
|
|
|
return |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'WARNING: unable to log in: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) |
|
|
|
return |
|
|
|
|
|
|
|
# Confirm age |
|
|
@ -562,14 +553,14 @@ class YoutubeIE(InfoExtractor): |
|
|
|
self.report_age_confirmation() |
|
|
|
age_results = urllib2.urlopen(request).read() |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) |
|
|
|
return |
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
# Extract video id from URL |
|
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: invalid URL: %s' % url) |
|
|
|
self._downloader.to_stderr(u'ERROR: invalid URL: %s' % url) |
|
|
|
return [None] |
|
|
|
video_id = mobj.group(2) |
|
|
|
|
|
|
@ -578,8 +569,6 @@ class YoutubeIE(InfoExtractor): |
|
|
|
if self._downloader is not None: |
|
|
|
params = self._downloader.params |
|
|
|
format_param = params.get('format', None) |
|
|
|
if format_param is None: |
|
|
|
format_param = '34' |
|
|
|
|
|
|
|
# Extension |
|
|
|
video_extension = { |
|
|
@ -597,14 +586,14 @@ class YoutubeIE(InfoExtractor): |
|
|
|
self.report_webpage_download(video_id) |
|
|
|
video_webpage = urllib2.urlopen(request).read() |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err)) |
|
|
|
return [None] |
|
|
|
self.report_information_extraction(video_id) |
|
|
|
|
|
|
|
# "t" param |
|
|
|
mobj = re.search(r', "t": "([^"]+)"', video_webpage) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: unable to extract "t" parameter') |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to extract "t" parameter') |
|
|
|
return [None] |
|
|
|
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) |
|
|
|
if format_param is not None: |
|
|
@ -614,14 +603,14 @@ class YoutubeIE(InfoExtractor): |
|
|
|
# uploader |
|
|
|
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: unable to extract uploader nickname') |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to extract uploader nickname') |
|
|
|
return [None] |
|
|
|
video_uploader = mobj.group(1) |
|
|
|
|
|
|
|
# title |
|
|
|
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: unable to extract video title') |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to extract video title') |
|
|
|
return [None] |
|
|
|
video_title = mobj.group(1).decode('utf-8') |
|
|
|
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) |
|
|
@ -631,7 +620,7 @@ class YoutubeIE(InfoExtractor): |
|
|
|
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) |
|
|
|
simple_title = simple_title.strip(ur'_') |
|
|
|
|
|
|
|
# Return information |
|
|
|
# Process video information |
|
|
|
return [{ |
|
|
|
'id': video_id.decode('utf-8'), |
|
|
|
'url': video_real_url.decode('utf-8'), |
|
|
@ -658,19 +647,19 @@ class MetacafeIE(InfoExtractor): |
|
|
|
|
|
|
|
def report_disclaimer(self):
    """Report disclaimer retrieval.

    The status line is forwarded once to self._downloader.to_stdout();
    a downloader must therefore be attached before this is called.
    """
    self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
|
|
|
|
|
|
|
def report_age_confirmation(self):
    """Report attempt to confirm age.

    The status line is forwarded once to self._downloader.to_stdout();
    a downloader must therefore be attached before this is called.
    """
    self._downloader.to_stdout(u'[metacafe] Confirming age')
|
|
|
|
|
|
|
def report_download_webpage(self, video_id):
    """Report webpage download.

    video_id is interpolated into the status line, which is forwarded
    once to self._downloader.to_stdout(); a downloader must therefore
    be attached before this is called.
    """
    status = u'[metacafe] %s: Downloading webpage' % video_id
    self._downloader.to_stdout(status)
|
|
|
|
|
|
|
def report_extraction(self, video_id):
    """Report information extraction.

    video_id is interpolated into the status line, which is forwarded
    once to self._downloader.to_stdout(); a downloader must therefore
    be attached before this is called.
    """
    status = u'[metacafe] %s: Extracting information' % video_id
    self._downloader.to_stdout(status)
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
# Retrieve disclaimer |
|
|
@ -679,7 +668,7 @@ class MetacafeIE(InfoExtractor): |
|
|
|
self.report_disclaimer() |
|
|
|
disclaimer = urllib2.urlopen(request).read() |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) |
|
|
|
return |
|
|
|
|
|
|
|
# Confirm age |
|
|
@ -692,14 +681,14 @@ class MetacafeIE(InfoExtractor): |
|
|
|
self.report_age_confirmation() |
|
|
|
disclaimer = urllib2.urlopen(request).read() |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) |
|
|
|
return |
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
# Extract id and simplified title from URL |
|
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: invalid URL: %s' % url) |
|
|
|
self._downloader.to_stderr(u'ERROR: invalid URL: %s' % url) |
|
|
|
return [None] |
|
|
|
|
|
|
|
video_id = mobj.group(1) |
|
|
@ -718,20 +707,20 @@ class MetacafeIE(InfoExtractor): |
|
|
|
self.report_download_webpage(video_id) |
|
|
|
webpage = urllib2.urlopen(request).read() |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err)) |
|
|
|
return [None] |
|
|
|
|
|
|
|
# Extract URL, uploader and title from webpage |
|
|
|
self.report_extraction(video_id) |
|
|
|
mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: unable to extract media URL') |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to extract media URL') |
|
|
|
return [None] |
|
|
|
mediaURL = mobj.group(1).replace('\\', '') |
|
|
|
|
|
|
|
mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: unable to extract gdaKey') |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to extract gdaKey') |
|
|
|
return [None] |
|
|
|
gdaKey = mobj.group(1) |
|
|
|
|
|
|
@ -739,13 +728,13 @@ class MetacafeIE(InfoExtractor): |
|
|
|
|
|
|
|
mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: unable to extract title') |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to extract title') |
|
|
|
return [None] |
|
|
|
video_title = mobj.group(1).decode('utf-8') |
|
|
|
|
|
|
|
mobj = re.search(r'(?m)<li id="ChnlUsr">.*?Submitter:<br />(.*?)</li>', webpage) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: unable to extract uploader nickname') |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to extract uploader nickname') |
|
|
|
return [None] |
|
|
|
video_uploader = re.sub(r'<.*?>', '', mobj.group(1)) |
|
|
|
|
|
|
@ -779,7 +768,7 @@ class YoutubeSearchIE(InfoExtractor): |
|
|
|
|
|
|
|
def report_download_page(self, query, pagenum):
    """Report attempt to download a search result page with given number.

    query and pagenum are interpolated into the status line, which is
    forwarded once to self._downloader.to_stdout(); a downloader must
    therefore be attached before this is called.
    """
    status = u'[youtube] query "%s": Downloading page %s' % (query, pagenum)
    self._downloader.to_stdout(status)
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
self._youtube_ie.initialize() |
|
|
@ -787,7 +776,7 @@ class YoutubeSearchIE(InfoExtractor): |
|
|
|
def _real_extract(self, query): |
|
|
|
mobj = re.match(self._VALID_QUERY, query) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: invalid search query "%s"' % query) |
|
|
|
self._downloader.to_stderr(u'ERROR: invalid search query "%s"' % query) |
|
|
|
return [None] |
|
|
|
|
|
|
|
prefix, query = query.split(':') |
|
|
@ -800,10 +789,10 @@ class YoutubeSearchIE(InfoExtractor): |
|
|
|
try: |
|
|
|
n = int(prefix) |
|
|
|
if n <= 0: |
|
|
|
self.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query)) |
|
|
|
self._downloader.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query)) |
|
|
|
return [None] |
|
|
|
elif n > self._max_youtube_results: |
|
|
|
self.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) |
|
|
|
self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) |
|
|
|
n = self._max_youtube_results |
|
|
|
return self._download_n_results(query, n) |
|
|
|
except ValueError: # parsing prefix as int fails |
|
|
@ -823,7 +812,7 @@ class YoutubeSearchIE(InfoExtractor): |
|
|
|
try: |
|
|
|
page = urllib2.urlopen(request).read() |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) |
|
|
|
return [None] |
|
|
|
|
|
|
|
# Extract video identifiers |
|
|
@ -866,7 +855,7 @@ class YoutubePlaylistIE(InfoExtractor): |
|
|
|
|
|
|
|
def report_download_page(self, playlist_id, pagenum):
    """Report attempt to download playlist page with given number.

    playlist_id and pagenum are interpolated into the status line,
    which is forwarded once to self._downloader.to_stdout(); a
    downloader must therefore be attached before this is called.
    """
    status = u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)
    self._downloader.to_stdout(status)
|
|
|
|
|
|
|
def _real_initialize(self): |
|
|
|
self._youtube_ie.initialize() |
|
|
@ -875,7 +864,7 @@ class YoutubePlaylistIE(InfoExtractor): |
|
|
|
# Extract playlist id |
|
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
|
if mobj is None: |
|
|
|
self.to_stderr(u'ERROR: invalid url: %s' % url) |
|
|
|
self._downloader.to_stderr(u'ERROR: invalid url: %s' % url) |
|
|
|
return [None] |
|
|
|
|
|
|
|
# Download playlist pages |
|
|
@ -889,7 +878,7 @@ class YoutubePlaylistIE(InfoExtractor): |
|
|
|
try: |
|
|
|
page = urllib2.urlopen(request).read() |
|
|
|
except (urllib2.URLError, httplib.HTTPException, socket.error), err: |
|
|
|
self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) |
|
|
|
self._downloader.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) |
|
|
|
return [None] |
|
|
|
|
|
|
|
# Extract video identifiers |
|
|
@ -930,15 +919,6 @@ class PostProcessor(object): |
|
|
|
def __init__(self, downloader=None):
    """Create the object, optionally attaching a downloader.

    downloader is stored as self._downloader; it defaults to None.
    """
    self._downloader = downloader
|
|
|
|
|
|
|
def to_stdout(self, message):
    """Print message to stdout if downloader is not in quiet mode.

    With no downloader attached (self._downloader is None) the message
    is always printed. With one attached, it is suppressed only when
    the downloader's 'quiet' parameter is true.
    """
    quiet = (self._downloader is not None
             and self._downloader.params.get('quiet', False))
    if not quiet:
        # Single-argument parenthesised form: parses as the print
        # statement on Python 2 and as the print function on Python 3.
        print(message)
|
|
|
|
|
|
|
def to_stderr(self, message):
    """Print message to stderr.

    The message is written unconditionally; no downloader parameter is
    consulted here.
    """
    print >>sys.stderr, message
|
|
|
|
|
|
|
def set_downloader(self, downloader):
    """Sets the downloader for this PP.

    The given object replaces whatever self._downloader held before.
    """
    self._downloader = downloader
|
|
|