Split code as a package, compiled into an executable zip

13 years ago · d77c3dfd02
8 changed files with 4333 additions and 4284 deletions
--- a/5
+++ b/5
@ -18,6 +18,9 @@ update-readme:
 		echo "$${footer}" >> README.md

 compile:
-	cp youtube_dl/__init__.py youtube-dl
+	zip --junk-paths youtube-dl youtube_dl/*.py
+	echo '#!/usr/bin/env python' > youtube-dl
+	cat youtube-dl.zip >> youtube-dl
+	rm youtube-dl.zip

 .PHONY: default compile update update-latest update-readme
--- a/BIN
+++ b/BIN
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@ -0,0 +1,681 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import httplib
+import math
+import os
+import re
+import socket
+import subprocess
+import sys
+import time
+import urllib2
+
+if os.name == 'nt':
+	import ctypes
+	
+from Utils import *
+
+
+class FileDownloader(object):
+	"""File Downloader class.
+
+	File downloader objects are the ones responsible of downloading the
+	actual video file and writing it to disk if the user has requested
+	it, among some other tasks. In most cases there should be one per
+	program. As, given a video URL, the downloader doesn't know how to
+	extract all the needed information, task that InfoExtractors do, it
+	has to pass the URL to one of them.
+
+	For this, file downloader objects have a method that allows
+	InfoExtractors to be registered in a given order. When it is passed
+	a URL, the file downloader handles it to the first InfoExtractor it
+	finds that reports being able to handle it. The InfoExtractor extracts
+	all the information about the video or videos the URL refers to, and
+	asks the FileDownloader to process the video information, possibly
+	downloading the video.
+
+	File downloaders accept a lot of parameters. In order not to saturate
+	the object constructor with arguments, it receives a dictionary of
+	options instead. These options are available through the params
+	attribute for the InfoExtractors to use. The FileDownloader also
+	registers itself as the downloader in charge for the InfoExtractors
+	that are added to it, so this is a "mutual registration".
+
+	Available options:
+
+	username:         Username for authentication purposes.
+	password:         Password for authentication purposes.
+	usenetrc:         Use netrc for authentication instead.
+	quiet:            Do not print messages to stdout.
+	forceurl:         Force printing final URL.
+	forcetitle:       Force printing title.
+	forcethumbnail:   Force printing thumbnail URL.
+	forcedescription: Force printing description.
+	forcefilename:    Force printing final filename.
+	simulate:         Do not download the video files.
+	format:           Video format code.
+	format_limit:     Highest quality format to try.
+	outtmpl:          Template for output names.
+	ignoreerrors:     Do not stop on download errors.
+	ratelimit:        Download speed limit, in bytes/sec.
+	nooverwrites:     Prevent overwriting files.
+	retries:          Number of times to retry for HTTP error 5xx
+	continuedl:       Try to continue downloads if possible.
+	noprogress:       Do not print the progress bar.
+	playliststart:    Playlist item to start at.
+	playlistend:      Playlist item to end at.
+	matchtitle:       Download only matching titles.
+	rejecttitle:      Reject downloads for matching titles.
+	logtostderr:      Log messages to stderr instead of stdout.
+	consoletitle:     Display progress in console window's titlebar.
+	nopart:           Do not use temporary .part files.
+	updatetime:       Use the Last-modified header to set output file timestamps.
+	writedescription: Write the video description to a .description file
+	writeinfojson:    Write the video description to a .info.json file
+	writesubtitles:   Write the video subtitles to a .srt file
+	subtitleslang:    Language of the subtitles to download
+	"""
+
+	params = None
+	_ies = []
+	_pps = []
+	_download_retcode = None
+	_num_downloads = None
+	_screen_file = None
+
+	def __init__(self, params):
+		"""Create a FileDownloader object with the given options."""
+		self._ies = []
+		self._pps = []
+		self._download_retcode = 0
+		self._num_downloads = 0
+		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+		self.params = params
+
+	@staticmethod
+	def format_bytes(bytes):
+		if bytes is None:
+			return 'N/A'
+		if type(bytes) is str:
+			bytes = float(bytes)
+		if bytes == 0.0:
+			exponent = 0
+		else:
+			exponent = long(math.log(bytes, 1024.0))
+		suffix = 'bkMGTPEZY'[exponent]
+		converted = float(bytes) / float(1024 ** exponent)
+		return '%.2f%s' % (converted, suffix)
+
+	@staticmethod
+	def calc_percent(byte_counter, data_len):
+		if data_len is None:
+			return '---.-%'
+		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
+
+	@staticmethod
+	def calc_eta(start, now, total, current):
+		if total is None:
+			return '--:--'
+		dif = now - start
+		if current == 0 or dif < 0.001: # One millisecond
+			return '--:--'
+		rate = float(current) / dif
+		eta = long((float(total) - float(current)) / rate)
+		(eta_mins, eta_secs) = divmod(eta, 60)
+		if eta_mins > 99:
+			return '--:--'
+		return '%02d:%02d' % (eta_mins, eta_secs)
+
+	@staticmethod
+	def calc_speed(start, now, bytes):
+		dif = now - start
+		if bytes == 0 or dif < 0.001: # One millisecond
+			return '%10s' % '---b/s'
+		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
+
+	@staticmethod
+	def best_block_size(elapsed_time, bytes):
+		new_min = max(bytes / 2.0, 1.0)
+		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+		if elapsed_time < 0.001:
+			return long(new_max)
+		rate = bytes / elapsed_time
+		if rate > new_max:
+			return long(new_max)
+		if rate < new_min:
+			return long(new_min)
+		return long(rate)
+
+	@staticmethod
+	def parse_bytes(bytestr):
+		"""Parse a string indicating a byte quantity into a long integer."""
+		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
+		if matchobj is None:
+			return None
+		number = float(matchobj.group(1))
+		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
+		return long(round(number * multiplier))
+
+	def add_info_extractor(self, ie):
+		"""Add an InfoExtractor object to the end of the list."""
+		self._ies.append(ie)
+		ie.set_downloader(self)
+
+	def add_post_processor(self, pp):
+		"""Add a PostProcessor object to the end of the chain."""
+		self._pps.append(pp)
+		pp.set_downloader(self)
+
+	def to_screen(self, message, skip_eol=False):
+		"""Print message to stdout if not in quiet mode."""
+		assert type(message) == type(u'')
+		if not self.params.get('quiet', False):
+			terminator = [u'\n', u''][skip_eol]
+			output = message + terminator
+
+			if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+				output = output.encode(preferredencoding(), 'ignore')
+			self._screen_file.write(output)
+			self._screen_file.flush()
+
+	def to_stderr(self, message):
+		"""Print message to stderr."""
+		print >>sys.stderr, message.encode(preferredencoding())
+
+	def to_cons_title(self, message):
+		"""Set console/terminal window title to message."""
+		if not self.params.get('consoletitle', False):
+			return
+		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+			# c_wchar_p() might not be necessary if `message` is
+			# already of type unicode()
+			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+		elif 'TERM' in os.environ:
+			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
+
+	def fixed_template(self):
+		"""Checks if the output template is fixed."""
+		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
+
+	def trouble(self, message=None):
+		"""Determine action to take when a download problem appears.
+
+		Depending on if the downloader has been configured to ignore
+		download errors or not, this method may throw an exception or
+		not when errors are found, after printing the message.
+		"""
+		if message is not None:
+			self.to_stderr(message)
+		if not self.params.get('ignoreerrors', False):
+			raise DownloadError(message)
+		self._download_retcode = 1
+
+	def slow_down(self, start_time, byte_counter):
+		"""Sleep if the download speed is over the rate limit."""
+		rate_limit = self.params.get('ratelimit', None)
+		if rate_limit is None or byte_counter == 0:
+			return
+		now = time.time()
+		elapsed = now - start_time
+		if elapsed <= 0.0:
+			return
+		speed = float(byte_counter) / elapsed
+		if speed > rate_limit:
+			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
+
+	def temp_name(self, filename):
+		"""Returns a temporary filename for the given filename."""
+		if self.params.get('nopart', False) or filename == u'-' or \
+				(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
+			return filename
+		return filename + u'.part'
+
+	def undo_temp_name(self, filename):
+		if filename.endswith(u'.part'):
+			return filename[:-len(u'.part')]
+		return filename
+
+	def try_rename(self, old_filename, new_filename):
+		try:
+			if old_filename == new_filename:
+				return
+			os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+		except (IOError, OSError), err:
+			self.trouble(u'ERROR: unable to rename file')
+
+	def try_utime(self, filename, last_modified_hdr):
+		"""Try to set the last-modified time of the given file."""
+		if last_modified_hdr is None:
+			return
+		if not os.path.isfile(encodeFilename(filename)):
+			return
+		timestr = last_modified_hdr
+		if timestr is None:
+			return
+		filetime = timeconvert(timestr)
+		if filetime is None:
+			return filetime
+		try:
+			os.utime(filename, (time.time(), filetime))
+		except:
+			pass
+		return filetime
+
+	def report_writedescription(self, descfn):
+		""" Report that the description file is being written """
+		self.to_screen(u'[info] Writing video description to: ' + descfn)
+
+	def report_writesubtitles(self, srtfn):
+		""" Report that the subtitles file is being written """
+		self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
+
+	def report_writeinfojson(self, infofn):
+		""" Report that the metadata file has been written """
+		self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
+
+	def report_destination(self, filename):
+		"""Report destination filename."""
+		self.to_screen(u'[download] Destination: ' + filename)
+
+	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
+		"""Report download progress."""
+		if self.params.get('noprogress', False):
+			return
+		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
+				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
+				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
+
+	def report_resuming_byte(self, resume_len):
+		"""Report attempt to resume at given byte."""
+		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
+
+	def report_retry(self, count, retries):
+		"""Report retry in case of HTTP error 5xx"""
+		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
+
+	def report_file_already_downloaded(self, file_name):
+		"""Report file has already been fully downloaded."""
+		try:
+			self.to_screen(u'[download] %s has already been downloaded' % file_name)
+		except (UnicodeEncodeError), err:
+			self.to_screen(u'[download] The file has already been downloaded')
+
+	def report_unable_to_resume(self):
+		"""Report it was impossible to resume download."""
+		self.to_screen(u'[download] Unable to resume')
+
+	def report_finish(self):
+		"""Report download finished."""
+		if self.params.get('noprogress', False):
+			self.to_screen(u'[download] Download completed')
+		else:
+			self.to_screen(u'')
+
+	def increment_downloads(self):
+		"""Increment the ordinal that assigns a number to each file."""
+		self._num_downloads += 1
+
+	def prepare_filename(self, info_dict):
+		"""Generate the output filename."""
+		try:
+			template_dict = dict(info_dict)
+			template_dict['epoch'] = unicode(long(time.time()))
+			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
+			filename = self.params['outtmpl'] % template_dict
+			return filename
+		except (ValueError, KeyError), err:
+			self.trouble(u'ERROR: invalid system charset or erroneous output template')
+			return None
+
+	def _match_entry(self, info_dict):
+		""" Returns None iff the file should be downloaded """
+
+		title = info_dict['title']
+		matchtitle = self.params.get('matchtitle', False)
+		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
+			return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+		rejecttitle = self.params.get('rejecttitle', False)
+		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
+			return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+		return None
+
+	def process_info(self, info_dict):
+		"""Process a single dictionary returned by an InfoExtractor."""
+
+		reason = self._match_entry(info_dict)
+		if reason is not None:
+			self.to_screen(u'[download] ' + reason)
+			return
+
+		max_downloads = self.params.get('max_downloads')
+		if max_downloads is not None:
+			if self._num_downloads > int(max_downloads):
+				raise MaxDownloadsReached()
+
+		filename = self.prepare_filename(info_dict)
+		
+		# Forced printings
+		if self.params.get('forcetitle', False):
+			print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forceurl', False):
+			print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
+			print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forcedescription', False) and 'description' in info_dict:
+			print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forcefilename', False) and filename is not None:
+			print filename.encode(preferredencoding(), 'xmlcharrefreplace')
+		if self.params.get('forceformat', False):
+			print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
+
+		# Do nothing else if in simulate mode
+		if self.params.get('simulate', False):
+			return
+
+		if filename is None:
+			return
+
+		try:
+			dn = os.path.dirname(encodeFilename(filename))
+			if dn != '' and not os.path.exists(dn): # dn is already encoded
+				os.makedirs(dn)
+		except (OSError, IOError), err:
+			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
+			return
+
+		if self.params.get('writedescription', False):
+			try:
+				descfn = filename + u'.description'
+				self.report_writedescription(descfn)
+				descfile = open(encodeFilename(descfn), 'wb')
+				try:
+					descfile.write(info_dict['description'].encode('utf-8'))
+				finally:
+					descfile.close()
+			except (OSError, IOError):
+				self.trouble(u'ERROR: Cannot write description file ' + descfn)
+				return
+				
+		if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+			# subtitles download errors are already managed as troubles in relevant IE
+			# that way it will silently go on when used with unsupporting IE 
+			try:
+				srtfn = filename.rsplit('.', 1)[0] + u'.srt'
+				self.report_writesubtitles(srtfn)
+				srtfile = open(encodeFilename(srtfn), 'wb')
+				try:
+					srtfile.write(info_dict['subtitles'].encode('utf-8'))
+				finally:
+					srtfile.close()
+			except (OSError, IOError):
+				self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
+				return
+
+		if self.params.get('writeinfojson', False):
+			infofn = filename + u'.info.json'
+			self.report_writeinfojson(infofn)
+			try:
+				json.dump
+			except (NameError,AttributeError):
+				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
+				return
+			try:
+				infof = open(encodeFilename(infofn), 'wb')
+				try:
+					json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
+					json.dump(json_info_dict, infof)
+				finally:
+					infof.close()
+			except (OSError, IOError):
+				self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
+				return
+
+		if not self.params.get('skip_download', False):
+			if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
+				success = True
+			else:
+				try:
+					success = self._do_download(filename, info_dict)
+				except (OSError, IOError), err:
+					raise UnavailableVideoError
+				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+					self.trouble(u'ERROR: unable to download video data: %s' % str(err))
+					return
+				except (ContentTooShortError, ), err:
+					self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+					return
+	
+			if success:
+				try:
+					self.post_process(filename, info_dict)
+				except (PostProcessingError), err:
+					self.trouble(u'ERROR: postprocessing: %s' % str(err))
+					return
+
+	def download(self, url_list):
+		"""Download a given list of URLs."""
+		if len(url_list) > 1 and self.fixed_template():
+			raise SameFileError(self.params['outtmpl'])
+
+		for url in url_list:
+			suitable_found = False
+			for ie in self._ies:
+				# Go to next InfoExtractor if not suitable
+				if not ie.suitable(url):
+					continue
+
+				# Suitable InfoExtractor found
+				suitable_found = True
+
+				# Extract information from URL and process it
+				ie.extract(url)
+
+				# Suitable InfoExtractor had been found; go to next URL
+				break
+
+			if not suitable_found:
+				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
+
+		return self._download_retcode
+
+	def post_process(self, filename, ie_info):
+		"""Run the postprocessing chain on the given file."""
+		info = dict(ie_info)
+		info['filepath'] = filename
+		for pp in self._pps:
+			info = pp.run(info)
+			if info is None:
+				break
+
+	def _download_with_rtmpdump(self, filename, url, player_url):
+		self.report_destination(filename)
+		tmpfilename = self.temp_name(filename)
+
+		# Check for rtmpdump first
+		try:
+			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+		except (OSError, IOError):
+			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
+			return False
+
+		# Download using rtmpdump. rtmpdump returns exit code 2 when
+		# the connection was interrumpted and resuming appears to be
+		# possible. This is part of rtmpdump's normal usage, AFAIK.
+		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+		args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
+		if self.params.get('verbose', False):
+			try:
+				import pipes
+				shell_quote = lambda args: ' '.join(map(pipes.quote, args))
+			except ImportError:
+				shell_quote = repr
+			self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
+		retval = subprocess.call(args)
+		while retval == 2 or retval == 1:
+			prevsize = os.path.getsize(encodeFilename(tmpfilename))
+			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
+			time.sleep(5.0) # This seems to be needed
+			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
+			cursize = os.path.getsize(encodeFilename(tmpfilename))
+			if prevsize == cursize and retval == 1:
+				break
+			 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
+			if prevsize == cursize and retval == 2 and cursize > 1024:
+				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+				retval = 0
+				break
+		if retval == 0:
+			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
+			self.try_rename(tmpfilename, filename)
+			return True
+		else:
+			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
+			return False
+
+	def _do_download(self, filename, info_dict):
+		url = info_dict['url']
+		player_url = info_dict.get('player_url', None)
+
+		# Check file already present
+		if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
+			self.report_file_already_downloaded(filename)
+			return True
+
+		# Attempt to download using rtmpdump
+		if url.startswith('rtmp'):
+			return self._download_with_rtmpdump(filename, url, player_url)
+
+		tmpfilename = self.temp_name(filename)
+		stream = None
+
+		# Do not include the Accept-Encoding header
+		headers = {'Youtubedl-no-compression': 'True'}
+		basic_request = urllib2.Request(url, None, headers)
+		request = urllib2.Request(url, None, headers)
+
+		# Establish possible resume length
+		if os.path.isfile(encodeFilename(tmpfilename)):
+			resume_len = os.path.getsize(encodeFilename(tmpfilename))
+		else:
+			resume_len = 0
+
+		open_mode = 'wb'
+		if resume_len != 0:
+			if self.params.get('continuedl', False):
+				self.report_resuming_byte(resume_len)
+				request.add_header('Range','bytes=%d-' % resume_len)
+				open_mode = 'ab'
+			else:
+				resume_len = 0
+
+		count = 0
+		retries = self.params.get('retries', 0)
+		while count <= retries:
+			# Establish connection
+			try:
+				if count == 0 and 'urlhandle' in info_dict:
+					data = info_dict['urlhandle']
+				data = urllib2.urlopen(request)
+				break
+			except (urllib2.HTTPError, ), err:
+				if (err.code < 500 or err.code >= 600) and err.code != 416:
+					# Unexpected HTTP error
+					raise
+				elif err.code == 416:
+					# Unable to resume (requested range not satisfiable)
+					try:
+						# Open the connection again without the range header
+						data = urllib2.urlopen(basic_request)
+						content_length = data.info()['Content-Length']
+					except (urllib2.HTTPError, ), err:
+						if err.code < 500 or err.code >= 600:
+							raise
+					else:
+						# Examine the reported length
+						if (content_length is not None and
+								(resume_len - 100 < long(content_length) < resume_len + 100)):
+							# The file had already been fully downloaded.
+							# Explanation to the above condition: in issue #175 it was revealed that
+							# YouTube sometimes adds or removes a few bytes from the end of the file,
+							# changing the file size slightly and causing problems for some users. So
+							# I decided to implement a suggested change and consider the file
+							# completely downloaded if the file size differs less than 100 bytes from
+							# the one in the hard drive.
+							self.report_file_already_downloaded(filename)
+							self.try_rename(tmpfilename, filename)
+							return True
+						else:
+							# The length does not match, we start the download over
+							self.report_unable_to_resume()
+							open_mode = 'wb'
+							break
+			# Retry
+			count += 1
+			if count <= retries:
+				self.report_retry(count, retries)
+
+		if count > retries:
+			self.trouble(u'ERROR: giving up after %s retries' % retries)
+			return False
+
+		data_len = data.info().get('Content-length', None)
+		if data_len is not None:
+			data_len = long(data_len) + resume_len
+		data_len_str = self.format_bytes(data_len)
+		byte_counter = 0 + resume_len
+		block_size = 1024
+		start = time.time()
+		while True:
+			# Download and write
+			before = time.time()
+			data_block = data.read(block_size)
+			after = time.time()
+			if len(data_block) == 0:
+				break
+			byte_counter += len(data_block)
+
+			# Open file just in time
+			if stream is None:
+				try:
+					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
+					assert stream is not None
+					filename = self.undo_temp_name(tmpfilename)
+					self.report_destination(filename)
+				except (OSError, IOError), err:
+					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
+					return False
+			try:
+				stream.write(data_block)
+			except (IOError, OSError), err:
+				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
+				return False
+			block_size = self.best_block_size(after - before, len(data_block))
+
+			# Progress message
+			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
+			if data_len is None:
+				self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
+			else:
+				percent_str = self.calc_percent(byte_counter, data_len)
+				eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+				self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+
+			# Apply rate limit
+			self.slow_down(start, byte_counter - resume_len)
+
+		if stream is None:
+			self.trouble(u'\nERROR: Did not get any data blocks')
+			return False
+		stream.close()
+		self.report_finish()
+		if data_len is not None and byte_counter != data_len:
+			raise ContentTooShortError(byte_counter, long(data_len))
+		self.try_rename(tmpfilename, filename)
+
+		# Update file modification time
+		if self.params.get('updatetime', True):
+			info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
+
+		return True
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
--- a/youtube_dl/PostProcessing.py
+++ b/youtube_dl/PostProcessing.py
@ -0,0 +1,185 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import subprocess
+import sys
+import time
+
+from Utils import *
+
+
+class PostProcessor(object):
+	"""Post Processor class.
+
+	PostProcessor objects can be added to downloaders with their
+	add_post_processor() method. When the downloader has finished a
+	successful download, it will take its internal chain of PostProcessors
+	and start calling the run() method on each one of them, first with
+	an initial argument and then with the returned value of the previous
+	PostProcessor.
+
+	The chain will be stopped if one of them ever returns None or the end
+	of the chain is reached.
+
+	PostProcessor objects follow a "mutual registration" process similar
+	to InfoExtractor objects.
+	"""
+
+	_downloader = None
+
+	def __init__(self, downloader=None):
+		self._downloader = downloader
+
+	def set_downloader(self, downloader):
+		"""Sets the downloader for this PP."""
+		self._downloader = downloader
+
+	def run(self, information):
+		"""Run the PostProcessor.
+
+		The "information" argument is a dictionary like the ones
+		composed by InfoExtractors. The only difference is that this
+		one has an extra field called "filepath" that points to the
+		downloaded file.
+
+		When this method returns None, the postprocessing chain is
+		stopped. However, this method may return an information
+		dictionary that will be passed to the next postprocessing
+		object in the chain. It can be the one it received after
+		changing some fields.
+
+		In addition, this method may raise a PostProcessingError
+		exception that will be taken into account by the downloader
+		it was called from.
+		"""
+		return information # by default, do nothing
+
+class AudioConversionError(BaseException):
+	def __init__(self, message):
+		self.message = message
+
+class FFmpegExtractAudioPP(PostProcessor):
+
+	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
+		PostProcessor.__init__(self, downloader)
+		if preferredcodec is None:
+			preferredcodec = 'best'
+		self._preferredcodec = preferredcodec
+		self._preferredquality = preferredquality
+		self._keepvideo = keepvideo
+
+	@staticmethod
+	def get_audio_codec(path):
+		try:
+			cmd = ['ffprobe', '-show_streams', '--', encodeFilename(path)]
+			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
+			output = handle.communicate()[0]
+			if handle.wait() != 0:
+				return None
+		except (IOError, OSError):
+			return None
+		audio_codec = None
+		for line in output.split('\n'):
+			if line.startswith('codec_name='):
+				audio_codec = line.split('=')[1].strip()
+			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+				return audio_codec
+		return None
+
+	@staticmethod
+	def run_ffmpeg(path, out_path, codec, more_opts):
+		if codec is None:
+			acodec_opts = []
+		else:
+			acodec_opts = ['-acodec', codec]
+		cmd = ['ffmpeg', '-y', '-i', encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', encodeFilename(out_path)]
+		try:
+			p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+			stdout,stderr = p.communicate()
+		except (IOError, OSError):
+			e = sys.exc_info()[1]
+			if isinstance(e, OSError) and e.errno == 2:
+				raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
+			else:
+				raise e
+		if p.returncode != 0:
+			msg = stderr.strip().split('\n')[-1]
+			raise AudioConversionError(msg)
+
+	def run(self, information):
+		path = information['filepath']
+
+		filecodec = self.get_audio_codec(path)
+		if filecodec is None:
+			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
+			return None
+
+		more_opts = []
+		if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
+			if self._preferredcodec == 'm4a' and filecodec == 'aac':
+				# Lossless, but in another container
+				acodec = 'copy'
+				extension = self._preferredcodec
+				more_opts = ['-absf', 'aac_adtstoasc']
+			elif filecodec in ['aac', 'mp3', 'vorbis']:
+				# Lossless if possible
+				acodec = 'copy'
+				extension = filecodec
+				if filecodec == 'aac':
+					more_opts = ['-f', 'adts']
+				if filecodec == 'vorbis':
+					extension = 'ogg'
+			else:
+				# MP3 otherwise.
+				acodec = 'libmp3lame'
+				extension = 'mp3'
+				more_opts = []
+				if self._preferredquality is not None:
+					more_opts += ['-ab', self._preferredquality]
+		else:
+			# We convert the audio (lossy)
+			acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
+			extension = self._preferredcodec
+			more_opts = []
+			if self._preferredquality is not None:
+				more_opts += ['-ab', self._preferredquality]
+			if self._preferredcodec == 'aac':
+				more_opts += ['-f', 'adts']
+			if self._preferredcodec == 'm4a':
+				more_opts += ['-absf', 'aac_adtstoasc']
+			if self._preferredcodec == 'vorbis':
+				extension = 'ogg'
+			if self._preferredcodec == 'wav':
+				extension = 'wav'
+				more_opts += ['-f', 'wav']
+
+		prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
+		new_path = prefix + sep + extension
+		self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
+		try:
+			self.run_ffmpeg(path, new_path, acodec, more_opts)
+		except:
+			etype,e,tb = sys.exc_info()
+			if isinstance(e, AudioConversionError):
+				self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
+			else:
+				self._downloader.to_stderr(u'ERROR: error running ffmpeg')
+			return None
+
+ 		# Try to update the date time for extracted audio file.
+		if information.get('filetime') is not None:
+			try:
+				os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
+			except:
+				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
+
+		if not self._keepvideo:
+			try:
+				os.remove(encodeFilename(path))
+			except (IOError, OSError):
+				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
+				return None
+
+		information['filepath'] = new_path
+		return information
--- a/youtube_dl/Utils.py
+++ b/youtube_dl/Utils.py
@ -0,0 +1,375 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import gzip
+import htmlentitydefs
+import HTMLParser
+import locale
+import os
+import re
+import sys
+import zlib
+import urllib2
+import email.utils
+
+try:
+	import cStringIO as StringIO
+except ImportError:
+	import StringIO
+
+# Default headers for outgoing HTTP requests. YoutubeDLHandler.http_request
+# adds every entry to each request it processes, replacing any value the
+# request already carried for the same header.
+std_headers = {
+	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
+	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+	# Compressed responses are unpacked again in YoutubeDLHandler.http_response
+	'Accept-Encoding': 'gzip, deflate',
+	'Accept-Language': 'en-us,en;q=0.5',
+}
+
+def preferredencoding():
+	"""Get preferred encoding.
+
+	Returns the best encoding scheme for the system, based on
+	locale.getpreferredencoding() and some further tweaks: if the locale
+	cannot be queried, or it names a codec that cannot encode text,
+	'UTF-8' is returned instead.
+	"""
+	try:
+		pref = locale.getpreferredencoding()
+		# Make sure the reported codec really exists and is usable
+		u'TEST'.encode(pref)
+	except Exception:
+		# Best-effort fallback. Unlike a bare except, this lets
+		# KeyboardInterrupt and SystemExit propagate.
+		pref = 'UTF-8'
+	return pref
+
+
+def htmlentity_transform(matchobj):
+	"""Transforms an HTML entity to a Unicode character.
+
+	This function receives a match object and is intended to be used with
+	the re.sub() function. Group 1 of the match must hold the entity text
+	without the leading '&' and the trailing ';'.
+
+	Named entities (e.g. 'amp') and numeric character references, either
+	decimal ('#38') or hexadecimal ('#x26'), are replaced by the character
+	they denote. Anything else is returned unchanged as u'&entity;'.
+	"""
+	entity = matchobj.group(1)
+
+	# Known non-numeric HTML entity
+	if entity in htmlentitydefs.name2codepoint:
+		return unichr(htmlentitydefs.name2codepoint[entity])
+
+	# Numeric character reference. Hexadecimal references need the whole
+	# set of hex digits (decimal digits alone would cut '#x2F' down to
+	# '#x2'), and the pattern is anchored so that trailing garbage is not
+	# silently dropped.
+	mobj = re.match(ur'(?u)#(x[0-9a-fA-F]+|[0-9]+)$', entity)
+	if mobj is not None:
+		numstr = mobj.group(1)
+		if numstr.startswith(u'x'):
+			base = 16
+			numstr = u'0%s' % numstr
+		else:
+			base = 10
+		try:
+			return unichr(long(numstr, base))
+		except (ValueError, OverflowError):
+			# Code point outside the range unichr() supports: fall
+			# through and keep the entity as written
+			pass
+
+	# Unknown entity in name, return its literal representation
+	return (u'&%s;' % entity)
+
+
+def sanitize_title(utitle):
+	"""Make a video title usable as part of a filename.
+
+	HTML entities in the title are decoded first, then every occurrence
+	of the path separator (os.sep) is replaced by '%'.
+	"""
+	decoded = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
+	path_separator = unicode(os.sep)
+	return decoded.replace(path_separator, u'%')
+
+
+def sanitize_open(filename, open_mode):
+	"""Open the given filename, retrying once with a sanitized name.
+
+	The name u'-' selects standard output (switched to binary mode on
+	win32). Any other name is opened with the given mode. If that fails
+	with IOError or OSError, the characters / < > : " | ? * are replaced
+	by '#' and the open is attempted a second time; a failure of that
+	second attempt propagates to the caller, like the standard open()
+	function.
+
+	It returns the tuple (stream, definitive_file_name).
+	"""
+	try:
+		if filename != u'-':
+			return (open(encodeFilename(filename), open_mode), filename)
+		if sys.platform == 'win32':
+			import msvcrt
+			msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+		return (sys.stdout, filename)
+	except (IOError, OSError):
+		# In case of error, try to remove win32 forbidden chars
+		sanitized = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
+
+		# An exception here should be caught in the caller
+		return (open(encodeFilename(sanitized), open_mode), sanitized)
+
+
+def timeconvert(timestr):
+	"""Convert an RFC 2822 time string into a system timestamp.
+
+	Returns None when the string cannot be parsed.
+	"""
+	parsed = email.utils.parsedate_tz(timestr)
+	if parsed is None:
+		return None
+	return email.utils.mktime_tz(parsed)
+
+def simplify_title(title):
+	"""Collapse each run of characters other than word characters and '-'
+	into a single '_', then strip leading and trailing underscores."""
+	collapsed = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE).sub(u'_', title)
+	return collapsed.strip(u'_')
+
+def orderedSet(iterable):
+	""" Return a list of the elements of iterable without duplicates,
+	keeping the order in which each element first appears """
+	unique = []
+	for item in iterable:
+		# Membership is tested against the list (not a set), so the
+		# elements only need to support equality comparison.
+		if item in unique:
+			continue
+		unique.append(item)
+	return unique
+
+def unescapeHTML(s):
+	"""
+	Replace the HTML character references in s by the characters they
+	stand for, using HTMLParser.HTMLParser.unescape.
+
+	@param s a string (of type unicode)
+	"""
+	assert type(s) == unicode
+
+	return HTMLParser.HTMLParser().unescape(s)
+
+def encodeFilename(s):
+	"""
+	Convert a file name to the form expected by the file system functions.
+
+	@param s The name of the file (of type unicode)
+	@return s itself on Windows 2000 and up; everywhere else s encoded
+	        as a byte string in the file system encoding, with the
+	        characters that cannot be encoded dropped
+	"""
+
+	assert type(s) == type(u'')
+
+	# Index the version tuple instead of reading .major: the named
+	# attributes only exist from Python 2.7 on.
+	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+		# Pass u'' directly to use Unicode APIs on Windows 2000 and up
+		# (Detecting Windows NT 4 is tricky because 'major >= 4' would
+		# match Windows 9x series as well. Besides, NT 4 is obsolete.)
+		return s
+	else:
+		# sys.getfilesystemencoding() may return None, which encode()
+		# does not accept as a codec name
+		encoding = sys.getfilesystemencoding()
+		if encoding is None:
+			encoding = 'utf-8'
+		return s.encode(encoding, 'ignore')
+
+class DownloadError(Exception):
+	"""Error raised when a download fails.
+
+	FileDownloader objects may throw this exception when they are not
+	configured to continue on errors. It carries the appropriate error
+	message.
+	"""
+
+
+class SameFileError(Exception):
+	"""Error raised when several downloads target the same file.
+
+	FileDownloader objects throw this exception when they detect that
+	multiple files would have to be downloaded to the same file on disk.
+	"""
+
+
+class PostProcessingError(Exception):
+	"""Error raised when a postprocessing task fails.
+
+	A PostProcessor's .run() method may raise this exception to indicate
+	an error in the postprocessing task.
+	"""
+
+class MaxDownloadsReached(Exception):
+	""" Raised once the --max-downloads limit has been reached. """
+
+
+class UnavailableVideoError(Exception):
+	"""Error raised for an unavailable format.
+
+	This exception is thrown when a video is requested in a format that
+	is not available for that video.
+	"""
+
+
+class ContentTooShortError(Exception):
+	"""Content Too Short exception.
+
+	This exception may be raised by FileDownloader objects when a file they
+	download is too small for what the server announced first, indicating
+	the connection was probably interrupted.
+
+	Both byte counts are available as attributes and as the exception's
+	args, so that str(), repr() and tracebacks show them.
+	"""
+	# Both in bytes
+	downloaded = None
+	expected = None
+
+	def __init__(self, downloaded, expected):
+		"""Store the number of bytes received and the number announced."""
+		# Initialise the base class too; otherwise args stays empty and
+		# the exception prints without any detail.
+		Exception.__init__(self, downloaded, expected)
+		self.downloaded = downloaded
+		self.expected = expected
+
+	def __str__(self):
+		return 'content too short (expected %s bytes and served %s)' % (self.expected, self.downloaded)
+
+
+class YoutubeDLHandler(urllib2.HTTPHandler):
+	"""Handler for HTTP requests and responses.
+
+	When installed with an OpenerDirector, this class adds the standard
+	headers (std_headers) to every HTTP request and transparently unpacks
+	gzipped and deflated responses from web servers. A request that must
+	not be compressed only has to include the HTTP header
+	"Youtubedl-No-Compression" in the program code; that header is
+	removed again before the real request is made.
+
+	Part of this code was copied from:
+
+	http://techknack.net/python-urllib2-handlers/
+
+	Andrew Rowls, the author of that code, agreed to release it to the
+	public domain.
+	"""
+
+	@staticmethod
+	def deflate(data):
+		"""Decompress deflated data, with or without a zlib header."""
+		try:
+			# Negative window bits: raw deflate stream, no zlib header
+			return zlib.decompress(data, -zlib.MAX_WBITS)
+		except zlib.error:
+			return zlib.decompress(data)
+
+	@staticmethod
+	def addinfourl_wrapper(stream, headers, url, code):
+		"""Build an addinfourl response that carries the status code."""
+		if not hasattr(urllib2.addinfourl, 'getcode'):
+			# This addinfourl takes no code argument; set it by hand
+			wrapped = urllib2.addinfourl(stream, headers, url)
+			wrapped.code = code
+			return wrapped
+		return urllib2.addinfourl(stream, headers, url, code)
+
+	def http_request(self, req):
+		"""Put the standard headers on req and honour the no-compression marker."""
+		for name, value in std_headers.items():
+			if name in req.headers:
+				del req.headers[name]
+			req.add_header(name, value)
+		if 'Youtubedl-no-compression' in req.headers:
+			if 'Accept-encoding' in req.headers:
+				del req.headers['Accept-encoding']
+			del req.headers['Youtubedl-no-compression']
+		return req
+
+	def http_response(self, req, resp):
+		"""Return resp, rewrapped around the decoded body if it was compressed."""
+		original = resp
+		encoding = original.headers.get('Content-encoding', '')
+		if encoding == 'gzip':
+			body = gzip.GzipFile(fileobj=StringIO.StringIO(original.read()), mode='r')
+		elif encoding == 'deflate':
+			body = StringIO.StringIO(self.deflate(original.read()))
+		else:
+			# Not compressed: hand the response back untouched
+			return resp
+		resp = self.addinfourl_wrapper(body, original.headers, original.url, original.code)
+		resp.msg = original.msg
+		return resp
+		
+# Use the standard json module when it exists; otherwise define a minimal
+# stand-in class named json that only provides loads().
+try:
+	import json
+except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
+	import re
+	class json(object):
+		@staticmethod
+		def loads(s):
+			"""Parse the UTF-8 encoded JSON document s and return its value.
+
+			Raises ValueError on malformed input or when data follows
+			the first complete value.
+
+			Every parse* helper below takes the index at which its value
+			starts and returns the tuple (index after the value, value).
+			"""
+			s = s.decode('UTF-8')
+			def raiseError(msg, i):
+				raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
+			# Return the index of the first non-whitespace character at or
+			# after i; with expectMore, reaching the end of s is an error.
+			def skipSpace(i, expectMore=True):
+				while i < len(s) and s[i] in ' \t\r\n':
+					i += 1
+				if expectMore:
+					if i >= len(s):
+						raiseError('Premature end', i)
+				return i
+			# re.sub() callback: turn one backslash escape (group 1 holds
+			# the text after the backslash) into the character it denotes.
+			def decodeEscape(match):
+				esc = match.group(1)
+				_STATIC = {
+					'"': '"',
+					'\\': '\\',
+					'/': '/',
+					'b': unichr(0x8),
+					'f': unichr(0xc),
+					'n': '\n',
+					'r': '\r',
+					't': '\t',
+				}
+				if esc in _STATIC:
+					return _STATIC[esc]
+				if esc[0] == 'u':
+					# Plain \uXXXX escape
+					if len(esc) == 1+4:
+						return unichr(int(esc[1:5], 16))
+					# Two consecutive \u escapes forming a surrogate pair:
+					# combine them into a single code point
+					if len(esc) == 5+6 and esc[5:7] == '\\u':
+						hi = int(esc[1:5], 16)
+						low = int(esc[7:11], 16)
+						return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
+				raise ValueError('Unknown escape ' + str(esc))
+			# i is the index of the opening double quote.
+			def parseString(i):
+				i += 1
+				e = i
+				# Find the closing quote: the first '"' preceded by an even
+				# number of backslashes. s.index() raises ValueError if
+				# there is no further quote.
+				while True:
+					e = s.index('"', e)
+					bslashes = 0
+					while s[e-bslashes-1] == '\\':
+						bslashes += 1
+					if bslashes % 2 == 1:
+						e += 1
+						continue
+					break
+				# Surrogate pairs are listed first so that they are decoded
+				# as one unit rather than as two separate \u escapes
+				rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
+				stri = rexp.sub(decodeEscape, s[i:e])
+				return (e+1,stri)
+			# i is the index of the opening curly brace.
+			def parseObj(i):
+				i += 1
+				res = {}
+				i = skipSpace(i)
+				if s[i] == '}': # Empty dictionary
+					return (i+1,res)
+				while True:
+					if s[i] != '"':
+						raiseError('Expected a string object key', i)
+					i,key = parseString(i)
+					i = skipSpace(i)
+					if i >= len(s) or s[i] != ':':
+						raiseError('Expected a colon', i)
+					i,val = parse(i+1)
+					res[key] = val
+					i = skipSpace(i)
+					if s[i] == '}':
+						return (i+1, res)
+					if s[i] != ',':
+						raiseError('Expected comma or closing curly brace', i)
+					i = skipSpace(i+1)
+			# i is the index of the opening square bracket.
+			def parseArray(i):
+				res = []
+				i = skipSpace(i+1)
+				if s[i] == ']': # Empty array
+					return (i+1,res)
+				while True:
+					i,val = parse(i)
+					res.append(val)
+					i = skipSpace(i) # Raise exception if premature end
+					if s[i] == ']':
+						return (i+1, res)
+					if s[i] != ',':
+						raiseError('Expected a comma or closing bracket', i)
+					i = skipSpace(i+1)
+			# Parse one of the literals true, false and null.
+			def parseDiscrete(i):
+				for k,v in {'true': True, 'false': False, 'null': None}.items():
+					if s.startswith(k, i):
+						return (i+len(k), v)
+				raiseError('Not a boolean (or null)', i)
+			# Parse a number: a float when it has a fraction or an exponent,
+			# an int otherwise.
+			def parseNumber(i):
+				mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
+				if mobj is None:
+					raiseError('Not a number', i)
+				nums = mobj.group(1)
+				if '.' in nums or 'e' in nums or 'E' in nums:
+					return (i+len(nums), float(nums))
+				return (i+len(nums), int(nums))
+			# Dispatch on the first character of a value; anything not
+			# listed here is treated as a number (see parse).
+			CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
+			# Parse the value starting at or after i, skipping whitespace
+			# before and after it.
+			def parse(i):
+				i = skipSpace(i)
+				i,res = CHARMAP.get(s[i], parseNumber)(i)
+				i = skipSpace(i, False)
+				return (i,res)
+			i,res = parse(0)
+			# Anything left over after the top-level value is an error
+			if i < len(s):
+				raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
+			return res
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
--- a/youtube_dl/main.py
+++ b/youtube_dl/main.py
@ -0,0 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Script entry point: import the module named __init__ and hand control to
+# its main() function, but only when this file is executed directly.
+import __init__
+
+if __name__ == '__main__':
+	__init__.main()