You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

372 lines
14 KiB

10 years ago
10 years ago
10 years ago
9 years ago
9 years ago
11 years ago
11 years ago
  1. from __future__ import division, unicode_literals
  2. import os
  3. import re
  4. import sys
  5. import time
  6. from ..compat import compat_str
  7. from ..utils import (
  8. encodeFilename,
  9. decodeArgument,
  10. format_bytes,
  11. timeconvert,
  12. )
  class FileDownloader(object):
      """File Downloader class.

      File downloader objects are the ones responsible for downloading the
      actual video file and writing it to disk.

      File downloaders accept a lot of parameters. In order not to saturate
      the object constructor with arguments, it receives a dictionary of
      options instead.

      Available options:

      verbose:            Print additional info to stdout.
      quiet:              Do not print messages to stdout.
      ratelimit:          Download speed limit, in bytes/sec.
      retries:            Number of times to retry for HTTP error 5xx
      buffersize:         Size of download buffer in bytes.
      noresizebuffer:     Do not automatically resize the download buffer.
      continuedl:         Try to continue downloads if possible.
      noprogress:         Do not print the progress bar.
      logtostderr:        Log messages to stderr instead of stdout.
      consoletitle:       Display progress in console window's titlebar.
      nopart:             Do not use temporary .part files.
      updatetime:         Use the Last-modified header to set output file
                          timestamps.
      test:               Download only first bytes to test the downloader.
      min_filesize:       Skip files smaller than this size
      max_filesize:       Skip files larger than this size
      xattr_set_filesize: Set ytdl.filesize user xattribute with expected
                          size. (experimental)
      external_downloader_args:  A list of additional command-line arguments
                          for the external downloader.

      The methods below additionally read these keys from the options
      dictionary: progress_with_newline, nooverwrites, sleep_interval.

      Subclasses of this one must re-define the real_download method.
      """

      _TEST_FILE_SIZE = 10241
      params = None

      def __init__(self, ydl, params):
          """Create a FileDownloader object with the given options."""
          self.ydl = ydl
          self._progress_hooks = []
          self.params = params
          # The built-in console reporter is always the first progress hook.
          self.add_progress_hook(self.report_progress)

      @staticmethod
      def format_seconds(seconds):
          """Format a duration as MM:SS or HH:MM:SS.

          Durations above 99 hours are rendered as '--:--:--'.
          """
          (mins, secs) = divmod(seconds, 60)
          (hours, mins) = divmod(mins, 60)
          if hours > 99:
              return '--:--:--'
          if hours == 0:
              return '%02d:%02d' % (mins, secs)
          else:
              return '%02d:%02d:%02d' % (hours, mins, secs)

      @staticmethod
      def calc_percent(byte_counter, data_len):
          """Return the completed percentage as a float.

          Returns None when the total length is unknown (None) or zero, since
          no meaningful percentage exists in either case.
          """
          if not data_len:
              return None
          return float(byte_counter) / float(data_len) * 100.0

      @staticmethod
      def format_percent(percent):
          """Right-align a percentage in six columns; None gives '---.-%'."""
          if percent is None:
              return '---.-%'
          return '%6s' % ('%3.1f%%' % percent)

      @staticmethod
      def calc_eta(start, now, total, current):
          """Estimate the remaining time in whole seconds.

          `now` defaults to the current time when None. Returns None when the
          total is unknown, nothing has been transferred yet, or less than a
          millisecond has elapsed (the rate would be meaningless).
          """
          if total is None:
              return None
          if now is None:
              now = time.time()
          dif = now - start
          if current == 0 or dif < 0.001:  # One millisecond
              return None
          rate = float(current) / dif
          return int((float(total) - float(current)) / rate)

      @staticmethod
      def format_eta(eta):
          """Format an ETA in seconds; None gives '--:--'."""
          if eta is None:
              return '--:--'
          return FileDownloader.format_seconds(eta)

      @staticmethod
      def calc_speed(start, now, bytes):
          """Return the average speed in bytes per second.

          Returns None when no bytes were transferred or less than a
          millisecond has elapsed.
          """
          dif = now - start
          if bytes == 0 or dif < 0.001:  # One millisecond
              return None
          return float(bytes) / dif

      @staticmethod
      def format_speed(speed):
          """Right-align a speed in ten columns; None gives '---b/s'."""
          if speed is None:
              return '%10s' % '---b/s'
          return '%10s' % ('%s/s' % format_bytes(speed))

      @staticmethod
      def best_block_size(elapsed_time, bytes):
          """Pick the next read size from the last block's throughput.

          The result is the measured rate clamped between half and double
          the previous block size, and never above 4 MB.
          """
          new_min = max(bytes / 2.0, 1.0)
          new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
          if elapsed_time < 0.001:
              # Too fast to measure: grow as much as allowed.
              return int(new_max)
          rate = bytes / elapsed_time
          if rate > new_max:
              return int(new_max)
          if rate < new_min:
              return int(new_min)
          return int(rate)

      @staticmethod
      def parse_bytes(bytestr):
          """Parse a string indicating a byte quantity into an integer.

          Accepts a decimal number optionally followed by one of the
          case-insensitive suffixes k, M, G, T, P, E, Z, Y (powers of 1024).
          Returns None when the string does not match.
          """
          matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
          if matchobj is None:
              return None
          number = float(matchobj.group(1))
          # An empty suffix is found at index 0, which gives a multiplier of 1.
          multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
          return int(round(number * multiplier))

      def to_screen(self, *args, **kargs):
          """Forward to the owning ydl object's to_screen."""
          self.ydl.to_screen(*args, **kargs)

      def to_stderr(self, message):
          """Forward to the owning ydl object's to_stderr."""
          # Previously this called ydl.to_screen, so messages meant for
          # stderr were routed like ordinary screen output.
          self.ydl.to_stderr(message)

      def to_console_title(self, message):
          """Forward to the owning ydl object's to_console_title."""
          self.ydl.to_console_title(message)

      def trouble(self, *args, **kargs):
          """Forward to the owning ydl object's trouble."""
          self.ydl.trouble(*args, **kargs)

      def report_warning(self, *args, **kargs):
          """Forward to the owning ydl object's report_warning."""
          self.ydl.report_warning(*args, **kargs)

      def report_error(self, *args, **kargs):
          """Forward to the owning ydl object's report_error."""
          self.ydl.report_error(*args, **kargs)

      def slow_down(self, start_time, now, byte_counter):
          """Sleep if the download speed is over the rate limit.

          `now` defaults to the current time when None. Nothing happens when
          no 'ratelimit' option is set or no bytes have been transferred.
          """
          rate_limit = self.params.get('ratelimit', None)
          if rate_limit is None or byte_counter == 0:
              return
          if now is None:
              now = time.time()
          elapsed = now - start_time
          if elapsed <= 0.0:
              return
          speed = float(byte_counter) / elapsed
          if speed > rate_limit:
              # Time the transfer *should* have taken minus the time it did
              # take. True division is required: floor division truncated
              # the target to whole seconds and under-throttled.
              sleep_time = float(byte_counter) / rate_limit - elapsed
              if sleep_time > 0:
                  time.sleep(sleep_time)

      def temp_name(self, filename):
          """Returns a temporary filename for the given filename.

          The name is returned unchanged when .part files are disabled, when
          writing to '-', or when the path exists but is not a regular file.
          """
          if self.params.get('nopart', False) or filename == '-':
              return filename
          encoded = encodeFilename(filename)
          if os.path.exists(encoded) and not os.path.isfile(encoded):
              return filename
          return filename + '.part'

      def undo_temp_name(self, filename):
          """Strip a trailing '.part' suffix, if present."""
          if filename.endswith('.part'):
              return filename[:-len('.part')]
          return filename

      def try_rename(self, old_filename, new_filename):
          """Rename a file, reporting (not raising) on failure."""
          try:
              if old_filename == new_filename:
                  return
              os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
          except (IOError, OSError) as err:
              self.report_error('unable to rename file: %s' % compat_str(err))

      def try_utime(self, filename, last_modified_hdr):
          """Try to set the last-modified time of the given file.

          Returns the value produced by timeconvert() for the header, or None
          when the header is missing, the file does not exist, or the
          converted time is None or 0.
          """
          if last_modified_hdr is None:
              return None
          # Use the same encoded path for the existence check and for utime.
          encoded = encodeFilename(filename)
          if not os.path.isfile(encoded):
              return None
          filetime = timeconvert(last_modified_hdr)
          # Ignore unparsable and obviously invalid dates
          if filetime is None or filetime == 0:
              return None
          try:
              os.utime(encoded, (time.time(), filetime))
          except Exception:
              # Best effort only: a failed timestamp update must not abort
              # an otherwise successful download.
              pass
          return filetime

      def report_destination(self, filename):
          """Report destination filename."""
          self.to_screen('[download] Destination: ' + filename)

      def _report_progress_status(self, msg, is_last_line=False):
          """Print one progress line, overwriting the previous one in place.

          With the 'progress_with_newline' option every update is printed on
          its own line instead. The console title is updated in all cases.
          """
          fullmsg = '[download] ' + msg
          if self.params.get('progress_with_newline', False):
              self.to_screen(fullmsg)
          else:
              if os.name == 'nt':
                  # No erase-line escape is used here, so pad with spaces to
                  # cover whatever the previous, longer line left behind.
                  prev_len = getattr(self, '_report_progress_prev_line_length',
                                     0)
                  if prev_len > len(fullmsg):
                      fullmsg += ' ' * (prev_len - len(fullmsg))
                  self._report_progress_prev_line_length = len(fullmsg)
                  clear_line = '\r'
              else:
                  clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
              self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
          self.to_console_title('youtube-dl ' + msg)

      def report_progress(self, s):
          """Default progress hook: render the status dict `s` to the screen.

          Only the 'finished' and 'downloading' statuses produce output.
          Helper keys starting with '_' are written into `s` as a side
          effect of formatting.
          """
          if s['status'] == 'finished':
              if self.params.get('noprogress', False):
                  self.to_screen('[download] Download completed')
              else:
                  # Build the line from whichever fields are available, so a
                  # status dict without 'total_bytes' no longer raises.
                  msg_template = '100%%'
                  if s.get('total_bytes') is not None:
                      s['_total_bytes_str'] = format_bytes(s['total_bytes'])
                      msg_template += ' of %(_total_bytes_str)s'
                  if s.get('elapsed') is not None:
                      s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                      msg_template += ' in %(_elapsed_str)s'
                  self._report_progress_status(
                      msg_template % s, is_last_line=True)
              return

          if self.params.get('noprogress'):
              return

          if s['status'] != 'downloading':
              return

          if s.get('eta') is not None:
              s['_eta_str'] = self.format_eta(s['eta'])
          else:
              s['_eta_str'] = 'Unknown ETA'

          if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
              s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
          elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
              s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
          else:
              if s.get('downloaded_bytes') == 0:
                  s['_percent_str'] = self.format_percent(0)
              else:
                  s['_percent_str'] = 'Unknown %'

          if s.get('speed') is not None:
              s['_speed_str'] = self.format_speed(s['speed'])
          else:
              s['_speed_str'] = 'Unknown speed'

          if s.get('total_bytes') is not None:
              s['_total_bytes_str'] = format_bytes(s['total_bytes'])
              msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
          elif s.get('total_bytes_estimate') is not None:
              s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
              msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
          else:
              if s.get('downloaded_bytes') is not None:
                  s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
                  if s.get('elapsed'):
                      s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                      msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
                  else:
                      msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
              else:
                  # A stray '% ' used to sit after the percent field, which
                  # is not a valid literal in a %-format string.
                  msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'

          self._report_progress_status(msg_template % s)

      def report_resuming_byte(self, resume_len):
          """Report attempt to resume at given byte."""
          self.to_screen('[download] Resuming download at byte %s' % resume_len)

      def report_retry(self, count, retries):
          """Report retry in case of HTTP error 5xx"""
          self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

      def report_file_already_downloaded(self, file_name):
          """Report file has already been fully downloaded."""
          try:
              self.to_screen('[download] %s has already been downloaded' % file_name)
          except UnicodeEncodeError:
              # Fall back to a message that does not contain the file name.
              self.to_screen('[download] The file has already been downloaded')

      def report_unable_to_resume(self):
          """Report it was impossible to resume download."""
          self.to_screen('[download] Unable to resume')

      def download(self, filename, info_dict):
          """Download to a filename using the info from info_dict

          Return True on success and False otherwise
          """
          nooverwrites_and_exists = (
              self.params.get('nooverwrites', False) and
              os.path.exists(encodeFilename(filename))
          )

          continuedl_and_exists = (
              self.params.get('continuedl', True) and
              os.path.isfile(encodeFilename(filename)) and
              not self.params.get('nopart', False)
          )

          # Check file already present
          if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
              self.report_file_already_downloaded(filename)
              # Notify the hooks as if the download had just completed.
              self._hook_progress({
                  'filename': filename,
                  'status': 'finished',
                  'total_bytes': os.path.getsize(encodeFilename(filename)),
              })
              return True

          sleep_interval = self.params.get('sleep_interval')
          if sleep_interval:
              self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
              time.sleep(sleep_interval)

          return self.real_download(filename, info_dict)

      def real_download(self, filename, info_dict):
          """Real download process. Redefine in subclasses."""
          raise NotImplementedError('This method must be implemented by subclasses')

      def _hook_progress(self, status):
          """Call every registered progress hook with the status dict."""
          for ph in self._progress_hooks:
              ph(status)

      def add_progress_hook(self, ph):
          """Register a callable to be invoked with each status dict."""
          # See YoutubeDl.py (search for progress_hooks) for a description of
          # this interface
          self._progress_hooks.append(ph)

      def _debug_cmd(self, args, exe=None):
          """Print an external command line when the 'verbose' option is set.

          `exe` defaults to the basename of the first argument.
          """
          if not self.params.get('verbose', False):
              return

          str_args = [decodeArgument(a) for a in args]

          if exe is None:
              exe = os.path.basename(str_args[0])

          # Prefer shlex.quote, fall back to the legacy pipes.quote, and use
          # repr() only when neither is importable.
          try:
              from shlex import quote
          except ImportError:
              try:
                  from pipes import quote
              except ImportError:
                  quote = None

          if quote is None:
              cmdline = repr(str_args)
          else:
              cmdline = ' '.join(quote(a) for a in str_args)

          self.to_screen('[debug] %s command line: %s' % (exe, cmdline))