You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

380 lines
14 KiB

10 years ago
10 years ago
10 years ago
9 years ago
9 years ago
11 years ago
11 years ago
  1. from __future__ import division, unicode_literals
  2. import os
  3. import re
  4. import sys
  5. import time
  6. from ..compat import compat_os_name
  7. from ..utils import (
  8. encodeFilename,
  9. error_to_compat_str,
  10. decodeArgument,
  11. format_bytes,
  12. timeconvert,
  13. )
  14. class FileDownloader(object):
  15. """File Downloader class.
  16. File downloader objects are the ones responsible of downloading the
  17. actual video file and writing it to disk.
  18. File downloaders accept a lot of parameters. In order not to saturate
  19. the object constructor with arguments, it receives a dictionary of
  20. options instead.
  21. Available options:
  22. verbose: Print additional info to stdout.
  23. quiet: Do not print messages to stdout.
  24. ratelimit: Download speed limit, in bytes/sec.
  25. retries: Number of times to retry for HTTP error 5xx
  26. buffersize: Size of download buffer in bytes.
  27. noresizebuffer: Do not automatically resize the download buffer.
  28. continuedl: Try to continue downloads if possible.
  29. noprogress: Do not print the progress bar.
  30. logtostderr: Log messages to stderr instead of stdout.
  31. consoletitle: Display progress in console window's titlebar.
  32. nopart: Do not use temporary .part files.
  33. updatetime: Use the Last-modified header to set output file timestamps.
  34. test: Download only first bytes to test the downloader.
  35. min_filesize: Skip files smaller than this size
  36. max_filesize: Skip files larger than this size
  37. xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
  38. (experimental)
  39. external_downloader_args: A list of additional command-line arguments for the
  40. external downloader.
  41. hls_use_mpegts: Use the mpegts container for HLS videos.
  42. Subclasses of this one must re-define the real_download method.
  43. """
  44. _TEST_FILE_SIZE = 10241
  45. params = None
  46. def __init__(self, ydl, params):
  47. """Create a FileDownloader object with the given options."""
  48. self.ydl = ydl
  49. self._progress_hooks = []
  50. self.params = params
  51. self.add_progress_hook(self.report_progress)
  52. @staticmethod
  53. def format_seconds(seconds):
  54. (mins, secs) = divmod(seconds, 60)
  55. (hours, mins) = divmod(mins, 60)
  56. if hours > 99:
  57. return '--:--:--'
  58. if hours == 0:
  59. return '%02d:%02d' % (mins, secs)
  60. else:
  61. return '%02d:%02d:%02d' % (hours, mins, secs)
  62. @staticmethod
  63. def calc_percent(byte_counter, data_len):
  64. if data_len is None:
  65. return None
  66. return float(byte_counter) / float(data_len) * 100.0
  67. @staticmethod
  68. def format_percent(percent):
  69. if percent is None:
  70. return '---.-%'
  71. return '%6s' % ('%3.1f%%' % percent)
  72. @staticmethod
  73. def calc_eta(start, now, total, current):
  74. if total is None:
  75. return None
  76. if now is None:
  77. now = time.time()
  78. dif = now - start
  79. if current == 0 or dif < 0.001: # One millisecond
  80. return None
  81. rate = float(current) / dif
  82. return int((float(total) - float(current)) / rate)
  83. @staticmethod
  84. def format_eta(eta):
  85. if eta is None:
  86. return '--:--'
  87. return FileDownloader.format_seconds(eta)
  88. @staticmethod
  89. def calc_speed(start, now, bytes):
  90. dif = now - start
  91. if bytes == 0 or dif < 0.001: # One millisecond
  92. return None
  93. return float(bytes) / dif
  94. @staticmethod
  95. def format_speed(speed):
  96. if speed is None:
  97. return '%10s' % '---b/s'
  98. return '%10s' % ('%s/s' % format_bytes(speed))
  99. @staticmethod
  100. def format_retries(retries):
  101. return 'inf' if retries == float('inf') else '%.0f' % retries
  102. @staticmethod
  103. def best_block_size(elapsed_time, bytes):
  104. new_min = max(bytes / 2.0, 1.0)
  105. new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
  106. if elapsed_time < 0.001:
  107. return int(new_max)
  108. rate = bytes / elapsed_time
  109. if rate > new_max:
  110. return int(new_max)
  111. if rate < new_min:
  112. return int(new_min)
  113. return int(rate)
  114. @staticmethod
  115. def parse_bytes(bytestr):
  116. """Parse a string indicating a byte quantity into an integer."""
  117. matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
  118. if matchobj is None:
  119. return None
  120. number = float(matchobj.group(1))
  121. multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
  122. return int(round(number * multiplier))
  123. def to_screen(self, *args, **kargs):
  124. self.ydl.to_screen(*args, **kargs)
  125. def to_stderr(self, message):
  126. self.ydl.to_screen(message)
  127. def to_console_title(self, message):
  128. self.ydl.to_console_title(message)
  129. def trouble(self, *args, **kargs):
  130. self.ydl.trouble(*args, **kargs)
  131. def report_warning(self, *args, **kargs):
  132. self.ydl.report_warning(*args, **kargs)
  133. def report_error(self, *args, **kargs):
  134. self.ydl.report_error(*args, **kargs)
  135. def slow_down(self, start_time, now, byte_counter):
  136. """Sleep if the download speed is over the rate limit."""
  137. rate_limit = self.params.get('ratelimit')
  138. if rate_limit is None or byte_counter == 0:
  139. return
  140. if now is None:
  141. now = time.time()
  142. elapsed = now - start_time
  143. if elapsed <= 0.0:
  144. return
  145. speed = float(byte_counter) / elapsed
  146. if speed > rate_limit:
  147. time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
  148. def temp_name(self, filename):
  149. """Returns a temporary filename for the given filename."""
  150. if self.params.get('nopart', False) or filename == '-' or \
  151. (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
  152. return filename
  153. return filename + '.part'
  154. def undo_temp_name(self, filename):
  155. if filename.endswith('.part'):
  156. return filename[:-len('.part')]
  157. return filename
  158. def try_rename(self, old_filename, new_filename):
  159. try:
  160. if old_filename == new_filename:
  161. return
  162. os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
  163. except (IOError, OSError) as err:
  164. self.report_error('unable to rename file: %s' % error_to_compat_str(err))
  165. def try_utime(self, filename, last_modified_hdr):
  166. """Try to set the last-modified time of the given file."""
  167. if last_modified_hdr is None:
  168. return
  169. if not os.path.isfile(encodeFilename(filename)):
  170. return
  171. timestr = last_modified_hdr
  172. if timestr is None:
  173. return
  174. filetime = timeconvert(timestr)
  175. if filetime is None:
  176. return filetime
  177. # Ignore obviously invalid dates
  178. if filetime == 0:
  179. return
  180. try:
  181. os.utime(filename, (time.time(), filetime))
  182. except Exception:
  183. pass
  184. return filetime
  185. def report_destination(self, filename):
  186. """Report destination filename."""
  187. self.to_screen('[download] Destination: ' + filename)
  188. def _report_progress_status(self, msg, is_last_line=False):
  189. fullmsg = '[download] ' + msg
  190. if self.params.get('progress_with_newline', False):
  191. self.to_screen(fullmsg)
  192. else:
  193. if compat_os_name == 'nt':
  194. prev_len = getattr(self, '_report_progress_prev_line_length',
  195. 0)
  196. if prev_len > len(fullmsg):
  197. fullmsg += ' ' * (prev_len - len(fullmsg))
  198. self._report_progress_prev_line_length = len(fullmsg)
  199. clear_line = '\r'
  200. else:
  201. clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
  202. self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
  203. self.to_console_title('youtube-dl ' + msg)
  204. def report_progress(self, s):
  205. if s['status'] == 'finished':
  206. if self.params.get('noprogress', False):
  207. self.to_screen('[download] Download completed')
  208. else:
  209. s['_total_bytes_str'] = format_bytes(s['total_bytes'])
  210. if s.get('elapsed') is not None:
  211. s['_elapsed_str'] = self.format_seconds(s['elapsed'])
  212. msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
  213. else:
  214. msg_template = '100%% of %(_total_bytes_str)s'
  215. self._report_progress_status(
  216. msg_template % s, is_last_line=True)
  217. if self.params.get('noprogress'):
  218. return
  219. if s['status'] != 'downloading':
  220. return
  221. if s.get('eta') is not None:
  222. s['_eta_str'] = self.format_eta(s['eta'])
  223. else:
  224. s['_eta_str'] = 'Unknown ETA'
  225. if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
  226. s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
  227. elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
  228. s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
  229. else:
  230. if s.get('downloaded_bytes') == 0:
  231. s['_percent_str'] = self.format_percent(0)
  232. else:
  233. s['_percent_str'] = 'Unknown %'
  234. if s.get('speed') is not None:
  235. s['_speed_str'] = self.format_speed(s['speed'])
  236. else:
  237. s['_speed_str'] = 'Unknown speed'
  238. if s.get('total_bytes') is not None:
  239. s['_total_bytes_str'] = format_bytes(s['total_bytes'])
  240. msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
  241. elif s.get('total_bytes_estimate') is not None:
  242. s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
  243. msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
  244. else:
  245. if s.get('downloaded_bytes') is not None:
  246. s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
  247. if s.get('elapsed'):
  248. s['_elapsed_str'] = self.format_seconds(s['elapsed'])
  249. msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
  250. else:
  251. msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
  252. else:
  253. msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
  254. self._report_progress_status(msg_template % s)
  255. def report_resuming_byte(self, resume_len):
  256. """Report attempt to resume at given byte."""
  257. self.to_screen('[download] Resuming download at byte %s' % resume_len)
  258. def report_retry(self, count, retries):
  259. """Report retry in case of HTTP error 5xx"""
  260. self.to_screen(
  261. '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
  262. % (count, self.format_retries(retries)))
  263. def report_file_already_downloaded(self, file_name):
  264. """Report file has already been fully downloaded."""
  265. try:
  266. self.to_screen('[download] %s has already been downloaded' % file_name)
  267. except UnicodeEncodeError:
  268. self.to_screen('[download] The file has already been downloaded')
  269. def report_unable_to_resume(self):
  270. """Report it was impossible to resume download."""
  271. self.to_screen('[download] Unable to resume')
  272. def download(self, filename, info_dict):
  273. """Download to a filename using the info from info_dict
  274. Return True on success and False otherwise
  275. """
  276. nooverwrites_and_exists = (
  277. self.params.get('nooverwrites', False) and
  278. os.path.exists(encodeFilename(filename))
  279. )
  280. continuedl_and_exists = (
  281. self.params.get('continuedl', True) and
  282. os.path.isfile(encodeFilename(filename)) and
  283. not self.params.get('nopart', False)
  284. )
  285. # Check file already present
  286. if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
  287. self.report_file_already_downloaded(filename)
  288. self._hook_progress({
  289. 'filename': filename,
  290. 'status': 'finished',
  291. 'total_bytes': os.path.getsize(encodeFilename(filename)),
  292. })
  293. return True
  294. sleep_interval = self.params.get('sleep_interval')
  295. if sleep_interval:
  296. self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
  297. time.sleep(sleep_interval)
  298. return self.real_download(filename, info_dict)
  299. def real_download(self, filename, info_dict):
  300. """Real download process. Redefine in subclasses."""
  301. raise NotImplementedError('This method must be implemented by subclasses')
  302. def _hook_progress(self, status):
  303. for ph in self._progress_hooks:
  304. ph(status)
  305. def add_progress_hook(self, ph):
  306. # See YoutubeDl.py (search for progress_hooks) for a description of
  307. # this interface
  308. self._progress_hooks.append(ph)
  309. def _debug_cmd(self, args, exe=None):
  310. if not self.params.get('verbose', False):
  311. return
  312. str_args = [decodeArgument(a) for a in args]
  313. if exe is None:
  314. exe = os.path.basename(str_args[0])
  315. try:
  316. import pipes
  317. shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
  318. except ImportError:
  319. shell_quote = repr
  320. self.to_screen('[debug] %s command line: %s' % (
  321. exe, shell_quote(str_args)))