318 lines
11 KiB

11 years ago
  1. import os
  2. import re
  3. import sys
  4. import time
  5. from ..utils import (
  6. compat_str,
  7. encodeFilename,
  8. format_bytes,
  9. timeconvert,
  10. )
  11. class FileDownloader(object):
  12. """File Downloader class.
  13. File downloader objects are the ones responsible of downloading the
  14. actual video file and writing it to disk.
  15. File downloaders accept a lot of parameters. In order not to saturate
  16. the object constructor with arguments, it receives a dictionary of
  17. options instead.
  18. Available options:
  19. verbose: Print additional info to stdout.
  20. quiet: Do not print messages to stdout.
  21. ratelimit: Download speed limit, in bytes/sec.
  22. retries: Number of times to retry for HTTP error 5xx
  23. buffersize: Size of download buffer in bytes.
  24. noresizebuffer: Do not automatically resize the download buffer.
  25. continuedl: Try to continue downloads if possible.
  26. noprogress: Do not print the progress bar.
  27. logtostderr: Log messages to stderr instead of stdout.
  28. consoletitle: Display progress in console window's titlebar.
  29. nopart: Do not use temporary .part files.
  30. updatetime: Use the Last-modified header to set output file timestamps.
  31. test: Download only first bytes to test the downloader.
  32. min_filesize: Skip files smaller than this size
  33. max_filesize: Skip files larger than this size
  34. Subclasses of this one must re-define the real_download method.
  35. """
  36. _TEST_FILE_SIZE = 10241
  37. params = None
  38. def __init__(self, ydl, params):
  39. """Create a FileDownloader object with the given options."""
  40. self.ydl = ydl
  41. self._progress_hooks = []
  42. self.params = params
  43. @staticmethod
  44. def format_seconds(seconds):
  45. (mins, secs) = divmod(seconds, 60)
  46. (hours, mins) = divmod(mins, 60)
  47. if hours > 99:
  48. return '--:--:--'
  49. if hours == 0:
  50. return '%02d:%02d' % (mins, secs)
  51. else:
  52. return '%02d:%02d:%02d' % (hours, mins, secs)
  53. @staticmethod
  54. def calc_percent(byte_counter, data_len):
  55. if data_len is None:
  56. return None
  57. return float(byte_counter) / float(data_len) * 100.0
  58. @staticmethod
  59. def format_percent(percent):
  60. if percent is None:
  61. return '---.-%'
  62. return '%6s' % ('%3.1f%%' % percent)
  63. @staticmethod
  64. def calc_eta(start, now, total, current):
  65. if total is None:
  66. return None
  67. dif = now - start
  68. if current == 0 or dif < 0.001: # One millisecond
  69. return None
  70. rate = float(current) / dif
  71. return int((float(total) - float(current)) / rate)
  72. @staticmethod
  73. def format_eta(eta):
  74. if eta is None:
  75. return '--:--'
  76. return FileDownloader.format_seconds(eta)
  77. @staticmethod
  78. def calc_speed(start, now, bytes):
  79. dif = now - start
  80. if bytes == 0 or dif < 0.001: # One millisecond
  81. return None
  82. return float(bytes) / dif
  83. @staticmethod
  84. def format_speed(speed):
  85. if speed is None:
  86. return '%10s' % '---b/s'
  87. return '%10s' % ('%s/s' % format_bytes(speed))
  88. @staticmethod
  89. def best_block_size(elapsed_time, bytes):
  90. new_min = max(bytes / 2.0, 1.0)
  91. new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
  92. if elapsed_time < 0.001:
  93. return int(new_max)
  94. rate = bytes / elapsed_time
  95. if rate > new_max:
  96. return int(new_max)
  97. if rate < new_min:
  98. return int(new_min)
  99. return int(rate)
  100. @staticmethod
  101. def parse_bytes(bytestr):
  102. """Parse a string indicating a byte quantity into an integer."""
  103. matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
  104. if matchobj is None:
  105. return None
  106. number = float(matchobj.group(1))
  107. multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
  108. return int(round(number * multiplier))
  109. def to_screen(self, *args, **kargs):
  110. self.ydl.to_screen(*args, **kargs)
  111. def to_stderr(self, message):
  112. self.ydl.to_screen(message)
  113. def to_console_title(self, message):
  114. self.ydl.to_console_title(message)
  115. def trouble(self, *args, **kargs):
  116. self.ydl.trouble(*args, **kargs)
  117. def report_warning(self, *args, **kargs):
  118. self.ydl.report_warning(*args, **kargs)
  119. def report_error(self, *args, **kargs):
  120. self.ydl.report_error(*args, **kargs)
  121. def slow_down(self, start_time, byte_counter):
  122. """Sleep if the download speed is over the rate limit."""
  123. rate_limit = self.params.get('ratelimit', None)
  124. if rate_limit is None or byte_counter == 0:
  125. return
  126. now = time.time()
  127. elapsed = now - start_time
  128. if elapsed <= 0.0:
  129. return
  130. speed = float(byte_counter) / elapsed
  131. if speed > rate_limit:
  132. time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
  133. def temp_name(self, filename):
  134. """Returns a temporary filename for the given filename."""
  135. if self.params.get('nopart', False) or filename == u'-' or \
  136. (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
  137. return filename
  138. return filename + u'.part'
  139. def undo_temp_name(self, filename):
  140. if filename.endswith(u'.part'):
  141. return filename[:-len(u'.part')]
  142. return filename
  143. def try_rename(self, old_filename, new_filename):
  144. try:
  145. if old_filename == new_filename:
  146. return
  147. os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
  148. except (IOError, OSError) as err:
  149. self.report_error(u'unable to rename file: %s' % compat_str(err))
  150. def try_utime(self, filename, last_modified_hdr):
  151. """Try to set the last-modified time of the given file."""
  152. if last_modified_hdr is None:
  153. return
  154. if not os.path.isfile(encodeFilename(filename)):
  155. return
  156. timestr = last_modified_hdr
  157. if timestr is None:
  158. return
  159. filetime = timeconvert(timestr)
  160. if filetime is None:
  161. return filetime
  162. # Ignore obviously invalid dates
  163. if filetime == 0:
  164. return
  165. try:
  166. os.utime(filename, (time.time(), filetime))
  167. except:
  168. pass
  169. return filetime
  170. def report_destination(self, filename):
  171. """Report destination filename."""
  172. self.to_screen(u'[download] Destination: ' + filename)
  173. def _report_progress_status(self, msg, is_last_line=False):
  174. fullmsg = u'[download] ' + msg
  175. if self.params.get('progress_with_newline', False):
  176. self.to_screen(fullmsg)
  177. else:
  178. if os.name == 'nt':
  179. prev_len = getattr(self, '_report_progress_prev_line_length',
  180. 0)
  181. if prev_len > len(fullmsg):
  182. fullmsg += u' ' * (prev_len - len(fullmsg))
  183. self._report_progress_prev_line_length = len(fullmsg)
  184. clear_line = u'\r'
  185. else:
  186. clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
  187. self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
  188. self.to_console_title(u'youtube-dl ' + msg)
  189. def report_progress(self, percent, data_len_str, speed, eta):
  190. """Report download progress."""
  191. if self.params.get('noprogress', False):
  192. return
  193. if eta is not None:
  194. eta_str = self.format_eta(eta)
  195. else:
  196. eta_str = 'Unknown ETA'
  197. if percent is not None:
  198. percent_str = self.format_percent(percent)
  199. else:
  200. percent_str = 'Unknown %'
  201. speed_str = self.format_speed(speed)
  202. msg = (u'%s of %s at %s ETA %s' %
  203. (percent_str, data_len_str, speed_str, eta_str))
  204. self._report_progress_status(msg)
  205. def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
  206. if self.params.get('noprogress', False):
  207. return
  208. downloaded_str = format_bytes(downloaded_data_len)
  209. speed_str = self.format_speed(speed)
  210. elapsed_str = FileDownloader.format_seconds(elapsed)
  211. msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
  212. self._report_progress_status(msg)
  213. def report_finish(self, data_len_str, tot_time):
  214. """Report download finished."""
  215. if self.params.get('noprogress', False):
  216. self.to_screen(u'[download] Download completed')
  217. else:
  218. self._report_progress_status(
  219. (u'100%% of %s in %s' %
  220. (data_len_str, self.format_seconds(tot_time))),
  221. is_last_line=True)
  222. def report_resuming_byte(self, resume_len):
  223. """Report attempt to resume at given byte."""
  224. self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
  225. def report_retry(self, count, retries):
  226. """Report retry in case of HTTP error 5xx"""
  227. self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
  228. def report_file_already_downloaded(self, file_name):
  229. """Report file has already been fully downloaded."""
  230. try:
  231. self.to_screen(u'[download] %s has already been downloaded' % file_name)
  232. except UnicodeEncodeError:
  233. self.to_screen(u'[download] The file has already been downloaded')
  234. def report_unable_to_resume(self):
  235. """Report it was impossible to resume download."""
  236. self.to_screen(u'[download] Unable to resume')
  237. def download(self, filename, info_dict):
  238. """Download to a filename using the info from info_dict
  239. Return True on success and False otherwise
  240. """
  241. # Check file already present
  242. if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
  243. self.report_file_already_downloaded(filename)
  244. self._hook_progress({
  245. 'filename': filename,
  246. 'status': 'finished',
  247. 'total_bytes': os.path.getsize(encodeFilename(filename)),
  248. })
  249. return True
  250. return self.real_download(filename, info_dict)
  251. def real_download(self, filename, info_dict):
  252. """Real download process. Redefine in subclasses."""
  253. raise NotImplementedError(u'This method must be implemented by subclasses')
  254. def _hook_progress(self, status):
  255. for ph in self._progress_hooks:
  256. ph(status)
  257. def add_progress_hook(self, ph):
  258. """ ph gets called on download progress, with a dictionary with the entries
  259. * filename: The final filename
  260. * status: One of "downloading" and "finished"
  261. It can also have some of the following entries:
  262. * downloaded_bytes: Bytes on disks
  263. * total_bytes: Total bytes, None if unknown
  264. * tmpfilename: The filename we're currently writing to
  265. * eta: The estimated time in seconds, None if unknown
  266. * speed: The download speed in bytes/second, None if unknown
  267. Hooks are guaranteed to be called at least once (with status "finished")
  268. if the download is successful.
  269. """
  270. self._progress_hooks.append(ph)