You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

397 lines
12 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. """Python module to download videos.
  4. This module contains the actual downloaders responsible
  5. for downloading the video files.
  6. Note:
  7. downloaders.py is part of the youtubedlg package but it can be used
  8. as a stand alone module for downloading videos.
  9. """
  10. from __future__ import unicode_literals
  11. import os
  12. import sys
  13. import locale
  14. import subprocess
  15. from time import sleep
  16. from Queue import Queue
  17. from threading import Thread
  18. class PipeReader(Thread):
  19. """Helper class to avoid deadlocks when reading from subprocess pipes.
  20. This class uses python threads and queues in order to read from subprocess
  21. pipes in an asynchronous way.
  22. Attributes:
  23. WAIT_TIME (float): Time in seconds to sleep.
  24. Args:
  25. queue (Queue.Queue): Python queue to store the output of the subprocess.
  26. """
  27. WAIT_TIME = 0.1
  28. def __init__(self, queue):
  29. super(PipeReader, self).__init__()
  30. self._filedescriptor = None
  31. self._running = True
  32. self._queue = queue
  33. self.start()
  34. def run(self):
  35. while self._running:
  36. if self._filedescriptor is not None:
  37. for line in iter(self._filedescriptor.readline, ''):
  38. self._queue.put_nowait(line.rstrip())
  39. self._filedescriptor = None
  40. sleep(self.WAIT_TIME)
  41. def attach_filedescriptor(self, filedesc):
  42. """Attach a filedescriptor to the PipeReader. """
  43. self._filedescriptor = filedesc
  44. def join(self, timeout=None):
  45. self._running = False
  46. super(PipeReader, self).join(timeout)
  47. class YoutubeDLDownloader(object):
  48. """Python class for downloading videos using youtube-dl & subprocess.
  49. Attributes:
  50. OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): 'Random'
  51. integers that describe the return code from the download() method.
  52. Args:
  53. youtubedl_path (string): Absolute path to youtube-dl binary.
  54. data_hook (function): Optional callback function to retrieve download
  55. process data.
  56. log_data (function): Optional callback function to write data to
  57. the log file.
  58. Note:
  59. For available data keys check self._data under __init__().
  60. Warnings:
  61. The caller is responsible for calling the close() method after he has
  62. finished with the object in order for the object to be able to properly
  63. close down itself.
  64. Example:
  65. How to use YoutubeDLDownloader from a python script.
  66. from downloaders import YoutubeDLDownloader
  67. def data_hook(data):
  68. print data
  69. downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)
  70. downloader.download(<URL STRING>, ['-f', 'flv'])
  71. """
  72. OK = 0
  73. ERROR = 1
  74. STOPPED = 2
  75. ALREADY = 3
  76. FILESIZE_ABORT = 4
  77. WARNING = 5
  78. def __init__(self, youtubedl_path, data_hook=None, log_data=None):
  79. self.youtubedl_path = youtubedl_path
  80. self.data_hook = data_hook
  81. self.log_data = log_data
  82. self._return_code = 0
  83. self._proc = None
  84. self._data = {
  85. 'playlist_index': None,
  86. 'playlist_size': None,
  87. 'filesize': None,
  88. 'filename': None,
  89. 'percent': None,
  90. 'status': None,
  91. 'speed': None,
  92. 'eta': None
  93. }
  94. self._stderr_queue = Queue()
  95. self._stderr_reader = PipeReader(self._stderr_queue)
  96. def download(self, url, options):
  97. """Download url using given options.
  98. Args:
  99. url (string): URL string to download.
  100. options (list): Python list that contains youtube-dl options.
  101. Returns:
  102. An integer that shows the status of the download process.
  103. Right now we support 6 different return codes.
  104. OK (0): The download process completed successfully.
  105. ERROR (1): An error occured during the download process.
  106. STOPPED (2): The download process was stopped from the user.
  107. ALREADY (3): The given url is already downloaded.
  108. FILESIZE_ABORT (4): The corresponding url video file was larger or
  109. smaller from the given options filesize limit.
  110. WARNING (5): A warning occured during the download process.
  111. """
  112. self._reset()
  113. cmd = self._get_cmd(url, options)
  114. self._create_process(cmd)
  115. self._stderr_reader.attach_filedescriptor(self._proc.stderr)
  116. while self._proc_is_alive():
  117. stdout = self._proc.stdout.readline().rstrip().decode(self._get_encoding(), 'ignore')
  118. if stdout:
  119. self._sync_data(extract_data(stdout))
  120. self._hook_data()
  121. # Read stderr after download process has been completed
  122. # We don't need to read stderr in real time
  123. while not self._stderr_queue.empty():
  124. stderr = self._stderr_queue.get_nowait().decode(self._get_encoding(), 'ignore')
  125. self._log(stderr)
  126. if self._return_code != self.STOPPED:
  127. if self._is_warning(stderr):
  128. self._return_code = self.WARNING
  129. else:
  130. self._return_code = self.ERROR
  131. self._last_data_hook()
  132. return self._return_code
  133. def stop(self):
  134. """Stop the download process and set return code to STOPPED. """
  135. if self._proc_is_alive():
  136. self._proc.kill()
  137. self._return_code = self.STOPPED
  138. def close(self):
  139. """Destructor like function for the object. """
  140. self._stderr_reader.join()
  141. def _is_warning(self, stderr):
  142. return stderr.split(':')[0] == 'WARNING'
  143. def _last_data_hook(self):
  144. """Set the last data information based on the return code. """
  145. if self._return_code == self.OK:
  146. self._data['status'] = 'Finished'
  147. elif self._return_code == self.ERROR:
  148. self._data['status'] = 'Error'
  149. self._data['speed'] = ''
  150. self._data['eta'] = ''
  151. elif self._return_code == self.WARNING:
  152. self._data['status'] = 'Warning'
  153. self._data['speed'] = ''
  154. self._data['eta'] = ''
  155. elif self._return_code == self.STOPPED:
  156. self._data['status'] = 'Stopped'
  157. self._data['speed'] = ''
  158. self._data['eta'] = ''
  159. elif self._return_code == self.ALREADY:
  160. self._data['status'] = 'Already Downloaded'
  161. else:
  162. self._data['status'] = 'Filesize Abort'
  163. self._hook_data()
  164. def _reset(self):
  165. """Reset the data. """
  166. self._return_code = 0
  167. self._data = {
  168. 'playlist_index': None,
  169. 'playlist_size': None,
  170. 'filesize': None,
  171. 'filename': None,
  172. 'percent': None,
  173. 'status': None,
  174. 'speed': None,
  175. 'eta': None
  176. }
  177. def _sync_data(self, data):
  178. """Synchronise self._data with data. It also filters some keys.
  179. Args:
  180. data (dictionary): Python dictionary that contains different
  181. keys. The keys are not standar the dictionary can also be
  182. empty when there are no data to extract. See extract_data().
  183. """
  184. for key in data:
  185. if key == 'filename':
  186. # Keep only the filename on data['filename']
  187. data['filename'] = os.path.basename(data['filename'])
  188. if key == 'status':
  189. if data['status'] == 'Already Downloaded':
  190. # Set self._return_code to already downloaded
  191. # and trash that key
  192. self._return_code = self.ALREADY
  193. data['status'] = None
  194. if data['status'] == 'Filesize Abort':
  195. # Set self._return_code to filesize abort
  196. # and trash that key
  197. self._return_code = self.FILESIZE_ABORT
  198. data['status'] = None
  199. self._data[key] = data[key]
  200. def _log(self, data):
  201. """Log data using the callback function. """
  202. if self.log_data is not None:
  203. self.log_data(data)
  204. def _hook_data(self):
  205. """Pass self._data back to the data_hook. """
  206. if self.data_hook is not None:
  207. self.data_hook(self._data)
  208. def _proc_is_alive(self):
  209. """Returns True if self._proc is alive else False. """
  210. if self._proc is None:
  211. return False
  212. return self._proc.poll() is None
  213. def _get_cmd(self, url, options):
  214. """Build the subprocess command.
  215. Args:
  216. url (string): URL string to download.
  217. options (list): Python list that contains youtube-dl options.
  218. Returns:
  219. Python list that contains the command to execute.
  220. """
  221. if os.name == 'nt':
  222. cmd = [self.youtubedl_path] + options + [url]
  223. else:
  224. cmd = ['python', self.youtubedl_path] + options + [url]
  225. return cmd
  226. def _get_encoding(self):
  227. """Return system encoding. """
  228. try:
  229. encoding = locale.getpreferredencoding()
  230. 'TEST'.encode(encoding)
  231. except:
  232. encoding = 'UTF-8'
  233. return encoding
  234. def _create_process(self, cmd):
  235. """Create new subprocess.
  236. Args:
  237. cmd (list): Python list that contains the command to execute.
  238. """
  239. encoding = info = None
  240. # Hide subprocess window on Windows
  241. if os.name == 'nt':
  242. info = subprocess.STARTUPINFO()
  243. info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
  244. # Encode command for subprocess
  245. # Refer to http://stackoverflow.com/a/9951851/35070
  246. if sys.version_info < (3, 0):
  247. encoding = self._get_encoding()
  248. if encoding is not None:
  249. cmd = [item.encode(encoding, 'ignore') for item in cmd]
  250. self._proc = subprocess.Popen(cmd,
  251. stdout=subprocess.PIPE,
  252. stderr=subprocess.PIPE,
  253. startupinfo=info)
  254. def extract_data(stdout):
  255. """Extract data from youtube-dl stdout.
  256. Args:
  257. stdout (string): String that contains the youtube-dl stdout.
  258. Returns:
  259. Python dictionary. For available keys check self._data under
  260. YoutubeDLDownloader.__init__().
  261. """
  262. data_dictionary = dict()
  263. if not stdout:
  264. return data_dictionary
  265. stdout = [string for string in stdout.split(' ') if string != '']
  266. stdout[0] = stdout[0].lstrip('\r')
  267. if stdout[0] == '[download]':
  268. data_dictionary['status'] = 'Downloading'
  269. # Get filename
  270. if stdout[1] == 'Destination:':
  271. data_dictionary['filename'] = ' '.join(stdout[2:])
  272. # Get progress info
  273. if '%' in stdout[1]:
  274. if stdout[1] == '100%':
  275. data_dictionary['speed'] = ''
  276. data_dictionary['eta'] = ''
  277. else:
  278. data_dictionary['percent'] = stdout[1]
  279. data_dictionary['filesize'] = stdout[3]
  280. data_dictionary['speed'] = stdout[5]
  281. data_dictionary['eta'] = stdout[7]
  282. # Get playlist info
  283. if stdout[1] == 'Downloading' and stdout[2] == 'video':
  284. data_dictionary['playlist_index'] = stdout[3]
  285. data_dictionary['playlist_size'] = stdout[5]
  286. # Get file already downloaded status
  287. if stdout[-1] == 'downloaded':
  288. data_dictionary['status'] = 'Already Downloaded'
  289. # Get filesize abort status
  290. if stdout[-1] == 'Aborting.':
  291. data_dictionary['status'] = 'Filesize Abort'
  292. elif stdout[0] == '[ffmpeg]':
  293. data_dictionary['status'] = 'Post Processing'
  294. else:
  295. data_dictionary['status'] = 'Pre Processing'
  296. return data_dictionary