You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

427 lines
13 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. """Python module to download videos.
  4. This module contains the actual downloaders responsible
  5. for downloading the video files.
  6. Note:
  7. downloaders.py is part of the youtubedlg package but it can be used
  8. as a stand alone module for downloading videos.
  9. """
  10. from __future__ import unicode_literals
  11. import os
  12. import sys
  13. import locale
  14. import signal
  15. import subprocess
  16. from time import sleep
  17. from Queue import Queue
  18. from threading import Thread
  19. class PipeReader(Thread):
  20. """Helper class to avoid deadlocks when reading from subprocess pipes.
  21. This class uses python threads and queues in order to read from subprocess
  22. pipes in an asynchronous way.
  23. Attributes:
  24. WAIT_TIME (float): Time in seconds to sleep.
  25. Args:
  26. queue (Queue.Queue): Python queue to store the output of the subprocess.
  27. """
  28. WAIT_TIME = 0.1
  29. def __init__(self, queue):
  30. super(PipeReader, self).__init__()
  31. self._filedescriptor = None
  32. self._running = True
  33. self._queue = queue
  34. self.start()
  35. def run(self):
  36. # Flag to ignore specific lines
  37. ignore_line = False
  38. while self._running:
  39. if self._filedescriptor is not None:
  40. for line in iter(self._filedescriptor.readline, ''):
  41. # Ignore ffmpeg stderr
  42. if str('ffmpeg version') in line:
  43. ignore_line = True
  44. if not ignore_line:
  45. self._queue.put_nowait(line)
  46. self._filedescriptor = None
  47. ignore_line = False
  48. sleep(self.WAIT_TIME)
  49. def attach_filedescriptor(self, filedesc):
  50. """Attach a filedescriptor to the PipeReader. """
  51. self._filedescriptor = filedesc
  52. def join(self, timeout=None):
  53. self._running = False
  54. super(PipeReader, self).join(timeout)
  55. class YoutubeDLDownloader(object):
  56. """Python class for downloading videos using youtube-dl & subprocess.
  57. Attributes:
  58. OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): Integers
  59. that describe the return code from the download() method. The
  60. larger the number the higher is the hierarchy of the code.
  61. Codes with smaller hierachy cannot overwrite codes with higher
  62. hierarchy.
  63. Args:
  64. youtubedl_path (string): Absolute path to youtube-dl binary.
  65. data_hook (function): Optional callback function to retrieve download
  66. process data.
  67. log_data (function): Optional callback function to write data to
  68. the log file.
  69. Note:
  70. For available data keys check self._data under __init__().
  71. Warnings:
  72. The caller is responsible for calling the close() method after he has
  73. finished with the object in order for the object to be able to properly
  74. close down itself.
  75. Example:
  76. How to use YoutubeDLDownloader from a python script.
  77. from downloaders import YoutubeDLDownloader
  78. def data_hook(data):
  79. print data
  80. downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)
  81. downloader.download(<URL STRING>, ['-f', 'flv'])
  82. """
  83. OK = 0
  84. WARNING = 1
  85. ERROR = 2
  86. FILESIZE_ABORT = 3
  87. ALREADY = 4
  88. STOPPED = 5
  89. def __init__(self, youtubedl_path, data_hook=None, log_data=None):
  90. self.youtubedl_path = youtubedl_path
  91. self.data_hook = data_hook
  92. self.log_data = log_data
  93. self._return_code = self.OK
  94. self._proc = None
  95. self._data = {
  96. 'playlist_index': None,
  97. 'playlist_size': None,
  98. 'filesize': None,
  99. 'filename': None,
  100. 'percent': None,
  101. 'status': None,
  102. 'speed': None,
  103. 'eta': None
  104. }
  105. self._stderr_queue = Queue()
  106. self._stderr_reader = PipeReader(self._stderr_queue)
  107. def download(self, url, options):
  108. """Download url using given options.
  109. Args:
  110. url (string): URL string to download.
  111. options (list): Python list that contains youtube-dl options.
  112. Returns:
  113. An integer that shows the status of the download process.
  114. There are 6 different return codes.
  115. OK (0): The download process completed successfully.
  116. WARNING (1): A warning occured during the download process.
  117. ERROR (2): An error occured during the download process.
  118. FILESIZE_ABORT (3): The corresponding url video file was larger or
  119. smaller from the given filesize limit.
  120. ALREADY (4): The given url is already downloaded.
  121. STOPPED (5): The download process was stopped by the user.
  122. """
  123. self._reset()
  124. cmd = self._get_cmd(url, options)
  125. self._create_process(cmd)
  126. self._stderr_reader.attach_filedescriptor(self._proc.stderr)
  127. while self._proc_is_alive():
  128. stdout = self._proc.stdout.readline().rstrip().decode(self._get_encoding(), 'ignore')
  129. if stdout:
  130. self._sync_data(extract_data(stdout))
  131. self._hook_data()
  132. # Read stderr after download process has been completed
  133. # We don't need to read stderr in real time
  134. while not self._stderr_queue.empty():
  135. stderr = self._stderr_queue.get_nowait().rstrip().decode(self._get_encoding(), 'ignore')
  136. self._log(stderr)
  137. if self._is_warning(stderr):
  138. self._set_returncode(self.WARNING)
  139. else:
  140. self._set_returncode(self.ERROR)
  141. self._last_data_hook()
  142. return self._return_code
  143. def stop(self):
  144. """Stop the download process and set return code to STOPPED. """
  145. if self._proc_is_alive():
  146. if os.name == 'nt':
  147. # os.killpg is not available on Windows
  148. # See: https://bugs.python.org/issue5115
  149. self._proc.kill()
  150. else:
  151. os.killpg(self._proc.pid, signal.SIGKILL)
  152. self._set_returncode(self.STOPPED)
  153. def close(self):
  154. """Destructor like function for the object. """
  155. self._stderr_reader.join()
  156. def _set_returncode(self, code):
  157. """Set self._return_code only if the hierarchy of the given code is
  158. higher than the current self._return_code. """
  159. if code >= self._return_code:
  160. self._return_code = code
  161. def _is_warning(self, stderr):
  162. return stderr.split(':')[0] == 'WARNING'
  163. def _last_data_hook(self):
  164. """Set the last data information based on the return code. """
  165. if self._return_code == self.OK:
  166. self._data['status'] = 'Finished'
  167. elif self._return_code == self.ERROR:
  168. self._data['status'] = 'Error'
  169. self._data['speed'] = ''
  170. self._data['eta'] = ''
  171. elif self._return_code == self.WARNING:
  172. self._data['status'] = 'Warning'
  173. self._data['speed'] = ''
  174. self._data['eta'] = ''
  175. elif self._return_code == self.STOPPED:
  176. self._data['status'] = 'Stopped'
  177. self._data['speed'] = ''
  178. self._data['eta'] = ''
  179. elif self._return_code == self.ALREADY:
  180. self._data['status'] = 'Already Downloaded'
  181. else:
  182. self._data['status'] = 'Filesize Abort'
  183. self._hook_data()
  184. def _reset(self):
  185. """Reset the data. """
  186. self._return_code = self.OK
  187. self._data = {
  188. 'playlist_index': None,
  189. 'playlist_size': None,
  190. 'filesize': None,
  191. 'filename': None,
  192. 'percent': None,
  193. 'status': None,
  194. 'speed': None,
  195. 'eta': None
  196. }
  197. def _sync_data(self, data):
  198. """Synchronise self._data with data. It also filters some keys.
  199. Args:
  200. data (dictionary): Python dictionary that contains different
  201. keys. The keys are not standar the dictionary can also be
  202. empty when there are no data to extract. See extract_data().
  203. """
  204. for key in data:
  205. if key == 'filename':
  206. # Keep only the filename on data['filename']
  207. data['filename'] = os.path.basename(data['filename'])
  208. if key == 'status':
  209. if data['status'] == 'Already Downloaded':
  210. # Set self._return_code to already downloaded
  211. # and trash that key
  212. self._set_returncode(self.ALREADY)
  213. data['status'] = None
  214. if data['status'] == 'Filesize Abort':
  215. # Set self._return_code to filesize abort
  216. # and trash that key
  217. self._set_returncode(self.FILESIZE_ABORT)
  218. data['status'] = None
  219. self._data[key] = data[key]
  220. def _log(self, data):
  221. """Log data using the callback function. """
  222. if self.log_data is not None:
  223. self.log_data(data)
  224. def _hook_data(self):
  225. """Pass self._data back to the data_hook. """
  226. if self.data_hook is not None:
  227. self.data_hook(self._data)
  228. def _proc_is_alive(self):
  229. """Returns True if self._proc is alive else False. """
  230. if self._proc is None:
  231. return False
  232. return self._proc.poll() is None
  233. def _get_cmd(self, url, options):
  234. """Build the subprocess command.
  235. Args:
  236. url (string): URL string to download.
  237. options (list): Python list that contains youtube-dl options.
  238. Returns:
  239. Python list that contains the command to execute.
  240. """
  241. if os.name == 'nt':
  242. cmd = [self.youtubedl_path] + options + [url]
  243. else:
  244. cmd = ['python', self.youtubedl_path] + options + [url]
  245. return cmd
  246. def _get_encoding(self):
  247. """Return system encoding. """
  248. try:
  249. encoding = locale.getpreferredencoding()
  250. 'TEST'.encode(encoding)
  251. except:
  252. encoding = 'UTF-8'
  253. return encoding
  254. def _create_process(self, cmd):
  255. """Create new subprocess.
  256. Args:
  257. cmd (list): Python list that contains the command to execute.
  258. """
  259. encoding = info = preexec = None
  260. if os.name == 'nt':
  261. # Hide subprocess window
  262. info = subprocess.STARTUPINFO()
  263. info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
  264. else:
  265. # Make subprocess the process group leader
  266. # in order to kill the whole process group with os.killpg
  267. preexec = os.setsid
  268. # Encode command for subprocess
  269. # Refer to http://stackoverflow.com/a/9951851/35070
  270. if sys.version_info < (3, 0):
  271. encoding = self._get_encoding()
  272. if encoding is not None:
  273. cmd = [item.encode(encoding, 'ignore') for item in cmd]
  274. self._proc = subprocess.Popen(cmd,
  275. stdout=subprocess.PIPE,
  276. stderr=subprocess.PIPE,
  277. preexec_fn = preexec,
  278. startupinfo=info)
  279. def extract_data(stdout):
  280. """Extract data from youtube-dl stdout.
  281. Args:
  282. stdout (string): String that contains the youtube-dl stdout.
  283. Returns:
  284. Python dictionary. For available keys check self._data under
  285. YoutubeDLDownloader.__init__().
  286. """
  287. data_dictionary = dict()
  288. if not stdout:
  289. return data_dictionary
  290. stdout = [string for string in stdout.split(' ') if string != '']
  291. stdout[0] = stdout[0].lstrip('\r')
  292. if stdout[0] == '[download]':
  293. data_dictionary['status'] = 'Downloading'
  294. # Get filename
  295. if stdout[1] == 'Destination:':
  296. data_dictionary['filename'] = ' '.join(stdout[2:])
  297. # Get progress info
  298. if '%' in stdout[1]:
  299. if stdout[1] == '100%':
  300. data_dictionary['speed'] = ''
  301. data_dictionary['eta'] = ''
  302. else:
  303. data_dictionary['percent'] = stdout[1]
  304. data_dictionary['filesize'] = stdout[3]
  305. data_dictionary['speed'] = stdout[5]
  306. data_dictionary['eta'] = stdout[7]
  307. # Get playlist info
  308. if stdout[1] == 'Downloading' and stdout[2] == 'video':
  309. data_dictionary['playlist_index'] = stdout[3]
  310. data_dictionary['playlist_size'] = stdout[5]
  311. # Get file already downloaded status
  312. if stdout[-1] == 'downloaded':
  313. data_dictionary['status'] = 'Already Downloaded'
  314. # Get filesize abort status
  315. if stdout[-1] == 'Aborting.':
  316. data_dictionary['status'] = 'Filesize Abort'
  317. elif stdout[0] == '[ffmpeg]':
  318. data_dictionary['status'] = 'Post Processing'
  319. else:
  320. data_dictionary['status'] = 'Pre Processing'
  321. return data_dictionary