You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

469 lines
15 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. """Python module to download videos.
  4. This module contains the actual downloaders responsible
  5. for downloading the video files.
  6. Note:
  7. downloaders.py is part of the youtubedlg package but it can be used
  8. as a stand alone module for downloading videos.
  9. """
  10. from __future__ import unicode_literals
  11. import re
  12. import os
  13. import sys
  14. import locale
  15. import signal
  16. import subprocess
  17. from time import sleep
  18. from Queue import Queue
  19. from threading import Thread
  20. class PipeReader(Thread):
  21. """Helper class to avoid deadlocks when reading from subprocess pipes.
  22. This class uses python threads and queues in order to read from subprocess
  23. pipes in an asynchronous way.
  24. Attributes:
  25. WAIT_TIME (float): Time in seconds to sleep.
  26. Args:
  27. queue (Queue.Queue): Python queue to store the output of the subprocess.
  28. """
  29. WAIT_TIME = 0.1
  30. def __init__(self, queue):
  31. super(PipeReader, self).__init__()
  32. self._filedescriptor = None
  33. self._running = True
  34. self._queue = queue
  35. self.start()
  36. def run(self):
  37. # Flag to ignore specific lines
  38. ignore_line = False
  39. while self._running:
  40. if self._filedescriptor is not None:
  41. for line in iter(self._filedescriptor.readline, ''):
  42. # Ignore ffmpeg stderr
  43. if str('ffmpeg version') in line:
  44. ignore_line = True
  45. if not ignore_line:
  46. self._queue.put_nowait(line)
  47. self._filedescriptor = None
  48. ignore_line = False
  49. sleep(self.WAIT_TIME)
  50. def attach_filedescriptor(self, filedesc):
  51. """Attach a filedescriptor to the PipeReader. """
  52. self._filedescriptor = filedesc
  53. def join(self, timeout=None):
  54. self._running = False
  55. super(PipeReader, self).join(timeout)
  56. class YoutubeDLDownloader(object):
  57. """Python class for downloading videos using youtube-dl & subprocess.
  58. Attributes:
  59. OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): Integers
  60. that describe the return code from the download() method. The
  61. larger the number the higher is the hierarchy of the code.
  62. Codes with smaller hierachy cannot overwrite codes with higher
  63. hierarchy.
  64. Args:
  65. youtubedl_path (string): Absolute path to youtube-dl binary.
  66. data_hook (function): Optional callback function to retrieve download
  67. process data.
  68. log_data (function): Optional callback function to write data to
  69. the log file.
  70. Warnings:
  71. The caller is responsible for calling the close() method after he has
  72. finished with the object in order for the object to be able to properly
  73. close down itself.
  74. Example:
  75. How to use YoutubeDLDownloader from a python script.
  76. from downloaders import YoutubeDLDownloader
  77. def data_hook(data):
  78. print data
  79. downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)
  80. downloader.download(<URL STRING>, ['-f', 'flv'])
  81. """
  82. OK = 0
  83. WARNING = 1
  84. ERROR = 2
  85. FILESIZE_ABORT = 3
  86. ALREADY = 4
  87. STOPPED = 5
  88. def __init__(self, youtubedl_path, data_hook=None, log_data=None):
  89. self.youtubedl_path = youtubedl_path
  90. self.data_hook = data_hook
  91. self.log_data = log_data
  92. self._return_code = self.OK
  93. self._proc = None
  94. self._encoding = self._get_encoding()
  95. self._stderr_queue = Queue()
  96. self._stderr_reader = PipeReader(self._stderr_queue)
  97. def download(self, url, options):
  98. """Download url using given options.
  99. Args:
  100. url (string): URL string to download.
  101. options (list): Python list that contains youtube-dl options.
  102. Returns:
  103. An integer that shows the status of the download process.
  104. There are 6 different return codes.
  105. OK (0): The download process completed successfully.
  106. WARNING (1): A warning occured during the download process.
  107. ERROR (2): An error occured during the download process.
  108. FILESIZE_ABORT (3): The corresponding url video file was larger or
  109. smaller from the given filesize limit.
  110. ALREADY (4): The given url is already downloaded.
  111. STOPPED (5): The download process was stopped by the user.
  112. """
  113. self._return_code = self.OK
  114. cmd = self._get_cmd(url, options)
  115. self._create_process(cmd)
  116. self._stderr_reader.attach_filedescriptor(self._proc.stderr)
  117. while self._proc_is_alive():
  118. stdout = self._proc.stdout.readline().rstrip()
  119. stdout = stdout.decode(self._encoding, 'ignore')
  120. if stdout:
  121. data_dict = extract_data(stdout)
  122. self._extract_info(data_dict)
  123. self._hook_data(data_dict)
  124. # Read stderr after download process has been completed
  125. # We don't need to read stderr in real time
  126. while not self._stderr_queue.empty():
  127. stderr = self._stderr_queue.get_nowait().rstrip()
  128. stderr = stderr.decode(self._encoding, 'ignore')
  129. self._log(stderr)
  130. if self._is_warning(stderr):
  131. self._set_returncode(self.WARNING)
  132. else:
  133. self._set_returncode(self.ERROR)
  134. self._last_data_hook()
  135. return self._return_code
  136. def stop(self):
  137. """Stop the download process and set return code to STOPPED. """
  138. if self._proc_is_alive():
  139. if os.name == 'nt':
  140. # os.killpg is not available on Windows
  141. # See: https://bugs.python.org/issue5115
  142. self._proc.kill()
  143. else:
  144. os.killpg(self._proc.pid, signal.SIGKILL)
  145. self._set_returncode(self.STOPPED)
  146. def close(self):
  147. """Destructor like function for the object. """
  148. self._stderr_reader.join()
  149. def _set_returncode(self, code):
  150. """Set self._return_code only if the hierarchy of the given code is
  151. higher than the current self._return_code. """
  152. if code >= self._return_code:
  153. self._return_code = code
  154. def _is_warning(self, stderr):
  155. return stderr.split(':')[0] == 'WARNING'
  156. def _last_data_hook(self):
  157. """Set the last data information based on the return code. """
  158. data_dictionary = {}
  159. if self._return_code == self.OK:
  160. data_dictionary['status'] = 'Finished'
  161. elif self._return_code == self.ERROR:
  162. data_dictionary['status'] = 'Error'
  163. data_dictionary['speed'] = ''
  164. data_dictionary['eta'] = ''
  165. elif self._return_code == self.WARNING:
  166. data_dictionary['status'] = 'Warning'
  167. data_dictionary['speed'] = ''
  168. data_dictionary['eta'] = ''
  169. elif self._return_code == self.STOPPED:
  170. data_dictionary['status'] = 'Stopped'
  171. data_dictionary['speed'] = ''
  172. data_dictionary['eta'] = ''
  173. elif self._return_code == self.ALREADY:
  174. data_dictionary['status'] = 'Already Downloaded'
  175. else:
  176. data_dictionary['status'] = 'Filesize Abort'
  177. self._hook_data(data_dictionary)
  178. def _extract_info(self, data):
  179. """Extract informations about the download process from the given data.
  180. Args:
  181. data (dict): Python dictionary that contains different
  182. keys. The keys are not standar the dictionary can also be
  183. empty when there are no data to extract. See extract_data().
  184. """
  185. if 'status' in data:
  186. if data['status'] == 'Already Downloaded':
  187. # Set self._return_code to already downloaded
  188. # and trash that key
  189. self._set_returncode(self.ALREADY)
  190. data['status'] = None
  191. if data['status'] == 'Filesize Abort':
  192. # Set self._return_code to filesize abort
  193. # and trash that key
  194. self._set_returncode(self.FILESIZE_ABORT)
  195. data['status'] = None
  196. def _log(self, data):
  197. """Log data using the callback function. """
  198. if self.log_data is not None:
  199. self.log_data(data)
  200. def _hook_data(self, data):
  201. """Pass data back to the caller. """
  202. if self.data_hook is not None:
  203. self.data_hook(data)
  204. def _proc_is_alive(self):
  205. """Returns True if self._proc is alive else False. """
  206. if self._proc is None:
  207. return False
  208. return self._proc.poll() is None
  209. def _get_cmd(self, url, options):
  210. """Build the subprocess command.
  211. Args:
  212. url (string): URL string to download.
  213. options (list): Python list that contains youtube-dl options.
  214. Returns:
  215. Python list that contains the command to execute.
  216. """
  217. if os.name == 'nt':
  218. cmd = [self.youtubedl_path] + options + [url]
  219. else:
  220. cmd = ['python', self.youtubedl_path] + options + [url]
  221. return cmd
  222. def _get_encoding(self):
  223. """Return system encoding. """
  224. try:
  225. encoding = locale.getpreferredencoding()
  226. 'TEST'.encode(encoding)
  227. except:
  228. encoding = 'UTF-8'
  229. return encoding
  230. def _create_process(self, cmd):
  231. """Create new subprocess.
  232. Args:
  233. cmd (list): Python list that contains the command to execute.
  234. """
  235. info = preexec = None
  236. if os.name == 'nt':
  237. # Hide subprocess window
  238. info = subprocess.STARTUPINFO()
  239. info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
  240. else:
  241. # Make subprocess the process group leader
  242. # in order to kill the whole process group with os.killpg
  243. preexec = os.setsid
  244. # Encode command for subprocess
  245. # Refer to http://stackoverflow.com/a/9951851/35070
  246. if sys.version_info < (3, 0):
  247. cmd = [item.encode(self._encoding, 'ignore') for item in cmd]
  248. self._proc = subprocess.Popen(cmd,
  249. stdout=subprocess.PIPE,
  250. stderr=subprocess.PIPE,
  251. preexec_fn=preexec,
  252. startupinfo=info)
  253. def extract_data(stdout):
  254. """Extract data from youtube-dl stdout.
  255. Args:
  256. stdout (string): String that contains the youtube-dl stdout.
  257. Returns:
  258. Python dictionary. The returned dictionary can be empty if there are
  259. no data to extract else it may contain one or more of the
  260. following keys:
  261. 'status' : Contains the status of the download process.
  262. 'path' : Destination path.
  263. 'extension' : The file extension.
  264. 'filename' : The filename without the extension.
  265. 'percent' : The percentage of the video being downloaded.
  266. 'eta' : Estimated time for the completion of the download process.
  267. 'speed' : Download speed.
  268. 'filesize' : The size of the video file being downloaded.
  269. 'playlist_index' : The playlist index of the current video file being downloaded.
  270. 'playlist_size' : The number of videos in the playlist.
  271. """
  272. def extract_filename(input_data):
  273. path, fullname = os.path.split(input_data.strip("\""))
  274. filename, extension = os.path.splitext(fullname)
  275. return path, filename, extension
  276. data_dictionary = {}
  277. if not stdout:
  278. return data_dictionary
  279. # We want to keep the spaces in order to extract filenames with
  280. # multiple whitespaces correctly. We also keep a copy of the old
  281. # 'stdout' for backward compatibility with the old code
  282. stdout_with_spaces = stdout.split(' ')
  283. stdout = stdout.split()
  284. stdout[0] = stdout[0].lstrip('\r')
  285. if stdout[0] == '[download]':
  286. data_dictionary['status'] = 'Downloading'
  287. # Get path, filename & extension
  288. if stdout[1] == 'Destination:':
  289. path, filename, extension = extract_filename(' '.join(stdout_with_spaces[2:]))
  290. data_dictionary['path'] = path
  291. data_dictionary['filename'] = filename
  292. data_dictionary['extension'] = extension
  293. # Get progress info
  294. if '%' in stdout[1]:
  295. if stdout[1] == '100%':
  296. data_dictionary['speed'] = ''
  297. data_dictionary['eta'] = ''
  298. data_dictionary['percent'] = '100%'
  299. data_dictionary['filesize'] = stdout[3]
  300. else:
  301. data_dictionary['percent'] = stdout[1]
  302. data_dictionary['filesize'] = stdout[3]
  303. data_dictionary['speed'] = stdout[5]
  304. data_dictionary['eta'] = stdout[7]
  305. # Get playlist info
  306. if stdout[1] == 'Downloading' and stdout[2] == 'video':
  307. data_dictionary['playlist_index'] = stdout[3]
  308. data_dictionary['playlist_size'] = stdout[5]
  309. # Remove the 'and merged' part from stdout when using ffmpeg to merge the formats
  310. if stdout[-3] == 'downloaded' and stdout [-1] == 'merged':
  311. stdout = stdout[:-2]
  312. stdout_with_spaces = stdout_with_spaces[:-2]
  313. data_dictionary['percent'] = '100%'
  314. # Get file already downloaded status
  315. if stdout[-1] == 'downloaded':
  316. data_dictionary['status'] = 'Already Downloaded'
  317. path, filename, extension = extract_filename(' '.join(stdout_with_spaces[1:-4]))
  318. data_dictionary['path'] = path
  319. data_dictionary['filename'] = filename
  320. data_dictionary['extension'] = extension
  321. # Get filesize abort status
  322. if stdout[-1] == 'Aborting.':
  323. data_dictionary['status'] = 'Filesize Abort'
  324. elif stdout[0] == '[hlsnative]':
  325. # native hls extractor
  326. # see: https://github.com/rg3/youtube-dl/blob/master/youtube_dl/downloader/hls.py#L54
  327. data_dictionary['status'] = 'Downloading'
  328. if len(stdout) == 7:
  329. segment_no = float(stdout[6])
  330. current_segment = float(stdout[4])
  331. # Get the percentage
  332. percent = '{0:.1f}%'.format(current_segment / segment_no * 100)
  333. data_dictionary['percent'] = percent
  334. elif stdout[0] == '[ffmpeg]':
  335. data_dictionary['status'] = 'Post Processing'
  336. # Get final extension after merging process
  337. if stdout[1] == 'Merging':
  338. path, filename, extension = extract_filename(' '.join(stdout_with_spaces[4:]))
  339. data_dictionary['path'] = path
  340. data_dictionary['filename'] = filename
  341. data_dictionary['extension'] = extension
  342. # Get final extension ffmpeg post process simple (not file merge)
  343. if stdout[1] == 'Destination:':
  344. path, filename, extension = extract_filename(' '.join(stdout_with_spaces[2:]))
  345. data_dictionary['path'] = path
  346. data_dictionary['filename'] = filename
  347. data_dictionary['extension'] = extension
  348. elif stdout[0][0] != '[' or stdout[0] == '[debug]':
  349. pass # Just ignore this output
  350. else:
  351. data_dictionary['status'] = 'Pre Processing'
  352. return data_dictionary