You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

330 lines
9.7 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. """Python module to download videos.
  4. This module contains the actual downloaders responsible
  5. for downloading the video files.
  6. Note:
  7. downloaders.py is part of the youtubedlg package but it can be used
  8. as a standalone module for downloading videos.
  9. """
  10. from __future__ import unicode_literals
  11. import os
  12. import sys
  13. import locale
  14. import subprocess
  15. class YoutubeDLDownloader(object):
  16. """Python class for downloading videos using youtube-dl & subprocess.
  17. Attributes:
  18. OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT (int): 'Random' integers
  19. that describe the return code from the download() method.
  20. Args:
  21. youtubedl_path (string): Absolute path to youtube-dl binary.
  22. data_hook (function): Optional callback function to retrieve download
  23. process data.
  24. log_manager (logmanager.LogManager): Object responsible for writing
  25. errors to the log.
  26. Note:
  27. For available data keys check self._data under __init__().
  28. Example:
  29. How to use YoutubeDLDownloader from a python script.
  30. from downloaders import YoutubeDLDownloader
  31. def data_hook(data):
  32. print data
  33. downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)
  34. downloader.download(<URL STRING>, ['-f', 'flv'])
  35. """
  36. OK = 0
  37. ERROR = 1
  38. STOPPED = 2
  39. ALREADY = 3
  40. FILESIZE_ABORT = 4
  41. def __init__(self, youtubedl_path, data_hook=None, log_manager=None):
  42. self.youtubedl_path = youtubedl_path
  43. self.log_manager = log_manager
  44. self.data_hook = data_hook
  45. self._return_code = 0
  46. self._proc = None
  47. self._data = {
  48. 'playlist_index': None,
  49. 'playlist_size': None,
  50. 'filesize': None,
  51. 'filename': None,
  52. 'percent': None,
  53. 'status': None,
  54. 'speed': None,
  55. 'eta': None
  56. }
  57. def download(self, url, options):
  58. """Download url using given options.
  59. Args:
  60. url (string): URL string to download.
  61. options (list): Python list that contains youtube-dl options.
  62. Returns:
  63. An integer that shows the status of the download process.
  64. Right now we support 5 different return codes.
  65. OK (0): The download process completed successfully.
  66. ERROR (1): An error occured during the download process.
  67. STOPPED (2): The download process was stopped from the user.
  68. ALREADY (3): The given url is already downloaded.
  69. FILESIZE_ABORT (4): The corresponding url video file was larger or
  70. smaller from the given options filesize limit.
  71. """
  72. self._reset()
  73. cmd = self._get_cmd(url, options)
  74. self._create_process(cmd)
  75. while self._proc_is_alive():
  76. stdout, stderr = self._read()
  77. if stderr:
  78. self._return_code = self.ERROR
  79. self._log(stderr)
  80. if stdout:
  81. self._sync_data(extract_data(stdout))
  82. self._hook_data()
  83. self._last_data_hook()
  84. return self._return_code
  85. def stop(self):
  86. """Stop the download process and set return code to STOPPED. """
  87. if self._proc_is_alive():
  88. self._proc.kill()
  89. self._return_code = self.STOPPED
  90. def _last_data_hook(self):
  91. """Set the last data information based on the return code. """
  92. if self._return_code == self.OK:
  93. self._data['status'] = 'Finished'
  94. elif self._return_code == self.ERROR:
  95. self._data['status'] = 'Error'
  96. self._data['speed'] = ''
  97. self._data['eta'] = ''
  98. elif self._return_code == self.STOPPED:
  99. self._data['status'] = 'Stopped'
  100. self._data['speed'] = ''
  101. self._data['eta'] = ''
  102. elif self._return_code == self.ALREADY:
  103. self._data['status'] = 'Already Downloaded'
  104. else:
  105. self._data['status'] = 'Filesize Abort'
  106. self._hook_data()
  107. def _reset(self):
  108. """Reset the data. """
  109. self._return_code = 0
  110. self._data = {
  111. 'playlist_index': None,
  112. 'playlist_size': None,
  113. 'filesize': None,
  114. 'filename': None,
  115. 'percent': None,
  116. 'status': None,
  117. 'speed': None,
  118. 'eta': None
  119. }
  120. def _sync_data(self, data):
  121. """Synchronise self._data with data. It also filters some keys.
  122. Args:
  123. data (dictionary): Python dictionary that contains different
  124. keys. The keys are not standar the dictionary can also be
  125. empty when there are no data to extract. See extract_data().
  126. """
  127. for key in data:
  128. if key == 'filename':
  129. # Keep only the filename on data['filename']
  130. data['filename'] = os.path.basename(data['filename'])
  131. if key == 'status':
  132. if data['status'] == 'Already Downloaded':
  133. # Set self._return_code to already downloaded
  134. # and trash that key
  135. self._return_code = self.ALREADY
  136. data['status'] = None
  137. if data['status'] == 'Filesize Abort':
  138. # Set self._return_code to filesize abort
  139. # and trash that key
  140. self._return_code = self.FILESIZE_ABORT
  141. data['status'] = None
  142. self._data[key] = data[key]
  143. def _log(self, data):
  144. """Log data using log_manager. """
  145. if self.log_manager is not None:
  146. self.log_manager.log(data)
  147. def _hook_data(self):
  148. """Pass self._data back to the data_hook. """
  149. if self.data_hook is not None:
  150. self.data_hook(self._data)
  151. def _proc_is_alive(self):
  152. """Returns True if self._proc is alive else False. """
  153. if self._proc is None:
  154. return False
  155. return self._proc.poll() is None
  156. def _read(self):
  157. """Read subprocess stdout, stderr.
  158. Returns:
  159. Python tuple that contains the STDOUT and STDERR
  160. strings.
  161. """
  162. stdout = stderr = ''
  163. if self._proc is not None:
  164. stdout = self._proc.stdout.readline().rstrip()
  165. if not stdout:
  166. stderr = self._proc.stderr.readline().rstrip()
  167. return stdout, stderr
  168. def _get_cmd(self, url, options):
  169. """Build the subprocess command.
  170. Args:
  171. url (string): URL string to download.
  172. options (list): Python list that contains youtube-dl options.
  173. Returns:
  174. Python list that contains the command to execute.
  175. """
  176. if os.name == 'nt':
  177. cmd = [self.youtubedl_path] + options + [url]
  178. else:
  179. cmd = ['python', self.youtubedl_path] + options + [url]
  180. return cmd
  181. def _create_process(self, cmd):
  182. """Create new subprocess.
  183. Args:
  184. cmd (list): Python list that contains the command to execute.
  185. """
  186. encoding = info = None
  187. # Hide subprocess window on Windows
  188. if os.name == 'nt':
  189. info = subprocess.STARTUPINFO()
  190. info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
  191. # Encode command for subprocess
  192. # Refer to http://stackoverflow.com/a/9951851/35070
  193. if sys.version_info < (3, 0) and sys.platform == 'win32':
  194. try:
  195. encoding = locale.getpreferredencoding()
  196. u'TEST'.encode(encoding)
  197. except:
  198. encoding = 'UTF-8'
  199. if encoding is not None:
  200. cmd = [item.encode(encoding, 'ignore') for item in cmd]
  201. self._proc = subprocess.Popen(cmd,
  202. stdout=subprocess.PIPE,
  203. stderr=subprocess.PIPE,
  204. startupinfo=info)
  205. def extract_data(stdout):
  206. """Extract data from youtube-dl stdout.
  207. Args:
  208. stdout (string): String that contains the youtube-dl stdout.
  209. Returns:
  210. Python dictionary. For available keys check self._data under
  211. YoutubeDLDownloader.__init__().
  212. """
  213. data_dictionary = dict()
  214. if not stdout:
  215. return data_dictionary
  216. stdout = [string for string in stdout.split(' ') if string != '']
  217. stdout[0] = stdout[0].lstrip('\r')
  218. if stdout[0] == '[download]':
  219. data_dictionary['status'] = 'Downloading'
  220. # Get filename
  221. if stdout[1] == 'Destination:':
  222. data_dictionary['filename'] = ' '.join(stdout[2:])
  223. # Get progress info
  224. if '%' in stdout[1]:
  225. if stdout[1] == '100%':
  226. data_dictionary['speed'] = ''
  227. data_dictionary['eta'] = ''
  228. else:
  229. data_dictionary['percent'] = stdout[1]
  230. data_dictionary['filesize'] = stdout[3]
  231. data_dictionary['speed'] = stdout[5]
  232. data_dictionary['eta'] = stdout[7]
  233. # Get playlist info
  234. if stdout[1] == 'Downloading' and stdout[2] == 'video':
  235. data_dictionary['playlist_index'] = stdout[3]
  236. data_dictionary['playlist_size'] = stdout[5]
  237. # Get file already downloaded status
  238. if stdout[-1] == 'downloaded':
  239. data_dictionary['status'] = 'Already Downloaded'
  240. # Get filesize abort status
  241. if stdout[-1] == 'Aborting.':
  242. data_dictionary['status'] = 'Filesize Abort'
  243. elif stdout[0] == '[ffmpeg]':
  244. data_dictionary['status'] = 'Post Processing'
  245. else:
  246. data_dictionary['status'] = 'Pre Processing'
  247. return data_dictionary