You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

327 lines
9.6 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. #!/usr/bin/env python2
  2. """Python module to download videos.
  3. This module contains the actual downloaders responsible
  4. for downloading the video files.
  5. Note:
  6. downloaders.py is part of the youtubedlg package but it can be used
  7. as a stand alone module for downloading videos.
  8. """
  9. import os
  10. import sys
  11. import locale
  12. import subprocess
  13. class YoutubeDLDownloader(object):
  14. """Python class for downloading videos using youtube-dl & subprocess.
  15. Attributes:
  16. OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT (int): 'Random' integers
  17. that describe the return code from the download() method.
  18. Args:
  19. youtubedl_path (string): Absolute path to youtube-dl binary.
  20. data_hook (function): Optional callback function to retrieve download
  21. process data.
  22. log_manager (logmanager.LogManager): Object responsible for writing
  23. errors to the log.
  24. Note:
  25. For available data keys check self._data under __init__().
  26. Example:
  27. How to use YoutubeDLDownloader from a python script.
  28. from downloaders import YoutubeDLDownloader
  29. def data_hook(data):
  30. print data
  31. downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)
  32. downloader.download(<URL STRING>, ['-f', 'flv'])
  33. """
  34. OK = 0
  35. ERROR = 1
  36. STOPPED = 2
  37. ALREADY = 3
  38. FILESIZE_ABORT = 4
  39. def __init__(self, youtubedl_path, data_hook=None, log_manager=None):
  40. self.youtubedl_path = youtubedl_path
  41. self.log_manager = log_manager
  42. self.data_hook = data_hook
  43. self._return_code = 0
  44. self._proc = None
  45. self._data = {
  46. 'playlist_index': None,
  47. 'playlist_size': None,
  48. 'filesize': None,
  49. 'filename': None,
  50. 'percent': None,
  51. 'status': None,
  52. 'speed': None,
  53. 'eta': None
  54. }
  55. def download(self, url, options):
  56. """Download url using given options.
  57. Args:
  58. url (string): URL string to download.
  59. options (list): Python list that contains youtube-dl options.
  60. Returns:
  61. An integer that shows the status of the download process.
  62. Right now we support 5 different return codes.
  63. OK (0): The download process completed successfully.
  64. ERROR (1): An error occured during the download process.
  65. STOPPED (2): The download process was stopped from the user.
  66. ALREADY (3): The given url is already downloaded.
  67. FILESIZE_ABORT (4): The corresponding url video file was larger or
  68. smaller from the given options filesize limit.
  69. """
  70. self._reset()
  71. cmd = self._get_cmd(url, options)
  72. self._create_process(cmd)
  73. while self._proc_is_alive():
  74. stdout, stderr = self._read()
  75. if stderr:
  76. self._return_code = self.ERROR
  77. self._log(stderr)
  78. if stdout:
  79. self._sync_data(extract_data(stdout))
  80. self._hook_data()
  81. self._last_data_hook()
  82. return self._return_code
  83. def stop(self):
  84. """Stop the download process and set return code to STOPPED. """
  85. if self._proc_is_alive():
  86. self._proc.kill()
  87. self._return_code = self.STOPPED
  88. def _last_data_hook(self):
  89. """Set the last data information based on the return code. """
  90. if self._return_code == self.OK:
  91. self._data['status'] = 'Finished'
  92. elif self._return_code == self.ERROR:
  93. self._data['status'] = 'Error'
  94. self._data['speed'] = ''
  95. self._data['eta'] = ''
  96. elif self._return_code == self.STOPPED:
  97. self._data['status'] = 'Stopped'
  98. self._data['speed'] = ''
  99. self._data['eta'] = ''
  100. elif self._return_code == self.ALREADY:
  101. self._data['status'] = 'Already Downloaded'
  102. else:
  103. self._data['status'] = 'Filesize Abort'
  104. self._hook_data()
  105. def _reset(self):
  106. """Reset the data. """
  107. self._return_code = 0
  108. self._data = {
  109. 'playlist_index': None,
  110. 'playlist_size': None,
  111. 'filesize': None,
  112. 'filename': None,
  113. 'percent': None,
  114. 'status': None,
  115. 'speed': None,
  116. 'eta': None
  117. }
  118. def _sync_data(self, data):
  119. """Synchronise self._data with data. It also filters some keys.
  120. Args:
  121. data (dictionary): Python dictionary that contains different
  122. keys. The keys are not standar the dictionary can also be
  123. empty when there are no data to extract. See extract_data().
  124. """
  125. for key in data:
  126. if key == 'filename':
  127. # Keep only the filename on data['filename']
  128. data['filename'] = os.path.basename(data['filename'])
  129. if key == 'status':
  130. if data['status'] == 'Already Downloaded':
  131. # Set self._return_code to already downloaded
  132. # and trash that key
  133. self._return_code = self.ALREADY
  134. data['status'] = None
  135. if data['status'] == 'Filesize Abort':
  136. # Set self._return_code to filesize abort
  137. # and trash that key
  138. self._return_code = self.FILESIZE_ABORT
  139. data['status'] = None
  140. self._data[key] = data[key]
  141. def _log(self, data):
  142. """Log data using log_manager. """
  143. if self.log_manager is not None:
  144. self.log_manager.log(data)
  145. def _hook_data(self):
  146. """Pass self._data back to the data_hook. """
  147. if self.data_hook is not None:
  148. self.data_hook(self._data)
  149. def _proc_is_alive(self):
  150. """Returns True if self._proc is alive else False. """
  151. if self._proc is None:
  152. return False
  153. return self._proc.poll() is None
  154. def _read(self):
  155. """Read subprocess stdout, stderr.
  156. Returns:
  157. Python tuple that contains the STDOUT and STDERR
  158. strings.
  159. """
  160. stdout = stderr = ''
  161. if self._proc is not None:
  162. stdout = self._proc.stdout.readline().rstrip()
  163. if not stdout:
  164. stderr = self._proc.stderr.readline().rstrip()
  165. return stdout, stderr
  166. def _get_cmd(self, url, options):
  167. """Build the subprocess command.
  168. Args:
  169. url (string): URL string to download.
  170. options (list): Python list that contains youtube-dl options.
  171. Returns:
  172. Python list that contains the command to execute.
  173. """
  174. if os.name == 'nt':
  175. cmd = [self.youtubedl_path] + options + [url]
  176. else:
  177. cmd = ['python', self.youtubedl_path] + options + [url]
  178. return cmd
  179. def _create_process(self, cmd):
  180. """Create new subprocess.
  181. Args:
  182. cmd (list): Python list that contains the command to execute.
  183. """
  184. encoding = info = None
  185. # Hide subprocess window on Windows
  186. if os.name == 'nt':
  187. info = subprocess.STARTUPINFO()
  188. info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
  189. # Encode command for subprocess
  190. # Refer to http://stackoverflow.com/a/9951851/35070
  191. if sys.version_info < (3, 0) and sys.platform == 'win32':
  192. try:
  193. encoding = locale.getpreferredencoding()
  194. u'TEST'.encode(encoding)
  195. except:
  196. encoding = 'UTF-8'
  197. if encoding is not None:
  198. cmd = [item.encode(encoding, 'ignore') for item in cmd]
  199. self._proc = subprocess.Popen(cmd,
  200. stdout=subprocess.PIPE,
  201. stderr=subprocess.PIPE,
  202. startupinfo=info)
  203. def extract_data(stdout):
  204. """Extract data from youtube-dl stdout.
  205. Args:
  206. stdout (string): String that contains the youtube-dl stdout.
  207. Returns:
  208. Python dictionary. For available keys check self._data under
  209. YoutubeDLDownloader.__init__().
  210. """
  211. data_dictionary = dict()
  212. if not stdout:
  213. return data_dictionary
  214. stdout = [string for string in stdout.split(' ') if string != '']
  215. stdout[0] = stdout[0].lstrip('\r')
  216. if stdout[0] == '[download]':
  217. data_dictionary['status'] = 'Downloading'
  218. # Get filename
  219. if stdout[1] == 'Destination:':
  220. data_dictionary['filename'] = ' '.join(stdout[1:])
  221. # Get progress info
  222. if '%' in stdout[1]:
  223. if stdout[1] == '100%':
  224. data_dictionary['speed'] = ''
  225. data_dictionary['eta'] = ''
  226. else:
  227. data_dictionary['percent'] = stdout[1]
  228. data_dictionary['filesize'] = stdout[3]
  229. data_dictionary['speed'] = stdout[5]
  230. data_dictionary['eta'] = stdout[7]
  231. # Get playlist info
  232. if stdout[1] == 'Downloading' and stdout[2] == 'video':
  233. data_dictionary['playlist_index'] = stdout[3]
  234. data_dictionary['playlist_size'] = stdout[5]
  235. # Get file already downloaded status
  236. if stdout[-1] == 'downloaded':
  237. data_dictionary['status'] = 'Already Downloaded'
  238. # Get filesize abort status
  239. if stdout[-1] == 'Aborting.':
  240. data_dictionary['status'] = 'Filesize Abort'
  241. elif stdout[0] == '[ffmpeg]':
  242. data_dictionary['status'] = 'Post Processing'
  243. else:
  244. data_dictionary['status'] = 'Pre Processing'
  245. return data_dictionary