You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

352 lines
10 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. """Python module to download videos.
  4. This module contains the actual downloaders responsible
  5. for downloading the video files.
  6. Note:
  7. downloaders.py is part of the youtubedlg package but it can be used
  8. as a stand alone module for downloading videos.
  9. """
  10. from __future__ import unicode_literals
  11. import os
  12. import sys
  13. import locale
  14. import subprocess
  15. class YoutubeDLDownloader(object):
  16. """Python class for downloading videos using youtube-dl & subprocess.
  17. Attributes:
  18. OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): 'Random'
  19. integers that describe the return code from the download() method.
  20. Args:
  21. youtubedl_path (string): Absolute path to youtube-dl binary.
  22. data_hook (function): Optional callback function to retrieve download
  23. process data.
  24. log_data (function): Optional callback function to write data to
  25. the log file.
  26. Note:
  27. For available data keys check self._data under __init__().
  28. Example:
  29. How to use YoutubeDLDownloader from a python script.
  30. from downloaders import YoutubeDLDownloader
  31. def data_hook(data):
  32. print data
  33. downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)
  34. downloader.download(<URL STRING>, ['-f', 'flv'])
  35. """
  36. OK = 0
  37. ERROR = 1
  38. STOPPED = 2
  39. ALREADY = 3
  40. FILESIZE_ABORT = 4
  41. WARNING = 5
  42. def __init__(self, youtubedl_path, data_hook=None, log_data=None):
  43. self.youtubedl_path = youtubedl_path
  44. self.data_hook = data_hook
  45. self.log_data = log_data
  46. self._return_code = 0
  47. self._proc = None
  48. self._data = {
  49. 'playlist_index': None,
  50. 'playlist_size': None,
  51. 'filesize': None,
  52. 'filename': None,
  53. 'percent': None,
  54. 'status': None,
  55. 'speed': None,
  56. 'eta': None
  57. }
  58. def download(self, url, options):
  59. """Download url using given options.
  60. Args:
  61. url (string): URL string to download.
  62. options (list): Python list that contains youtube-dl options.
  63. Returns:
  64. An integer that shows the status of the download process.
  65. Right now we support 5 different return codes.
  66. OK (0): The download process completed successfully.
  67. ERROR (1): An error occured during the download process.
  68. STOPPED (2): The download process was stopped from the user.
  69. ALREADY (3): The given url is already downloaded.
  70. FILESIZE_ABORT (4): The corresponding url video file was larger or
  71. smaller from the given options filesize limit.
  72. WARNING (5): A warning occured during the download process.
  73. """
  74. self._reset()
  75. cmd = self._get_cmd(url, options)
  76. self._create_process(cmd)
  77. while self._proc_is_alive():
  78. stdout, stderr = self._read()
  79. if stderr:
  80. if self._is_warning(stderr):
  81. self._return_code = self.WARNING
  82. else:
  83. self._return_code = self.ERROR
  84. self._log(stderr)
  85. if stdout:
  86. self._sync_data(extract_data(stdout))
  87. self._hook_data()
  88. self._last_data_hook()
  89. return self._return_code
  90. def stop(self):
  91. """Stop the download process and set return code to STOPPED. """
  92. if self._proc_is_alive():
  93. self._proc.kill()
  94. self._return_code = self.STOPPED
  95. def _is_warning(self, stderr):
  96. return stderr.split(':')[0] == 'WARNING'
  97. def _last_data_hook(self):
  98. """Set the last data information based on the return code. """
  99. if self._return_code == self.OK:
  100. self._data['status'] = 'Finished'
  101. elif self._return_code == self.ERROR:
  102. self._data['status'] = 'Error'
  103. self._data['speed'] = ''
  104. self._data['eta'] = ''
  105. elif self._return_code == self.WARNING:
  106. self._data['status'] = 'Warning'
  107. self._data['speed'] = ''
  108. self._data['eta'] = ''
  109. elif self._return_code == self.STOPPED:
  110. self._data['status'] = 'Stopped'
  111. self._data['speed'] = ''
  112. self._data['eta'] = ''
  113. elif self._return_code == self.ALREADY:
  114. self._data['status'] = 'Already Downloaded'
  115. else:
  116. self._data['status'] = 'Filesize Abort'
  117. self._hook_data()
  118. def _reset(self):
  119. """Reset the data. """
  120. self._return_code = 0
  121. self._data = {
  122. 'playlist_index': None,
  123. 'playlist_size': None,
  124. 'filesize': None,
  125. 'filename': None,
  126. 'percent': None,
  127. 'status': None,
  128. 'speed': None,
  129. 'eta': None
  130. }
  131. def _sync_data(self, data):
  132. """Synchronise self._data with data. It also filters some keys.
  133. Args:
  134. data (dictionary): Python dictionary that contains different
  135. keys. The keys are not standar the dictionary can also be
  136. empty when there are no data to extract. See extract_data().
  137. """
  138. for key in data:
  139. if key == 'filename':
  140. # Keep only the filename on data['filename']
  141. data['filename'] = os.path.basename(data['filename'])
  142. if key == 'status':
  143. if data['status'] == 'Already Downloaded':
  144. # Set self._return_code to already downloaded
  145. # and trash that key
  146. self._return_code = self.ALREADY
  147. data['status'] = None
  148. if data['status'] == 'Filesize Abort':
  149. # Set self._return_code to filesize abort
  150. # and trash that key
  151. self._return_code = self.FILESIZE_ABORT
  152. data['status'] = None
  153. self._data[key] = data[key]
  154. def _log(self, data):
  155. """Log data using the callback function. """
  156. if self.log_data is not None:
  157. self.log_data(data)
  158. def _hook_data(self):
  159. """Pass self._data back to the data_hook. """
  160. if self.data_hook is not None:
  161. self.data_hook(self._data)
  162. def _proc_is_alive(self):
  163. """Returns True if self._proc is alive else False. """
  164. if self._proc is None:
  165. return False
  166. return self._proc.poll() is None
  167. def _read(self):
  168. """Read subprocess stdout, stderr.
  169. Returns:
  170. Python tuple that contains the STDOUT and STDERR
  171. strings.
  172. """
  173. stdout = stderr = ''
  174. if self._proc is not None:
  175. stdout = self._proc.stdout.readline().rstrip()
  176. if not stdout:
  177. stderr = self._proc.stderr.readline().rstrip()
  178. encoding = self._get_encoding()
  179. return stdout.decode(encoding, 'ignore'), stderr.decode(encoding, 'ignore')
  180. def _get_cmd(self, url, options):
  181. """Build the subprocess command.
  182. Args:
  183. url (string): URL string to download.
  184. options (list): Python list that contains youtube-dl options.
  185. Returns:
  186. Python list that contains the command to execute.
  187. """
  188. if os.name == 'nt':
  189. cmd = [self.youtubedl_path] + options + [url]
  190. else:
  191. cmd = ['python', self.youtubedl_path] + options + [url]
  192. return cmd
  193. def _get_encoding(self):
  194. """Return system encoding. """
  195. try:
  196. encoding = locale.getpreferredencoding()
  197. 'TEST'.encode(encoding)
  198. except:
  199. encoding = 'UTF-8'
  200. return encoding
  201. def _create_process(self, cmd):
  202. """Create new subprocess.
  203. Args:
  204. cmd (list): Python list that contains the command to execute.
  205. """
  206. encoding = info = None
  207. # Hide subprocess window on Windows
  208. if os.name == 'nt':
  209. info = subprocess.STARTUPINFO()
  210. info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
  211. # Encode command for subprocess
  212. # Refer to http://stackoverflow.com/a/9951851/35070
  213. if sys.version_info < (3, 0):
  214. encoding = self._get_encoding()
  215. if encoding is not None:
  216. cmd = [item.encode(encoding, 'ignore') for item in cmd]
  217. self._proc = subprocess.Popen(cmd,
  218. stdout=subprocess.PIPE,
  219. stderr=subprocess.PIPE,
  220. startupinfo=info)
  221. def extract_data(stdout):
  222. """Extract data from youtube-dl stdout.
  223. Args:
  224. stdout (string): String that contains the youtube-dl stdout.
  225. Returns:
  226. Python dictionary. For available keys check self._data under
  227. YoutubeDLDownloader.__init__().
  228. """
  229. data_dictionary = dict()
  230. if not stdout:
  231. return data_dictionary
  232. stdout = [string for string in stdout.split(' ') if string != '']
  233. stdout[0] = stdout[0].lstrip('\r')
  234. if stdout[0] == '[download]':
  235. data_dictionary['status'] = 'Downloading'
  236. # Get filename
  237. if stdout[1] == 'Destination:':
  238. data_dictionary['filename'] = ' '.join(stdout[2:])
  239. # Get progress info
  240. if '%' in stdout[1]:
  241. if stdout[1] == '100%':
  242. data_dictionary['speed'] = ''
  243. data_dictionary['eta'] = ''
  244. else:
  245. data_dictionary['percent'] = stdout[1]
  246. data_dictionary['filesize'] = stdout[3]
  247. data_dictionary['speed'] = stdout[5]
  248. data_dictionary['eta'] = stdout[7]
  249. # Get playlist info
  250. if stdout[1] == 'Downloading' and stdout[2] == 'video':
  251. data_dictionary['playlist_index'] = stdout[3]
  252. data_dictionary['playlist_size'] = stdout[5]
  253. # Get file already downloaded status
  254. if stdout[-1] == 'downloaded':
  255. data_dictionary['status'] = 'Already Downloaded'
  256. # Get filesize abort status
  257. if stdout[-1] == 'Aborting.':
  258. data_dictionary['status'] = 'Filesize Abort'
  259. elif stdout[0] == '[ffmpeg]':
  260. data_dictionary['status'] = 'Post Processing'
  261. else:
  262. data_dictionary['status'] = 'Pre Processing'
  263. return data_dictionary