#!/usr/bin/env python2
# -*- coding: utf-8 -*-

"""Python module to download videos.

This module contains the actual downloaders responsible
for downloading the video files.

Note:
    downloaders.py is part of the youtubedlg package but it can be used
    as a stand alone module for downloading videos.

"""

from __future__ import unicode_literals

import os
import sys
import locale
import subprocess

from time import sleep
from threading import Thread

try:
    from Queue import Queue
except ImportError:  # Python 3 renamed the module to ``queue``
    from queue import Queue


class PipeReader(Thread):

    """Helper class to avoid deadlocks when reading from subprocess pipes.

    This class uses python threads and queues in order to read from
    subprocess pipes in an asynchronous way. The thread is started by the
    constructor and keeps polling for an attached file descriptor until
    join() is called.

    Attributes:
        WAIT_TIME (float): Time in seconds to sleep between two polls.

    Args:
        queue (Queue.Queue): Python queue to store the output of the
            subprocess. Each item is one raw line (bytes as read from the
            pipe) with trailing whitespace removed.

    """

    WAIT_TIME = 0.1

    def __init__(self, queue):
        super(PipeReader, self).__init__()
        self._filedescriptor = None
        self._running = True
        self._queue = queue

        self.start()

    def run(self):
        """Read every attached file descriptor line by line until EOF."""
        while self._running:
            if self._filedescriptor is not None:
                # Pipes return bytes, so the EOF sentinel has to be bytes
                # as well; a text '' never matches b'' on Python 3.
                for line in iter(self._filedescriptor.readline, b''):
                    self._queue.put_nowait(line.rstrip())

                # EOF reached, wait for the next descriptor
                self._filedescriptor = None

            sleep(self.WAIT_TIME)

    def attach_filedescriptor(self, filedesc):
        """Attach a filedescriptor to the PipeReader.

        Args:
            filedesc (file): File like object with a readline() method,
                e.g. the stderr pipe of a subprocess.

        """
        self._filedescriptor = filedesc

    def join(self, timeout=None):
        """Ask the reader loop to exit and wait for the thread to end.

        Args:
            timeout (float): Optional timeout in seconds, forwarded to
                Thread.join().

        """
        self._running = False
        super(PipeReader, self).join(timeout)


class YoutubeDLDownloader(object):

    """Python class for downloading videos using youtube-dl & subprocess.

    Attributes:
        OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): 'Random'
            integers that describe the return code from the download()
            method.

    Args:
        youtubedl_path (string): Absolute path to youtube-dl binary.

        data_hook (function): Optional callback function to retrieve download
            process data.

        log_data (function): Optional callback function to write data to
            the log file.

    Note:
        For available data keys check _DATA_KEYS.

    Warnings:
        The caller is responsible for calling the close() method after they
        have finished with the object in order for the object to be able to
        properly close down itself (the constructor starts a reader thread).

    Example:
        How to use YoutubeDLDownloader from a python script.

            from downloaders import YoutubeDLDownloader

            def data_hook(data):
                print(data)

            downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)

            downloader.download('<video url>', ['-f', 'flv'])
            downloader.close()

    """

    OK = 0
    ERROR = 1
    STOPPED = 2
    ALREADY = 3
    FILESIZE_ABORT = 4
    WARNING = 5

    # Keys of the dictionary that is passed to data_hook
    _DATA_KEYS = (
        'playlist_index',
        'playlist_size',
        'filesize',
        'filename',
        'percent',
        'status',
        'speed',
        'eta',
    )

    # Final status text reported for each return code. Any code that is
    # not listed here is reported as 'Filesize Abort'.
    _FINAL_STATUS = {
        OK: 'Finished',
        ERROR: 'Error',
        WARNING: 'Warning',
        STOPPED: 'Stopped',
        ALREADY: 'Already Downloaded',
    }

    def __init__(self, youtubedl_path, data_hook=None, log_data=None):
        self.youtubedl_path = youtubedl_path
        self.data_hook = data_hook
        self.log_data = log_data

        self._return_code = self.OK
        self._proc = None
        self._data = dict.fromkeys(self._DATA_KEYS)

        self._stderr_queue = Queue()
        self._stderr_reader = PipeReader(self._stderr_queue)

    def download(self, url, options):
        """Download url using given options.

        Args:
            url (string): URL string to download.

            options (list): Python list that contains youtube-dl options.

        Returns:
            An integer that shows the status of the download process.
            Right now we support 6 different return codes.

            OK (0): The download process completed successfully.

            ERROR (1): An error occurred during the download process.

            STOPPED (2): The download process was stopped from the user.

            ALREADY (3): The given url is already downloaded.

            FILESIZE_ABORT (4): The corresponding url video file was larger
                or smaller from the given options filesize limit.

            WARNING (5): A warning occurred during the download process.

        """
        self._reset()

        cmd = self._get_cmd(url, options)
        self._create_process(cmd)

        self._stderr_reader.attach_filedescriptor(self._proc.stderr)

        # The encoding does not change while the process runs,
        # so look it up once instead of once per output line.
        encoding = self._get_encoding()

        while self._proc_is_alive():
            stdout = self._proc.stdout.readline().rstrip()
            stdout = stdout.decode(encoding, 'ignore')

            if stdout:
                self._sync_data(extract_data(stdout))
                self._hook_data()

        # Read stderr after download process has been completed
        # We don't need to read stderr in real time
        # NOTE(review): the PipeReader fills the queue from its own thread
        # (polling every WAIT_TIME seconds), so lines written right before
        # the process exits may not be queued yet at this point. Likewise
        # stdout lines still buffered when the loop above ends are dropped.
        self._read_stderr(encoding)

        self._last_data_hook()

        return self._return_code

    def stop(self):
        """Stop the download process and set return code to STOPPED. """
        if self._proc_is_alive():
            self._proc.kill()
            self._return_code = self.STOPPED

    def close(self):
        """Destructor like function for the object.

        Stops the stderr reader thread and waits for it to finish.

        """
        self._stderr_reader.join()

    def _read_stderr(self, encoding):
        """Log every queued stderr line and update the return code.

        A STOPPED return code is never overwritten. Any line that is not a
        warning sets ERROR. A warning line sets WARNING unless an error has
        already been recorded, so a late warning cannot hide an error.

        Args:
            encoding (string): Encoding used to decode the raw stderr lines.

        """
        while not self._stderr_queue.empty():
            stderr = self._stderr_queue.get_nowait().decode(encoding, 'ignore')

            self._log(stderr)

            if self._return_code == self.STOPPED:
                continue

            if not self._is_warning(stderr):
                self._return_code = self.ERROR
            elif self._return_code != self.ERROR:
                self._return_code = self.WARNING

    def _is_warning(self, stderr):
        """Return True if the stderr line starts with the 'WARNING' tag. """
        return stderr.split(':')[0] == 'WARNING'

    def _last_data_hook(self):
        """Set the last data information based on the return code. """
        self._data['status'] = self._FINAL_STATUS.get(self._return_code,
                                                      'Filesize Abort')

        # Speed and ETA are meaningless once the download did not finish
        if self._return_code in (self.ERROR, self.WARNING, self.STOPPED):
            self._data['speed'] = ''
            self._data['eta'] = ''

        self._hook_data()

    def _reset(self):
        """Reset the data. """
        self._return_code = self.OK
        self._data = dict.fromkeys(self._DATA_KEYS)

    def _sync_data(self, data):
        """Synchronise self._data with data. It also filters some keys.

        The 'filename' value is reduced to its basename. The statuses
        'Already Downloaded' and 'Filesize Abort' are turned into the
        corresponding return code and stored as None. The given dictionary
        itself is left untouched.

        Args:
            data (dictionary): Python dictionary that contains different
                keys. The keys are not standard the dictionary can also be
                empty when there are no data to extract. See extract_data().

        """
        for key, value in data.items():
            if key == 'filename':
                # Keep only the filename, drop the directory part
                value = os.path.basename(value)
            elif key == 'status':
                if value == 'Already Downloaded':
                    # Report it through the return code and trash the value
                    self._return_code = self.ALREADY
                    value = None
                elif value == 'Filesize Abort':
                    # Report it through the return code and trash the value
                    self._return_code = self.FILESIZE_ABORT
                    value = None

            self._data[key] = value

    def _log(self, data):
        """Log data using the callback function. """
        if self.log_data is not None:
            self.log_data(data)

    def _hook_data(self):
        """Pass self._data back to the data_hook. """
        if self.data_hook is not None:
            self.data_hook(self._data)

    def _proc_is_alive(self):
        """Returns True if self._proc is alive else False. """
        if self._proc is None:
            return False

        return self._proc.poll() is None

    def _get_cmd(self, url, options):
        """Build the subprocess command.

        Args:
            url (string): URL string to download.

            options (list): Python list that contains youtube-dl options.

        Returns:
            Python list that contains the command to execute.

        """
        if os.name == 'nt':
            cmd = [self.youtubedl_path] + options + [url]
        else:
            cmd = ['python', self.youtubedl_path] + options + [url]

        return cmd

    def _get_encoding(self):
        """Return system encoding.

        Falls back to 'UTF-8' when the preferred encoding cannot be
        determined or cannot be used to encode text.

        """
        try:
            encoding = locale.getpreferredencoding()
            'TEST'.encode(encoding)
        except Exception:
            # Best effort fallback; unlike a bare except this does not
            # swallow KeyboardInterrupt or SystemExit.
            encoding = 'UTF-8'

        return encoding

    def _create_process(self, cmd):
        """Create new subprocess.

        The new process is stored in self._proc with both stdout and
        stderr connected to pipes.

        Args:
            cmd (list): Python list that contains the command to execute.

        """
        encoding = info = None

        # Hide subprocess window on Windows
        if os.name == 'nt':
            info = subprocess.STARTUPINFO()
            info.dwFlags |= subprocess.STARTF_USESHOWWINDOW

        # Encode command for subprocess
        # Refer to http://stackoverflow.com/a/9951851/35070
        if sys.version_info < (3, 0):
            encoding = self._get_encoding()

        if encoding is not None:
            cmd = [item.encode(encoding, 'ignore') for item in cmd]

        self._proc = subprocess.Popen(cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      startupinfo=info)


def extract_data(stdout):
    """Extract data from youtube-dl stdout.

    Lines that are shorter than expected never raise; only the fields
    that are actually present in the line are returned.

    Args:
        stdout (string): String that contains the youtube-dl stdout.

    Returns:
        Python dictionary. For available keys check
        YoutubeDLDownloader._DATA_KEYS. The dictionary is empty when the
        line is empty or contains only spaces.

    """
    data_dictionary = {}

    if not stdout:
        return data_dictionary

    tokens = [token for token in stdout.split(' ') if token != '']

    # A line made only of spaces leaves nothing to inspect
    if not tokens:
        return data_dictionary

    tokens[0] = tokens[0].lstrip('\r')

    if tokens[0] == '[download]':
        data_dictionary['status'] = 'Downloading'

        # A bare '[download]' tag carries no further information
        if len(tokens) < 2:
            return data_dictionary

        # Get filename
        if tokens[1] == 'Destination:':
            data_dictionary['filename'] = ' '.join(tokens[2:])

        # Get progress info
        if '%' in tokens[1]:
            if tokens[1] == '100%':
                data_dictionary['speed'] = ''
                data_dictionary['eta'] = ''
            else:
                data_dictionary['percent'] = tokens[1]

                # Expected layout: <percent> of <size> at <speed> ETA <eta>
                for key, index in (('filesize', 3), ('speed', 5), ('eta', 7)):
                    if index < len(tokens):
                        data_dictionary[key] = tokens[index]

        # Get playlist info
        # Expected layout: Downloading video <index> of <size>
        if tokens[1:3] == ['Downloading', 'video'] and len(tokens) > 5:
            data_dictionary['playlist_index'] = tokens[3]
            data_dictionary['playlist_size'] = tokens[5]

        # Get file already downloaded status
        if tokens[-1] == 'downloaded':
            data_dictionary['status'] = 'Already Downloaded'

        # Get filesize abort status
        if tokens[-1] == 'Aborting.':
            data_dictionary['status'] = 'Filesize Abort'

    elif tokens[0] == '[ffmpeg]':
        data_dictionary['status'] = 'Post Processing'
    else:
        data_dictionary['status'] = 'Pre Processing'

    return data_dictionary