youtube-dl-gui/youtube_dl_gui/downloaders.py

#!/usr/bin/env python2

"""Python module to download videos. 

This module contains the actual downloaders responsible 
for downloading the video files. It's more like a driver module 
that connects the youtubedlg with different 3rd party (or not) downloaders.

Note:
    downloaders.py is part of the youtubedlg package but it can be used
    as a stand alone driver module for downloading videos.

"""

import os
import sys
import locale
import subprocess


class YoutubeDLDownloader(object):

    """Python class for downloading videos using youtube-dl & subprocess.

    Attributes:
        OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT (int): 'Random' integers
        that describe the return code from the download() method.

    Args:
        youtubedl_path (string): Absolute path to youtube-dl binary.

        data_hook (function): Optional callback function to retrieve download
            process data. It is called with the internal data dictionary.

        log_manager (logmanager.LogManager): Object responsible for writing
            errors to the log. Only its log() method is used.

    Note:
        For available data keys check self._data under _reset()

    Example:
        How to use YoutubeDLDownloader from a python script.

            from downloaders import YoutubeDLDownloader

            def data_hook(data):
                print(data)

            downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)

            downloader.download(<URL STRING>, ['-f', 'flv'])

    """

    OK = 0
    ERROR = 1
    STOPPED = 2
    ALREADY = 3
    FILESIZE_ABORT = 4

    def __init__(self, youtubedl_path, data_hook=None, log_manager=None):
        self.youtubedl_path = youtubedl_path
        self.log_manager = log_manager
        self.data_hook = data_hook

        self._return_code = self.OK
        self._proc = None
        self._data = {}

        # Single source of truth for the data keys lives in _reset().
        self._reset()

    def download(self, url, options):
        """Download url using given options.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            An integer that shows the status of the download process.
            Right now we support 5 different return codes.

            OK (0): The download process completed successfully.
            ERROR (1): An error occurred during the download process
                (including a failure to start the process).
            STOPPED (2): The download process was stopped from the user.
            ALREADY (3): The given url is already downloaded.
            FILESIZE_ABORT (4): The corresponding url video file was larger or
                smaller from the given options filesize limit.

        """
        self._reset()

        cmd = self._get_cmd(url, options)
        self._create_process(cmd)

        while self._proc_is_alive():
            stdout, stderr = self._read()
            self._handle_output(stdout, stderr)

        # The loop above stops as soon as the process exits, so lines that
        # are still buffered in the pipes would otherwise be lost.
        self._drain_output()

        self._last_data_hook()

        return self._return_code

    def stop(self):
        """Stop the download process and set return code to STOPPED. """
        if self._proc_is_alive():
            self._proc.kill()
            self._return_code = self.STOPPED

    def _handle_output(self, stdout, stderr):
        """Process one line of output from the download process.

        Args:
            stdout (string): Line read from STDOUT (can be empty).
            stderr (string): Line read from STDERR (can be empty).

        """
        if stderr:
            # Anything on STDERR marks the download as failed.
            self._return_code = self.ERROR
            self._log(stderr)

        if stdout:
            self._sync_data(extract_data(stdout))
            self._hook_data()

    def _drain_output(self):
        """Consume the output left in the pipes and close them.

        Called after the process has exited. Does nothing when no process
        was created.

        """
        proc = self._proc

        if proc is None:
            return

        # After a user stop the remaining output is not read: the process
        # was killed on purpose and a lingering child could still hold the
        # pipes open, which would block the reads below.
        if self._return_code != self.STOPPED:
            for line in iter(proc.stdout.readline, b''):
                self._handle_output(line.rstrip(), '')

            for line in iter(proc.stderr.readline, b''):
                self._handle_output('', line.rstrip())

        # Release the pipe file descriptors instead of waiting for the GC.
        proc.stdout.close()
        proc.stderr.close()

    def _last_data_hook(self):
        """Set the last data information based on the return code. """
        if self._return_code == self.OK:
            self._data['status'] = 'Finished'
        elif self._return_code == self.ERROR:
            self._data['status'] = 'Error'
            self._data['speed'] = ''
            self._data['eta'] = ''
        elif self._return_code == self.STOPPED:
            self._data['status'] = 'Stopped'
            self._data['speed'] = ''
            self._data['eta'] = ''
        elif self._return_code == self.ALREADY:
            self._data['status'] = 'Already Downloaded'
        else:
            self._data['status'] = 'Filesize Abort'

        self._hook_data()

    def _reset(self):
        """Reset the return code and the data dictionary. """
        self._return_code = self.OK
        self._data = {
            'playlist_index': None,
            'playlist_size': None,
            'filesize': None,
            'filename': None,
            'percent': None,
            'status': None,
            'speed': None,
            'eta': None
        }

    def _sync_data(self, data):
        """ Synchronise self._data with data. It also filters some keys.

        The given dictionary is only read, it is never modified.

        Args:
            data (dictionary): Python dictionary that contains different
                keys. The keys are not standard and the dictionary can also
                be empty when there are no data to extract.
                See extract_data().

        """
        for key, value in data.items():
            if key == 'filename':
                # Keep only the filename, drop the directory part
                value = os.path.basename(value)

            elif key == 'status':
                if value == 'Already Downloaded':
                    # Set self._return_code to already downloaded
                    # and trash that key (GUI won't read it if it's None)
                    self._return_code = self.ALREADY
                    value = None

                elif value == 'Filesize Abort':
                    # Set self._return_code to filesize abort
                    # and trash that key (GUI won't read it if it's None)
                    self._return_code = self.FILESIZE_ABORT
                    value = None

            self._data[key] = value

    def _log(self, data):
        """Log data using log_manager.

        Args:
            data (string): String to write in the log file.

        """
        if self.log_manager is not None:
            self.log_manager.log(data)

    def _hook_data(self):
        """Pass self._data back to data_hook. """
        if self.data_hook is not None:
            self.data_hook(self._data)

    def _proc_is_alive(self):
        """Return True if self._proc is alive. Else False. """
        if self._proc is None:
            return False

        return self._proc.poll() is None

    def _read(self):
        """Read subprocess stdout, stderr.

        STDERR is only read when STDOUT returned an empty line.

        Returns:
            Python tuple that contains the STDOUT string and
            the STDERR string.

        """
        stderr = ''

        stdout = self._read_stream(self._proc.stdout)

        if not stdout:
            stderr = self._read_stream(self._proc.stderr)

        return stdout, stderr

    def _read_stream(self, stream):
        """Read subprocess stream.

        Args:
            stream (subprocess.PIPE): Subprocess pipe. Can be either STDOUT
                or STDERR.

        Returns:
            String that contains one line of the stream (STDOUT or STDERR)
            without trailing whitespace.

        """
        if self._proc is None:
            return ''

        return stream.readline().rstrip()

    def _get_cmd(self, url, options):
        """Build the subprocess command.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            Python list that contains the command to execute.

        """
        if os.name == 'nt':
            cmd = [self.youtubedl_path] + options + [url]
        else:
            cmd = ['python', self.youtubedl_path] + options + [url]

        return cmd

    def _create_process(self, cmd):
        """Create new subprocess.

        When the process cannot be started the error is logged, self._proc
        is set to None and the return code is set to ERROR.

        Args:
            cmd (list): Python list that contains the command to execute.

        """
        encoding = info = None

        # Hide subprocess window on Windows
        if os.name == 'nt':
            info = subprocess.STARTUPINFO()
            info.dwFlags |= subprocess.STARTF_USESHOWWINDOW

        # Encode command for subprocess
        # Refer to http://stackoverflow.com/a/9951851/35070
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            try:
                encoding = locale.getpreferredencoding()
                u'TEST'.encode(encoding)
            except Exception:
                # Unknown or unusable locale encoding, fall back to UTF-8
                encoding = 'UTF-8'

        if encoding is not None:
            cmd = [item.encode(encoding, 'ignore') for item in cmd]

        try:
            self._proc = subprocess.Popen(cmd,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          startupinfo=info)
        except (OSError, ValueError) as error:
            # Do not keep a reference to a process from a previous download
            self._proc = None
            self._return_code = self.ERROR
            self._log('Failed to start process: {0}'.format(error))


def extract_data(stdout):
    """Extract data from youtube-dl stdout.

    Lines that do not have the expected number of fields are tolerated:
    the fields that cannot be extracted are simply left out.

    Args:
        stdout (string): String that contains one line of the
            youtube-dl stdout.

    Returns:
        Python dictionary. It can be empty when there are no data to
        extract. Possible keys: 'status', 'filename', 'percent',
        'filesize', 'speed', 'eta', 'playlist_index', 'playlist_size'.

    """
    data_dictionary = dict()

    if not stdout:
        return data_dictionary

    tokens = [token for token in stdout.split(' ') if token != '']

    # A line made only of spaces carries no information
    if not tokens:
        return data_dictionary

    # Progress lines can start with a carriage return
    tokens[0] = tokens[0].lstrip('\r')

    if tokens[0] == '[download]':
        data_dictionary['status'] = 'Downloading'

        # The tag can be the only token on the line
        second = tokens[1] if len(tokens) > 1 else ''

        # Get filename
        if second == 'Destination:':
            # Read the name from the raw line so that the 'Destination:'
            # label is left out and runs of spaces inside the name survive
            filename = stdout.partition('Destination:')[2]
            data_dictionary['filename'] = filename.strip(' ')

        # Get progress info
        if '%' in second:
            if second == '100%':
                data_dictionary['speed'] = ''
                data_dictionary['eta'] = ''
            elif len(tokens) >= 8:
                # [download] <percent> of <filesize> at <speed> ETA <eta>
                data_dictionary['percent'] = second
                data_dictionary['filesize'] = tokens[3]
                data_dictionary['speed'] = tokens[5]
                data_dictionary['eta'] = tokens[7]

        # Get playlist info
        # [download] Downloading video <index> of <size>
        if (len(tokens) >= 6 and second == 'Downloading'
                and tokens[2] == 'video'):
            data_dictionary['playlist_index'] = tokens[3]
            data_dictionary['playlist_size'] = tokens[5]

        # Get file already downloaded status
        if tokens[-1] == 'downloaded':
            data_dictionary['status'] = 'Already Downloaded'

        # Get filesize abort status
        if tokens[-1] == 'Aborting.':
            data_dictionary['status'] = 'Filesize Abort'

    elif tokens[0] == '[ffmpeg]':
        data_dictionary['status'] = 'Post Processing'

    else:
        data_dictionary['status'] = 'Pre Processing'

    return data_dictionary