# youtube-dl-gui/youtube_dl_gui/downloaders.py

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

"""Python module to download videos.

This module contains the actual downloaders responsible
for downloading the video files.

Note:
    downloaders.py is part of the youtubedlg package but it can be used
    as a stand alone module for downloading videos.

"""

from __future__ import unicode_literals

import os
import sys
import locale
import signal
import subprocess

from time import sleep
from Queue import Queue
from threading import Thread


class PipeReader(Thread):
    """Background thread that drains a subprocess pipe into a queue.

    Reading the pipe from a dedicated thread lets the caller consume the
    subprocess output asynchronously instead of blocking on it.

    Attributes:
        WAIT_TIME (float): Seconds to sleep between two polling rounds.

    Args:
        queue (Queue.Queue): Queue that receives the lines read from the
            attached pipe.

    """

    WAIT_TIME = 0.1

    def __init__(self, queue):
        super(PipeReader, self).__init__()
        self._queue = queue
        self._running = True
        self._filedescriptor = None

        # The thread is started right away; it idles until a pipe
        # gets attached.
        self.start()

    def run(self):
        """Poll for an attached pipe and drain it until join() is called."""
        while self._running:
            pipe = self._filedescriptor

            if pipe is not None:
                self._drain(pipe)
                # Detach the exhausted pipe so the next round idles again
                self._filedescriptor = None

            sleep(self.WAIT_TIME)

    def _drain(self, pipe):
        """Forward lines from pipe to the queue until readline() gives ''."""
        # Once the ffmpeg banner is seen, that line and every following
        # line of this pipe are dropped.
        muted = False

        while True:
            line = pipe.readline()

            if line == '':
                break

            # Ignore ffmpeg stderr
            if str('ffmpeg version') in line:
                muted = True

            if muted:
                continue

            self._queue.put_nowait(line)

    def attach_filedescriptor(self, filedesc):
        """Hand a pipe (file-like object) to the reader thread. """
        self._filedescriptor = filedesc

    def join(self, timeout=None):
        """Ask the polling loop to stop, then wait for the thread. """
        self._running = False
        super(PipeReader, self).join(timeout)


class YoutubeDLDownloader(object):

    """Python class for downloading videos using youtube-dl & subprocess.

    Attributes:
        OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): Integers
            that describe the return code from the download() method. The
            larger the number the higher is the hierarchy of the code.
            Codes with smaller hierarchy cannot overwrite codes with higher
            hierarchy.

    Args:
        youtubedl_path (string): Absolute path to youtube-dl binary.

        data_hook (function): Optional callback function to retrieve download
            process data.

        log_data (function): Optional callback function to write data to
            the log file.

    Note:
        For available data keys check _empty_data().

    Warnings:
        The caller is responsible for calling the close() method after they
        have finished with the object in order for the object to be able to
        properly close down itself.

    Example:
        How to use YoutubeDLDownloader from a python script.

            from downloaders import YoutubeDLDownloader

            def data_hook(data):
                print data

            downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)

            downloader.download(<URL STRING>, ['-f', 'flv'])

    """

    OK = 0
    WARNING = 1
    ERROR = 2
    FILESIZE_ABORT = 3
    ALREADY = 4
    STOPPED = 5

    def __init__(self, youtubedl_path, data_hook=None, log_data=None):
        self.youtubedl_path = youtubedl_path
        self.data_hook = data_hook
        self.log_data = log_data

        self._return_code = self.OK
        self._proc = None
        self._data = self._empty_data()

        # stderr is drained by a helper thread into this queue and only
        # inspected once the download has finished.
        self._stderr_queue = Queue()
        self._stderr_reader = PipeReader(self._stderr_queue)

    @staticmethod
    def _empty_data():
        """Return a fresh data dictionary with every known key set to None. """
        return {
            'playlist_index': None,
            'playlist_size': None,
            'filesize': None,
            'filename': None,
            'percent': None,
            'status': None,
            'speed': None,
            'eta': None
        }

    def download(self, url, options):
        """Download url using given options.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            An integer that shows the status of the download process.
            There are 6 different return codes.

            OK (0): The download process completed successfully.
            WARNING (1): A warning occurred during the download process.
            ERROR (2): An error occurred during the download process.
            FILESIZE_ABORT (3): The corresponding url video file was larger or
                smaller from the given filesize limit.
            ALREADY (4): The given url is already downloaded.
            STOPPED (5): The download process was stopped by the user.

        """
        self._reset()

        cmd = self._get_cmd(url, options)
        self._create_process(cmd)

        self._stderr_reader.attach_filedescriptor(self._proc.stderr)

        # The encoding does not change while downloading, look it up once
        encoding = self._get_encoding()

        # Read stdout until EOF instead of polling the process state.
        # Polling drops every line that is still buffered in the pipe when
        # the process exits, e.g. the final 'already been downloaded' line
        # of a very short run.
        for raw_line in iter(self._proc.stdout.readline, b''):
            stdout = raw_line.rstrip().decode(encoding, 'ignore')

            if stdout:
                self._sync_data(extract_data(stdout))
                self._hook_data()

        # stdout reached EOF, wait for the process itself to terminate
        self._proc.wait()

        # Read stderr after download process has been completed
        # We don't need to read stderr in real time
        # NOTE(review): the reader thread fills the queue asynchronously, so
        # lines it has not forwarded yet are not seen here - confirm whether
        # this matters for callers.
        while not self._stderr_queue.empty():
            stderr = self._stderr_queue.get_nowait().rstrip().decode(encoding, 'ignore')

            self._log(stderr)

            if self._is_warning(stderr):
                self._set_returncode(self.WARNING)
            else:
                self._set_returncode(self.ERROR)

        self._last_data_hook()

        return self._return_code

    def stop(self):
        """Stop the download process and set return code to STOPPED. """
        if self._proc_is_alive():

            if os.name == 'nt':
                # os.killpg is not available on Windows
                # See: https://bugs.python.org/issue5115
                self._proc.kill()
            else:
                # The process is its own group leader (see _create_process)
                # so this kills the whole process group.
                os.killpg(self._proc.pid, signal.SIGKILL)

            self._set_returncode(self.STOPPED)

    def close(self):
        """Destructor like function for the object. """
        self._stderr_reader.join()

    def _set_returncode(self, code):
        """Set self._return_code only if the hierarchy of the given code is
        higher than or equal to the current self._return_code. """
        if code >= self._return_code:
            self._return_code = code

    def _is_warning(self, stderr):
        """Return True if the text before the first ':' is 'WARNING'. """
        return stderr.split(':')[0] == 'WARNING'

    def _last_data_hook(self):
        """Set the last data information based on the return code. """
        if self._return_code == self.OK:
            self._data['status'] = 'Finished'
        elif self._return_code == self.ERROR:
            self._data['status'] = 'Error'
            self._data['speed'] = ''
            self._data['eta'] = ''
        elif self._return_code == self.WARNING:
            self._data['status'] = 'Warning'
            self._data['speed'] = ''
            self._data['eta'] = ''
        elif self._return_code == self.STOPPED:
            self._data['status'] = 'Stopped'
            self._data['speed'] = ''
            self._data['eta'] = ''
        elif self._return_code == self.ALREADY:
            self._data['status'] = 'Already Downloaded'
        else:
            self._data['status'] = 'Filesize Abort'

        self._hook_data()

    def _reset(self):
        """Reset the return code and the data. """
        self._return_code = self.OK
        self._data = self._empty_data()

    def _sync_data(self, data):
        """Synchronise self._data with data. It also filters some keys.

        The given dictionary is only read, it is never modified.

        Args:
            data (dictionary): Python dictionary that contains different
                keys. The keys are not standard and the dictionary can also
                be empty when there are no data to extract.
                See extract_data().

        """
        for key, value in data.items():
            if key == 'filename':
                # Keep only the filename, drop the directory part
                value = os.path.basename(value)

            elif key == 'status':
                if value == 'Already Downloaded':
                    # Set self._return_code to already downloaded
                    # and trash the status value
                    self._set_returncode(self.ALREADY)
                    value = None

                elif value == 'Filesize Abort':
                    # Set self._return_code to filesize abort
                    # and trash the status value
                    self._set_returncode(self.FILESIZE_ABORT)
                    value = None

            self._data[key] = value

    def _log(self, data):
        """Log data using the callback function. """
        if self.log_data is not None:
            self.log_data(data)

    def _hook_data(self):
        """Pass self._data back to the data_hook. """
        if self.data_hook is not None:
            self.data_hook(self._data)

    def _proc_is_alive(self):
        """Returns True if self._proc is alive else False. """
        if self._proc is None:
            return False

        return self._proc.poll() is None

    def _get_cmd(self, url, options):
        """Build the subprocess command.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            Python list that contains the command to execute.

        """
        if os.name == 'nt':
            cmd = [self.youtubedl_path] + options + [url]
        else:
            # On non Windows systems youtube-dl is run through 'python'
            cmd = ['python', self.youtubedl_path] + options + [url]

        return cmd

    def _get_encoding(self):
        """Return system encoding, 'UTF-8' when it cannot be used. """
        try:
            encoding = locale.getpreferredencoding()
            # Make sure the codec really exists before handing it out
            'TEST'.encode(encoding)
        except Exception:
            # Any failure falls back to UTF-8. KeyboardInterrupt and
            # SystemExit are deliberately not swallowed.
            encoding = 'UTF-8'

        return encoding

    def _create_process(self, cmd):
        """Create new subprocess and store it in self._proc.

        Args:
            cmd (list): Python list that contains the command to execute.

        """
        encoding = info = preexec = None

        if os.name == 'nt':
            # Hide subprocess window
            info = subprocess.STARTUPINFO()
            info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        else:
            # Make subprocess the process group leader
            # in order to kill the whole process group with os.killpg
            preexec = os.setsid

        # Encode command for subprocess
        # Refer to http://stackoverflow.com/a/9951851/35070
        if sys.version_info < (3, 0):
            encoding = self._get_encoding()

        if encoding is not None:
            cmd = [item.encode(encoding, 'ignore') for item in cmd]

        self._proc = subprocess.Popen(cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      preexec_fn=preexec,
                                      startupinfo=info)


def extract_data(stdout):
    """Extract data from youtube-dl stdout.

    Lines that are shorter than expected never raise: only the fields
    that are actually present in the line are returned.

    Args:
        stdout (string): String that contains the youtube-dl stdout.

    Returns:
        Python dictionary. For available keys check self._data under
        YoutubeDLDownloader.__init__(). The dictionary is empty when
        stdout is empty or consists only of spaces.

    """
    data_dictionary = dict()

    if not stdout:
        return data_dictionary

    tokens = [token for token in stdout.split(' ') if token != '']

    # A line made only of spaces leaves nothing to inspect
    if not tokens:
        return data_dictionary

    # Progress lines can start with a carriage return
    tokens[0] = tokens[0].lstrip('\r')

    if tokens[0] == '[download]':
        data_dictionary['status'] = 'Downloading'

        # Token that follows the '[download]' tag, '' when it is missing
        first = tokens[1] if len(tokens) > 1 else ''

        # Get filename
        if first == 'Destination:':
            data_dictionary['filename'] = ' '.join(tokens[2:])

        # Get progress info
        if '%' in first:
            if first == '100%':
                data_dictionary['speed'] = ''
                data_dictionary['eta'] = ''
            else:
                # Expected layout:
                # [download] <percent> of <filesize> at <speed> ETA <eta>
                progress_fields = (
                    ('percent', 1),
                    ('filesize', 3),
                    ('speed', 5),
                    ('eta', 7),
                )

                for key, index in progress_fields:
                    if index < len(tokens):
                        data_dictionary[key] = tokens[index]

        # Get playlist info
        # Expected layout: [download] Downloading video <index> of <size>
        if first == 'Downloading' and tokens[2:3] == ['video']:
            playlist_fields = (
                ('playlist_index', 3),
                ('playlist_size', 5),
            )

            for key, index in playlist_fields:
                if index < len(tokens):
                    data_dictionary[key] = tokens[index]

        # Get file already downloaded status
        if tokens[-1] == 'downloaded':
            data_dictionary['status'] = 'Already Downloaded'

        # Get filesize abort status
        if tokens[-1] == 'Aborting.':
            data_dictionary['status'] = 'Filesize Abort'

    elif tokens[0] == '[ffmpeg]':
        data_dictionary['status'] = 'Post Processing'

    else:
        data_dictionary['status'] = 'Pre Processing'

    return data_dictionary