youtube-dl-gui/youtube_dl_gui/downloaders.py

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

"""Python module to download videos.

This module contains the actual downloaders responsible
for downloading the video files.

Note:
    downloaders.py is part of the youtubedlg package but it can be used
    as a stand alone module for downloading videos.

"""

from __future__ import unicode_literals

import os
import sys
import locale
import subprocess

from time import sleep
from Queue import Queue
from threading import Thread


class PipeReader(Thread):
    """Helper class to avoid deadlocks when reading from subprocess pipes.

    A background thread reads the attached pipe line by line and stores
    every line (with trailing whitespace stripped) into the queue given by
    the caller, so the caller never blocks on the pipe itself.

    The thread is started by the constructor and keeps running until
    join() is called.

    Attributes:
        WAIT_TIME (float): Time in seconds to sleep while there is no pipe
            waiting to be read.

    Args:
        queue (Queue.Queue): Python queue to store the output of the subprocess.

    """

    WAIT_TIME = 0.1

    def __init__(self, queue):
        super(PipeReader, self).__init__()
        # Descriptors are handed to the reader thread through a thread-safe
        # queue. Sharing a plain attribute allowed a descriptor attached
        # while the previous one was still being read to be overwritten
        # with None and therefore never be read at all.
        self._pending = Queue()
        self._running = True
        self._queue = queue

        self.start()

    def run(self):
        """Read every attached file descriptor until join() is called. """
        while self._running:
            if self._pending.empty():
                sleep(self.WAIT_TIME)
                continue

            # Only this thread consumes self._pending, so get_nowait()
            # cannot fail after the empty() check above.
            self._read_lines(self._pending.get_nowait())

    def _read_lines(self, filedesc):
        """Store every line of filedesc into the output queue until EOF. """
        try:
            while True:
                line = filedesc.readline()

                # An empty result means EOF for both binary and text pipes
                if not line:
                    break

                self._queue.put_nowait(line.rstrip())
        except (IOError, OSError, ValueError):
            # The pipe was closed or broke while reading. Give up on this
            # descriptor but keep the thread alive for the next one.
            pass

    def attach_filedescriptor(self, filedesc):
        """Attach a filedescriptor to the PipeReader.

        Descriptors are read in the order they were attached.

        Args:
            filedesc (file): File like object that provides readline().

        """
        self._pending.put(filedesc)

    def join(self, timeout=None):
        """Ask the reader thread to stop and wait for it to finish.

        Note:
            A descriptor that is currently being read is read up to its
            EOF before the thread exits.

        """
        self._running = False
        super(PipeReader, self).join(timeout)


class YoutubeDLDownloader(object):

    """Python class for downloading videos using youtube-dl & subprocess.

    Attributes:
        OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): 'Random'
        integers that describe the return code from the download() method.

    Args:
        youtubedl_path (string): Absolute path to youtube-dl binary.

        data_hook (function): Optional callback function to retrieve download
            process data.

        log_data (function): Optional callback function to write data to
            the log file.

    Note:
        For available data keys check self._data under _reset().

    Warnings:
        The caller is responsible for calling the close() method once it is
        done with the object, so that the object can shut down its stderr
        reader thread.

    Example:
        How to use YoutubeDLDownloader from a python script.

            from downloaders import YoutubeDLDownloader

            def data_hook(data):
                print data

            downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)

            downloader.download(<URL STRING>, ['-f', 'flv'])

    """

    OK = 0
    ERROR = 1
    STOPPED = 2
    ALREADY = 3
    FILESIZE_ABORT = 4
    WARNING = 5

    def __init__(self, youtubedl_path, data_hook=None, log_data=None):
        self.youtubedl_path = youtubedl_path
        self.data_hook = data_hook
        self.log_data = log_data

        self._proc = None
        self._return_code = self.OK
        self._data = {}

        # _reset() is the single place that defines the initial state
        self._reset()

        self._stderr_queue = Queue()
        self._stderr_reader = PipeReader(self._stderr_queue)

    def download(self, url, options):
        """Download url using given options.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            An integer that shows the status of the download process.
            Right now we support 6 different return codes.

            OK (0): The download process completed successfully.
            ERROR (1): An error occurred during the download process.
            STOPPED (2): The download process was stopped from the user.
            ALREADY (3): The given url is already downloaded.
            FILESIZE_ABORT (4): The corresponding url video file was larger or
                smaller from the given options filesize limit.
            WARNING (5): A warning occurred during the download process.

        """
        self._reset()

        cmd = self._get_cmd(url, options)
        self._create_process(cmd)

        self._stderr_reader.attach_filedescriptor(self._proc.stderr)

        # The system encoding is the same for every line, look it up once
        encoding = self._get_encoding()

        while self._proc_is_alive():
            self._process_stdout(self._proc.stdout.readline(), encoding)

        # Lines the process wrote right before exiting may still sit in the
        # pipe (e.g. the 'already downloaded' message), read them as well.
        # After a user stop nothing of what is left is interesting.
        if self._return_code != self.STOPPED:
            self._drain_stdout(encoding)

        # Read stderr after download process has been completed
        # We don't need to read stderr in real time
        while not self._stderr_queue.empty():
            stderr = self._stderr_queue.get_nowait().decode(encoding, 'ignore')

            self._log(stderr)
            self._apply_stderr(stderr)

        # The stderr reader works asynchronously and may not have queued
        # anything yet, so do not report success when the process itself
        # reported a failure through its exit code.
        exit_code = self._proc.returncode
        if self._return_code == self.OK and exit_code is not None and exit_code > 0:
            self._return_code = self.ERROR

        self._last_data_hook()

        return self._return_code

    def stop(self):
        """Stop the download process and set return code to STOPPED. """
        if self._proc_is_alive():
            # Flag the stop before killing the process so that download()
            # never classifies the output of the killed process as an error
            self._return_code = self.STOPPED
            self._proc.kill()

    def close(self):
        """Destructor like function for the object. """
        self._stderr_reader.join()

    def _process_stdout(self, raw_line, encoding):
        """Decode one raw stdout line and publish the data it contains.

        Args:
            raw_line (string): Undecoded line as read from the stdout pipe.
            encoding (string): Encoding used to decode raw_line.

        """
        stdout = raw_line.rstrip().decode(encoding, 'ignore')

        if stdout:
            self._sync_data(extract_data(stdout))
            self._hook_data()

    def _drain_stdout(self, encoding):
        """Process everything that is left in the stdout pipe until EOF. """
        while True:
            raw_line = self._proc.stdout.readline()

            if not raw_line:
                break

            self._process_stdout(raw_line, encoding)

    def _apply_stderr(self, stderr):
        """Update the return code according to one line of stderr.

        A user stop is never overwritten and an error is never downgraded
        to a warning by a later stderr line.

        """
        if self._return_code == self.STOPPED:
            return

        if not self._is_warning(stderr):
            self._return_code = self.ERROR
        elif self._return_code != self.ERROR:
            self._return_code = self.WARNING

    def _is_warning(self, stderr):
        """Returns True if the stderr line starts with the WARNING tag. """
        return stderr.split(':')[0] == 'WARNING'

    def _last_data_hook(self):
        """Set the last data information based on the return code. """
        status_by_code = {
            self.OK: 'Finished',
            self.ERROR: 'Error',
            self.WARNING: 'Warning',
            self.STOPPED: 'Stopped',
            self.ALREADY: 'Already Downloaded'
        }

        # Every other return code is reported as a filesize abort
        self._data['status'] = status_by_code.get(self._return_code,
                                                  'Filesize Abort')

        # Speed and eta are meaningless when the download did not complete
        if self._return_code in (self.ERROR, self.WARNING, self.STOPPED):
            self._data['speed'] = ''
            self._data['eta'] = ''

        self._hook_data()

    def _reset(self):
        """Reset the return code and the download data. """
        self._return_code = self.OK
        self._data = {
            'playlist_index': None,
            'playlist_size': None,
            'filesize': None,
            'filename': None,
            'percent': None,
            'status': None,
            'speed': None,
            'eta': None
        }

    def _sync_data(self, data):
        """Synchronise self._data with data. It also filters some keys.

        The given dictionary is only read, it is never modified.

        Args:
            data (dictionary): Python dictionary that contains different
                keys. The keys are not standard and the dictionary can also
                be empty when there are no data to extract.
                See extract_data().

        """
        for key, value in data.items():
            if key == 'filename':
                # Keep only the filename, not the whole path
                value = os.path.basename(value)
            elif key == 'status':
                # These two statuses are reported through the return code
                # instead of the status key
                if value == 'Already Downloaded':
                    self._return_code = self.ALREADY
                    value = None
                elif value == 'Filesize Abort':
                    self._return_code = self.FILESIZE_ABORT
                    value = None

            self._data[key] = value

    def _log(self, data):
        """Log data using the callback function. """
        if self.log_data is not None:
            self.log_data(data)

    def _hook_data(self):
        """Pass self._data back to the data_hook. """
        if self.data_hook is not None:
            self.data_hook(self._data)

    def _proc_is_alive(self):
        """Returns True if self._proc is alive else False. """
        if self._proc is None:
            return False

        return self._proc.poll() is None

    def _get_cmd(self, url, options):
        """Build the subprocess command.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            Python list that contains the command to execute.

        """
        if os.name == 'nt':
            cmd = [self.youtubedl_path] + options + [url]
        else:
            cmd = ['python', self.youtubedl_path] + options + [url]

        return cmd

    def _get_encoding(self):
        """Return system encoding. Falls back to UTF-8 if it is unusable. """
        try:
            encoding = locale.getpreferredencoding()
            'TEST'.encode(encoding)
        except Exception:
            # Unknown or broken locale settings. Exception (instead of a
            # bare except) lets KeyboardInterrupt & SystemExit propagate.
            encoding = 'UTF-8'

        return encoding

    def _create_process(self, cmd):
        """Create new subprocess.

        Args:
            cmd (list): Python list that contains the command to execute.

        """
        encoding = info = None

        # Hide subprocess window on Windows
        if os.name == 'nt':
            info = subprocess.STARTUPINFO()
            info.dwFlags |= subprocess.STARTF_USESHOWWINDOW

        # Encode command for subprocess
        # Refer to http://stackoverflow.com/a/9951851/35070
        if sys.version_info < (3, 0):
            encoding = self._get_encoding()

        if encoding is not None:
            cmd = [item.encode(encoding, 'ignore') for item in cmd]

        self._proc = subprocess.Popen(cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      startupinfo=info)


def extract_data(stdout):
    """Extract data from youtube-dl stdout.

    Lines that are shorter than expected never raise an exception, the
    fields that cannot be found are simply left out of the result.

    Args:
        stdout (string): String that contains one line of the youtube-dl
            stdout.

    Returns:
        Python dictionary that contains only the keys that could be
        extracted from the given line. Possible keys are: 'status',
        'filename', 'percent', 'filesize', 'speed', 'eta',
        'playlist_index' & 'playlist_size'. The dictionary is empty when
        the line is empty or contains only spaces.

    """
    data_dictionary = dict()

    if not stdout:
        return data_dictionary

    tokens = [token for token in stdout.split(' ') if token != '']

    # A line made only of spaces contains nothing to extract
    if not tokens:
        return data_dictionary

    # Progress lines start with a carriage return
    tokens[0] = tokens[0].lstrip('\r')
    count = len(tokens)

    if tokens[0] == '[download]':
        data_dictionary['status'] = 'Downloading'

        if count > 1:
            action = tokens[1]

            # Get filename
            if action == 'Destination:':
                # Take the name from the raw line so that consecutive
                # spaces inside the filename are preserved
                filename = stdout.partition('Destination:')[2]
                data_dictionary['filename'] = filename.strip(' ')

            # Get progress info
            if '%' in action:
                if action == '100%':
                    data_dictionary['speed'] = ''
                    data_dictionary['eta'] = ''
                else:
                    # <percent> of <filesize> at <speed> ETA <eta>
                    progress_fields = (
                        ('percent', 1),
                        ('filesize', 3),
                        ('speed', 5),
                        ('eta', 7)
                    )

                    for key, index in progress_fields:
                        if index < count:
                            data_dictionary[key] = tokens[index]

            # Get playlist info
            # Downloading video <index> of <size>
            if action == 'Downloading' and count > 5 and tokens[2] == 'video':
                data_dictionary['playlist_index'] = tokens[3]
                data_dictionary['playlist_size'] = tokens[5]

        # Get file already downloaded status
        if tokens[-1] == 'downloaded':
            data_dictionary['status'] = 'Already Downloaded'

        # Get filesize abort status
        if tokens[-1] == 'Aborting.':
            data_dictionary['status'] = 'Filesize Abort'

    elif tokens[0] == '[ffmpeg]':
        data_dictionary['status'] = 'Post Processing'

    else:
        data_dictionary['status'] = 'Pre Processing'

    return data_dictionary