|
|
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

"""Python module to download videos.

This module contains the actual downloaders responsible for downloading
the video files.

Note:
    downloaders.py is part of the youtubedlg package, but it can also be
    used as a standalone module for downloading videos.

"""
from __future__ import unicode_literals
import os import sys import locale import subprocess
from time import sleep from Queue import Queue from threading import Thread
class PipeReader(Thread):

    """Helper class to avoid deadlocks when reading from subprocess pipes.

    This class uses python threads and queues in order to read from
    subprocess pipes in an asynchronous way. The thread starts itself as
    soon as the object is constructed and then polls, every WAIT_TIME
    seconds, for a file descriptor to read from.

    Attributes:
        WAIT_TIME (float): Time in seconds to sleep between two polls.

    Args:
        queue (Queue.Queue): Python queue to store the output of the
            subprocess. Every line is stored with its trailing whitespace
            removed.

    """

    WAIT_TIME = 0.1

    def __init__(self, queue):
        super(PipeReader, self).__init__()
        self._filedescriptor = None
        self._running = True
        self._queue = queue

        self.start()

    def run(self):
        """Poll for an attached file descriptor and drain it into the queue.

        The loop ends once join() has cleared the running flag.
        """
        while self._running:
            filedesc = self._filedescriptor

            if filedesc is not None:
                # readline() returns an empty value at EOF. Testing for
                # emptiness (instead of comparing against a text '' sentinel)
                # works for both byte and text streams.
                while True:
                    line = filedesc.readline()
                    if not line:
                        break

                    self._queue.put_nowait(line.rstrip())

                # The stream is exhausted, wait for the next one
                self._filedescriptor = None

            sleep(self.WAIT_TIME)

    def attach_filedescriptor(self, filedesc):
        """Attach a filedescriptor to the PipeReader.

        Args:
            filedesc (file): Object with a readline() method. It is read
                until readline() returns an empty value.

        """
        self._filedescriptor = filedesc

    def join(self, timeout=None):
        """Ask the reader loop to stop and wait for the thread to finish.

        Args:
            timeout (float): Optional timeout in seconds, passed to
                Thread.join().

        """
        self._running = False
        super(PipeReader, self).join(timeout)
class YoutubeDLDownloader(object):

    """Python class for downloading videos using youtube-dl & subprocess.

    Attributes:
        OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): Integers
            that describe the return code from the download() method.

    Args:
        youtubedl_path (string): Absolute path to youtube-dl binary.

        data_hook (function): Optional callback function to retrieve
            download process data.

        log_data (function): Optional callback function to write data to
            the log file.

    Note:
        The data dictionary passed to data_hook contains the keys
        'playlist_index', 'playlist_size', 'filesize', 'filename',
        'percent', 'status', 'speed' and 'eta'.

    Warnings:
        The caller is responsible for calling the close() method after he
        has finished with the object in order for the object to be able to
        properly close down itself.

    Example:
        How to use YoutubeDLDownloader from a python script.

            from downloaders import YoutubeDLDownloader

            def data_hook(data):
                print(data)

            downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)

            downloader.download(<URL STRING>, ['-f', 'flv'])
            downloader.close()

    """

    OK = 0
    ERROR = 1
    STOPPED = 2
    ALREADY = 3
    FILESIZE_ABORT = 4
    WARNING = 5

    def __init__(self, youtubedl_path, data_hook=None, log_data=None):
        self.youtubedl_path = youtubedl_path
        self.data_hook = data_hook
        self.log_data = log_data

        self._return_code = self.OK
        self._proc = None
        self._data = self._empty_data()

        # The reader thread is started by the PipeReader constructor
        self._stderr_queue = Queue()
        self._stderr_reader = PipeReader(self._stderr_queue)

    def download(self, url, options):
        """Download url using given options.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            An integer that shows the status of the download process.
            Right now we support 6 different return codes.

            OK (0): The download process completed successfully.
            ERROR (1): An error occurred during the download process. This
                includes the case where the youtube-dl process could not be
                started at all.
            STOPPED (2): The download process was stopped from the user.
            ALREADY (3): The given url is already downloaded.
            FILESIZE_ABORT (4): The corresponding url video file was larger
                or smaller from the given options filesize limit.
            WARNING (5): A warning occurred during the download process.

        """
        self._reset()

        cmd = self._get_cmd(url, options)
        self._create_process(cmd)

        if self._proc is None:
            # The process could not be started, _create_process() has
            # already logged the reason
            self._return_code = self.ERROR
            self._last_data_hook()
            return self._return_code

        self._stderr_reader.attach_filedescriptor(self._proc.stderr)

        # The encoding does not change while downloading, look it up once
        encoding = self._get_encoding()

        while self._proc_is_alive():
            stdout = self._proc.stdout.readline().rstrip()
            stdout = stdout.decode(encoding, 'ignore')

            if stdout:
                self._sync_data(extract_data(stdout))
                self._hook_data()

        # Read stderr after download process has been completed
        # We don't need to read stderr in real time
        # NOTE(review): only the lines that the reader thread has already
        # queued at this point are processed here -- confirm that this is
        # acceptable for very short-lived processes.
        while not self._stderr_queue.empty():
            stderr = self._stderr_queue.get_nowait().decode(encoding, 'ignore')

            self._log(stderr)
            self._update_return_code(stderr)

        self._last_data_hook()

        return self._return_code

    def stop(self):
        """Stop the download process and set return code to STOPPED. """
        if self._proc_is_alive():
            self._proc.kill()
            self._return_code = self.STOPPED

    def close(self):
        """Destructor like function for the object.

        Stops the stderr reader thread and waits for it to finish.
        """
        self._stderr_reader.join()

    @staticmethod
    def _empty_data():
        """Return a fresh data dictionary with every key set to None. """
        return {
            'playlist_index': None,
            'playlist_size': None,
            'filesize': None,
            'filename': None,
            'percent': None,
            'status': None,
            'speed': None,
            'eta': None
        }

    def _is_warning(self, stderr):
        """Return True if the stderr line starts with the 'WARNING' tag. """
        return stderr.partition(':')[0] == 'WARNING'

    def _update_return_code(self, stderr):
        """Update the return code according to a single stderr line.

        A STOPPED return code is never overwritten and a warning never
        downgrades an error that has already been recorded.

        Args:
            stderr (string): One decoded line of the youtube-dl stderr.

        """
        if self._return_code == self.STOPPED:
            return

        if not self._is_warning(stderr):
            self._return_code = self.ERROR
        elif self._return_code != self.ERROR:
            self._return_code = self.WARNING

    def _last_data_hook(self):
        """Set the last data information based on the return code. """
        status_by_code = {
            self.OK: 'Finished',
            self.ERROR: 'Error',
            self.WARNING: 'Warning',
            self.STOPPED: 'Stopped',
            self.ALREADY: 'Already Downloaded'
        }

        # Every other return code is reported as a filesize abort
        self._data['status'] = status_by_code.get(self._return_code,
                                                  'Filesize Abort')

        # Speed & ETA are meaningless once the download did not succeed
        if self._return_code in (self.ERROR, self.WARNING, self.STOPPED):
            self._data['speed'] = ''
            self._data['eta'] = ''

        self._hook_data()

    def _reset(self):
        """Reset the data. """
        self._return_code = self.OK
        self._data = self._empty_data()

    def _sync_data(self, data):
        """Synchronise self._data with data. It also filters some keys.

        The given dictionary is only read, it is never modified.

        Args:
            data (dictionary): Python dictionary that contains different
                keys. The keys are not standard, the dictionary can also be
                empty when there are no data to extract. See extract_data().

        """
        for key, value in data.items():
            if key == 'filename':
                # Keep only the filename, not the whole path
                value = os.path.basename(value)
            elif key == 'status':
                # These two statuses are reported through the return code,
                # so the status itself is trashed
                if value == 'Already Downloaded':
                    self._return_code = self.ALREADY
                    value = None
                elif value == 'Filesize Abort':
                    self._return_code = self.FILESIZE_ABORT
                    value = None

            self._data[key] = value

    def _log(self, data):
        """Log data using the callback function. """
        if self.log_data is not None:
            self.log_data(data)

    def _hook_data(self):
        """Pass self._data back to the data_hook. """
        if self.data_hook is not None:
            self.data_hook(self._data)

    def _proc_is_alive(self):
        """Returns True if self._proc is alive else False. """
        if self._proc is None:
            return False

        return self._proc.poll() is None

    def _get_cmd(self, url, options):
        """Build the subprocess command.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.
                Any other sequence (e.g. a tuple) is accepted as well.

        Returns:
            Python list that contains the command to execute.

        """
        if os.name == 'nt':
            prefix = [self.youtubedl_path]
        else:
            prefix = ['python', self.youtubedl_path]

        return prefix + list(options) + [url]

    def _get_encoding(self):
        """Return system encoding.

        Falls back to 'UTF-8' when the preferred encoding cannot be
        determined or is not usable for encoding text.
        """
        try:
            encoding = locale.getpreferredencoding()
            'TEST'.encode(encoding)
        except Exception:
            # Deliberate best effort: any failure means that the encoding
            # is unusable. KeyboardInterrupt & SystemExit still propagate.
            encoding = 'UTF-8'

        return encoding

    def _create_process(self, cmd):
        """Create new subprocess.

        On success the new process is stored in self._proc. When the
        process cannot be started the failure is logged and self._proc is
        set to None.

        Args:
            cmd (list): Python list that contains the command to execute.

        """
        encoding = info = None

        # Hide subprocess window on Windows
        if os.name == 'nt':
            info = subprocess.STARTUPINFO()
            info.dwFlags |= subprocess.STARTF_USESHOWWINDOW

        # Encode command for subprocess
        # Refer to http://stackoverflow.com/a/9951851/35070
        if sys.version_info < (3, 0):
            encoding = self._get_encoding()

        if encoding is not None:
            cmd = [item.encode(encoding, 'ignore') for item in cmd]

        # Forget any previous process so a failed start is detectable
        self._proc = None

        try:
            self._proc = subprocess.Popen(cmd,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          startupinfo=info)
        except (ValueError, OSError) as error:
            # repr() keeps the message ASCII safe on Python 2
            self._log('Failed to start process: {0!r} ({1!r})'.format(cmd, error))
def extract_data(stdout):
    """Extract data from youtube-dl stdout.

    Lines that are shorter than expected never raise, the fields that are
    missing are simply left out of the result.

    Args:
        stdout (string): String that contains the youtube-dl stdout.

    Returns:
        Python dictionary. It is empty when the line is empty or contains
        only spaces, otherwise it always contains the key 'status' and,
        depending on the line, some of the keys 'filename', 'percent',
        'filesize', 'speed', 'eta', 'playlist_index' and 'playlist_size'.

    """
    data_dictionary = dict()

    if not stdout:
        return data_dictionary

    tokens = [token for token in stdout.split(' ') if token != '']

    if not tokens:
        # The line contained nothing but spaces
        return data_dictionary

    # Progress lines are rewritten in place using a carriage return
    tag = tokens[0].lstrip('\r')

    if tag == '[download]':
        data_dictionary['status'] = 'Downloading'

        first = tokens[1] if len(tokens) > 1 else ''

        # Get filename
        if first == 'Destination:':
            # Take the name from the raw line, so consecutive spaces inside
            # the filename are kept as they are
            filename = stdout.partition('Destination:')[2]
            data_dictionary['filename'] = filename.strip(' ')

        # Get progress info
        if '%' in first:
            if first == '100%':
                data_dictionary['speed'] = ''
                data_dictionary['eta'] = ''
            else:
                data_dictionary['percent'] = first

                # Expected layout: <percent> of <filesize> at <speed> ETA <eta>
                for key, index in (('filesize', 3), ('speed', 5), ('eta', 7)):
                    if index < len(tokens):
                        data_dictionary[key] = tokens[index]

        # Get playlist info
        # Expected layout: Downloading video <index> of <size>
        if tokens[1:3] == ['Downloading', 'video']:
            for key, index in (('playlist_index', 3), ('playlist_size', 5)):
                if index < len(tokens):
                    data_dictionary[key] = tokens[index]

        # Get file already downloaded status
        if tokens[-1] == 'downloaded':
            data_dictionary['status'] = 'Already Downloaded'

        # Get filesize abort status
        if tokens[-1] == 'Aborting.':
            data_dictionary['status'] = 'Filesize Abort'

    elif tag == '[ffmpeg]':
        data_dictionary['status'] = 'Post Processing'

    else:
        data_dictionary['status'] = 'Pre Processing'

    return data_dictionary
|