You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

397 lines
12 KiB

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Python module to download videos.
This module contains the actual downloaders responsible
for downloading the video files.
Note:
downloaders.py is part of the youtubedlg package but it can be used
as a stand alone module for downloading videos.
"""
from __future__ import unicode_literals
import os
import sys
import locale
import subprocess
from time import sleep
from Queue import Queue
from threading import Thread
class PipeReader(Thread):

    """Background reader that drains a subprocess pipe into a queue.

    Reading a pipe from a dedicated thread keeps the caller from blocking
    on it. The thread starts itself on construction and idles until a
    file object is attached.

    Attributes:
        WAIT_TIME (float): Seconds to sleep between polling rounds.

    Args:
        queue (Queue.Queue): Queue that receives every line read from the
            attached file object (trailing whitespace removed).

    """

    WAIT_TIME = 0.1

    def __init__(self, queue):
        super(PipeReader, self).__init__()
        self._queue = queue
        self._running = True
        self._filedescriptor = None

        # The reader is live as soon as it is constructed.
        self.start()

    def run(self):
        """Poll for an attached file object and forward its lines."""
        while self._running:
            if self._filedescriptor is not None:
                read_line = self._filedescriptor.readline

                # An empty string from readline() marks end-of-file.
                chunk = read_line()
                while chunk != '':
                    self._queue.put_nowait(chunk.rstrip())
                    chunk = read_line()

                # Detach so the same file object is not read twice.
                self._filedescriptor = None

            sleep(self.WAIT_TIME)

    def attach_filedescriptor(self, filedesc):
        """Hand a file object to the reader thread for draining."""
        self._filedescriptor = filedesc

    def join(self, timeout=None):
        """Ask the polling loop to stop, then wait for the thread."""
        self._running = False
        super(PipeReader, self).join(timeout)
class YoutubeDLDownloader(object):

    """Python class for downloading videos using youtube-dl & subprocess.

    Attributes:
        OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT, WARNING (int): 'Random'
            integers that describe the return code from the download() method.

    Args:
        youtubedl_path (string): Absolute path to youtube-dl binary.

        data_hook (function): Optional callback function to retrieve download
            process data.

        log_data (function): Optional callback function to write data to
            the log file.

    Note:
        For available data keys check self._empty_data().

    Warnings:
        The caller is responsible for calling the close() method after he has
        finished with the object in order for the object to be able to properly
        close down itself.

    Example:
        How to use YoutubeDLDownloader from a python script.

            from downloaders import YoutubeDLDownloader

            def data_hook(data):
                print data

            downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)

            downloader.download(<URL STRING>, ['-f', 'flv'])

    """

    OK = 0
    ERROR = 1
    STOPPED = 2
    ALREADY = 3
    FILESIZE_ABORT = 4
    WARNING = 5

    def __init__(self, youtubedl_path, data_hook=None, log_data=None):
        self.youtubedl_path = youtubedl_path
        self.data_hook = data_hook
        self.log_data = log_data

        self._return_code = self.OK
        self._proc = None
        self._data = self._empty_data()

        self._stderr_queue = Queue()
        self._stderr_reader = PipeReader(self._stderr_queue)

    def download(self, url, options):
        """Download url using given options.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            An integer that shows the status of the download process.
            Right now we support 6 different return codes.

            OK (0): The download process completed successfully.
            ERROR (1): An error occurred during the download process.
            STOPPED (2): The download process was stopped from the user.
            ALREADY (3): The given url is already downloaded.
            FILESIZE_ABORT (4): The corresponding url video file was larger or
                smaller from the given options filesize limit.
            WARNING (5): A warning occurred during the download process.

        """
        self._reset()

        cmd = self._get_cmd(url, options)
        self._create_process(cmd)

        self._stderr_reader.attach_filedescriptor(self._proc.stderr)

        # The encoding is resolved once per download instead of per line.
        encoding = self._get_encoding()

        while self._proc_is_alive():
            self._handle_stdout_line(self._proc.stdout.readline(), encoding)

        # The process can exit while lines are still buffered in the pipe
        # (e.g. the final "already been downloaded" message). Drain them so
        # the last status is not lost. Skipped after stop(): the user does
        # not care about the remaining output of a killed process.
        if self._return_code != self.STOPPED:
            for line in iter(self._proc.stdout.readline, b''):
                self._handle_stdout_line(line, encoding)

        # Read stderr after download process has been completed
        # We don't need to read stderr in real time
        # NOTE(review): only lines the reader thread has already queued are
        # seen here; lines it has not read yet at this point are missed.
        while not self._stderr_queue.empty():
            stderr = self._stderr_queue.get_nowait().decode(encoding, 'ignore')
            self._log(stderr)
            self._update_return_code(stderr)

        self._last_data_hook()

        return self._return_code

    def stop(self):
        """Stop the download process and set return code to STOPPED. """
        if self._proc_is_alive():
            self._proc.kill()
            self._return_code = self.STOPPED

    def close(self):
        """Destructor like function for the object. """
        self._stderr_reader.join()

    @staticmethod
    def _empty_data():
        """Return a fresh progress dictionary with every key set to None. """
        return {
            'playlist_index': None,
            'playlist_size': None,
            'filesize': None,
            'filename': None,
            'percent': None,
            'status': None,
            'speed': None,
            'eta': None
        }

    def _handle_stdout_line(self, line, encoding):
        """Decode one raw stdout line, sync its data and notify the hook.

        Args:
            line (bytes): Raw line as read from the subprocess stdout.
            encoding (string): Encoding used to decode the line.

        """
        stdout = line.rstrip().decode(encoding, 'ignore')

        if stdout:
            self._sync_data(extract_data(stdout))
            self._hook_data()

    def _update_return_code(self, stderr):
        """Update the return code based on one stderr line.

        A STOPPED return code is never overwritten and an ERROR is never
        downgraded to WARNING by a warning line that arrives later.

        Args:
            stderr (string): Decoded stderr line.

        """
        if self._return_code == self.STOPPED:
            return

        if not self._is_warning(stderr):
            self._return_code = self.ERROR
        elif self._return_code != self.ERROR:
            self._return_code = self.WARNING

    def _is_warning(self, stderr):
        """Return True if the stderr line starts with the 'WARNING' tag. """
        return stderr.split(':')[0] == 'WARNING'

    def _last_data_hook(self):
        """Set the last data information based on the return code. """
        statuses = {
            self.OK: 'Finished',
            self.ERROR: 'Error',
            self.WARNING: 'Warning',
            self.STOPPED: 'Stopped',
            self.ALREADY: 'Already Downloaded'
        }

        # Any other return code is reported as a filesize abort.
        self._data['status'] = statuses.get(self._return_code, 'Filesize Abort')

        # Speed & eta are meaningless once the download did not complete.
        if self._return_code in (self.ERROR, self.WARNING, self.STOPPED):
            self._data['speed'] = ''
            self._data['eta'] = ''

        self._hook_data()

    def _reset(self):
        """Reset the data. """
        self._return_code = self.OK
        self._data = self._empty_data()

    def _sync_data(self, data):
        """Synchronise self._data with data. It also filters some keys.

        The given dictionary is left untouched.

        Args:
            data (dictionary): Python dictionary that contains different
                keys. The keys are not standard the dictionary can also be
                empty when there are no data to extract. See extract_data().

        """
        for key, value in data.items():
            if key == 'filename':
                # Keep only the filename without the directory part
                value = os.path.basename(value)
            elif key == 'status':
                if value == 'Already Downloaded':
                    # Report it through the return code and trash the status
                    self._return_code = self.ALREADY
                    value = None
                elif value == 'Filesize Abort':
                    # Report it through the return code and trash the status
                    self._return_code = self.FILESIZE_ABORT
                    value = None

            self._data[key] = value

    def _log(self, data):
        """Log data using the callback function. """
        if self.log_data is not None:
            self.log_data(data)

    def _hook_data(self):
        """Pass self._data back to the data_hook. """
        if self.data_hook is not None:
            self.data_hook(self._data)

    def _proc_is_alive(self):
        """Returns True if self._proc is alive else False. """
        if self._proc is None:
            return False

        return self._proc.poll() is None

    def _get_cmd(self, url, options):
        """Build the subprocess command.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            Python list that contains the command to execute.

        """
        if os.name == 'nt':
            cmd = [self.youtubedl_path] + options + [url]
        else:
            cmd = ['python', self.youtubedl_path] + options + [url]

        return cmd

    def _get_encoding(self):
        """Return system encoding, falling back to UTF-8 if it is unusable. """
        try:
            encoding = locale.getpreferredencoding()
            # Make sure the reported encoding is actually known to Python
            'TEST'.encode(encoding)
        except Exception:
            encoding = 'UTF-8'

        return encoding

    def _create_process(self, cmd):
        """Create new subprocess.

        Args:
            cmd (list): Python list that contains the command to execute.

        """
        encoding = info = None

        # Hide subprocess window on Windows
        if os.name == 'nt':
            info = subprocess.STARTUPINFO()
            info.dwFlags |= subprocess.STARTF_USESHOWWINDOW

        # Encode command for subprocess
        # Refer to http://stackoverflow.com/a/9951851/35070
        if sys.version_info < (3, 0):
            encoding = self._get_encoding()

        if encoding is not None:
            cmd = [item.encode(encoding, 'ignore') for item in cmd]

        self._proc = subprocess.Popen(cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      startupinfo=info)
def extract_data(stdout):
    """Extract data from youtube-dl stdout.

    Lines that are shorter than expected (e.g. a truncated progress line)
    yield only the fields that are actually present instead of raising.

    Args:
        stdout (string): String that contains the youtube-dl stdout.

    Returns:
        Python dictionary. For available keys check self._data under
        YoutubeDLDownloader.__init__(). The dictionary is empty when
        stdout is empty or contains only spaces.

    """
    data_dictionary = dict()

    if not stdout:
        return data_dictionary

    tokens = [token for token in stdout.split(' ') if token != '']

    # A line made only of spaces carries no information
    if not tokens:
        return data_dictionary

    tokens[0] = tokens[0].lstrip('\r')

    if tokens[0] == '[download]':
        data_dictionary['status'] = 'Downloading'

        # Nothing more to parse on a bare '[download]' tag
        if len(tokens) < 2:
            return data_dictionary

        # Get filename
        if tokens[1] == 'Destination:':
            # Slice the raw line instead of re-joining the tokens so that
            # consecutive spaces inside the filename are preserved
            filename = stdout.partition('Destination:')[2]
            if filename.startswith(' '):
                filename = filename[1:]
            data_dictionary['filename'] = filename

        # Get progress info
        if '%' in tokens[1]:
            if tokens[1] == '100%':
                data_dictionary['speed'] = ''
                data_dictionary['eta'] = ''
            else:
                data_dictionary['percent'] = tokens[1]
                # Expected layout: <percent> of <size> at <speed> ETA <eta>
                for key, index in (('filesize', 3), ('speed', 5), ('eta', 7)):
                    if index < len(tokens):
                        data_dictionary[key] = tokens[index]

        # Get playlist info
        # Expected layout: Downloading video <index> of <size>
        if tokens[1:3] == ['Downloading', 'video'] and len(tokens) > 5:
            data_dictionary['playlist_index'] = tokens[3]
            data_dictionary['playlist_size'] = tokens[5]

        # Get file already downloaded status
        if tokens[-1] == 'downloaded':
            data_dictionary['status'] = 'Already Downloaded'

        # Get filesize abort status
        if tokens[-1] == 'Aborting.':
            data_dictionary['status'] = 'Filesize Abort'

    elif tokens[0] == '[ffmpeg]':
        data_dictionary['status'] = 'Post Processing'

    else:
        data_dictionary['status'] = 'Pre Processing'

    return data_dictionary