#!/usr/bin/env python2 # -*- coding: utf-8 -*-
"""Python module to download videos.
This module contains the actual downloaders responsible for downloading the video files.
Note: downloaders.py is part of the youtubedlg package but it can be used as a standalone module for downloading videos.
from __future__ import unicode_literals
import os import sys import locale import subprocess
class YoutubeDLDownloader(object):

    """Python class for downloading videos using youtube-dl & subprocess.

    Attributes:
        OK, ERROR, STOPPED, ALREADY, FILESIZE_ABORT (int): Integers that
            describe the return code from the download() method.

    Args:
        youtubedl_path (string): Absolute path to youtube-dl binary.

        data_hook (function): Optional callback function to retrieve
            download process data.

        log_manager (logmanager.LogManager): Object responsible for
            writing errors to the log.

    Note:
        For available data keys check self._data under __init__().

    Example:
        How to use YoutubeDLDownloader from a python script.

            from downloaders import YoutubeDLDownloader

            def data_hook(data):
                print data

            downloader = YoutubeDLDownloader('/usr/bin/youtube-dl', data_hook)

            downloader.download(url_string, ['-f', 'flv'])

    """

    # Return codes of download(). The values match the ones listed in the
    # download() documentation.
    OK = 0
    ERROR = 1
    STOPPED = 2
    ALREADY = 3
    FILESIZE_ABORT = 4

    def __init__(self, youtubedl_path, data_hook=None, log_manager=None):
        self.youtubedl_path = youtubedl_path
        self.log_manager = log_manager
        self.data_hook = data_hook

        self._return_code = self.OK
        self._proc = None
        self._data = self._new_data()

    @staticmethod
    def _new_data():
        """Return a fresh data dictionary with every known key set to None. """
        return {
            'playlist_index': None,
            'playlist_size': None,
            'filesize': None,
            'filename': None,
            'percent': None,
            'status': None,
            'speed': None,
            'eta': None
        }

    def download(self, url, options):
        """Download url using given options.

        The state left over from a previous download is cleared first, so
        the same object can be used for several downloads. When the process
        ends, the final status is stored in self._data and passed to the
        data_hook one last time.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            An integer that shows the status of the download process.
            Right now we support 5 different return codes.

            OK (0): The download process completed successfully.
            ERROR (1): An error occurred during the download process.
            STOPPED (2): The download process was stopped from the user.
            ALREADY (3): The given url is already downloaded.
            FILESIZE_ABORT (4): The corresponding url video file was larger
                or smaller from the given options filesize limit.

        """
        # Do not let the return code of an earlier run leak into this one
        self._reset()

        cmd = self._get_cmd(url, options)
        self._create_process(cmd)

        while self._proc_is_alive():
            stdout, stderr = self._read()

            if stderr:
                self._return_code = self.ERROR
                self._log(stderr)

            if stdout:
                self._sync_data(extract_data(stdout))
                self._hook_data()

        # Report the terminal status (Finished, Error, ...) to the hook
        self._last_data_hook()
        self._hook_data()

        return self._return_code

    def stop(self):
        """Stop the download process and set return code to STOPPED. """
        if self._proc_is_alive():
            self._proc.kill()
            self._return_code = self.STOPPED

    def _last_data_hook(self):
        """Set the last data information based on the return code. """
        if self._return_code == self.OK:
            self._data['status'] = 'Finished'
        elif self._return_code == self.ERROR:
            self._data['status'] = 'Error'
            self._data['speed'] = ''
            self._data['eta'] = ''
        elif self._return_code == self.STOPPED:
            self._data['status'] = 'Stopped'
            self._data['speed'] = ''
            self._data['eta'] = ''
        elif self._return_code == self.ALREADY:
            self._data['status'] = 'Already Downloaded'
        else:
            self._data['status'] = 'Filesize Abort'

    def _reset(self):
        """Reset the return code and the data dictionary. """
        self._return_code = self.OK
        self._data = self._new_data()

    def _sync_data(self, data):
        """Synchronise self._data with data. It also filters some keys.

        The given dictionary is only read, it is never modified.

        Args:
            data (dictionary): Python dictionary that contains different
                keys. The keys are not standard; the dictionary can also be
                empty when there are no data to extract.
                See extract_data().

        """
        for key in data:
            value = data[key]

            if key == 'filename':
                # Keep only the filename, drop the directory part
                value = os.path.basename(value)
            elif key == 'status':
                # These two statuses are reported through the return code,
                # so the status value itself is trashed
                if value == 'Already Downloaded':
                    self._return_code = self.ALREADY
                    value = None
                elif value == 'Filesize Abort':
                    self._return_code = self.FILESIZE_ABORT
                    value = None

            self._data[key] = value

    def _log(self, data):
        """Log data using log_manager. """
        if self.log_manager is not None:
            self.log_manager.log(data)

    def _hook_data(self):
        """Pass self._data back to the data_hook. """
        if self.data_hook is not None:
            self.data_hook(self._data)

    def _proc_is_alive(self):
        """Returns True if self._proc is alive else False. """
        if self._proc is None:
            return False

        return self._proc.poll() is None

    def _read(self):
        """Read subprocess stdout, stderr.

        One line is read from stdout; stderr is read only when the stdout
        line is empty.

        Returns:
            Python tuple that contains the STDOUT and STDERR strings.

        """
        # Start from bytes so that decode() below works for both the pipe
        # output and the "nothing read" case on Python 2 and Python 3
        stdout = stderr = b''

        if self._proc is not None:
            # NOTE(review): readline() presumably blocks until a full line
            # or EOF is available -- confirm that youtube-dl cannot stall
            # on a full stderr pipe while we wait here.
            stdout = self._proc.stdout.readline().rstrip()

            if not stdout:
                stderr = self._proc.stderr.readline().rstrip()

        encoding = self._get_encoding()

        return stdout.decode(encoding, 'ignore'), stderr.decode(encoding, 'ignore')

    def _get_cmd(self, url, options):
        """Build the subprocess command.

        On Windows (os.name == 'nt') the youtube-dl path is executed
        directly, everywhere else it is passed to the 'python' executable.

        Args:
            url (string): URL string to download.
            options (list): Python list that contains youtube-dl options.

        Returns:
            Python list that contains the command to execute.

        """
        if os.name == 'nt':
            cmd = [self.youtubedl_path] + options + [url]
        else:
            cmd = ['python', self.youtubedl_path] + options + [url]

        return cmd

    def _get_encoding(self):
        """Return system encoding, falling back to UTF-8 when unusable. """
        try:
            encoding = locale.getpreferredencoding()
            # Make sure the reported encoding can actually be used
            'TEST'.encode(encoding)
        except Exception:
            encoding = 'UTF-8'

        return encoding

    def _create_process(self, cmd):
        """Create new subprocess.

        Args:
            cmd (list): Python list that contains the command to execute.

        """
        encoding = info = None

        # Hide subprocess window on Windows
        if os.name == 'nt':
            info = subprocess.STARTUPINFO()
            info.dwFlags |= subprocess.STARTF_USESHOWWINDOW

        # Encode command for subprocess
        # Refer to http://stackoverflow.com/a/9951851/35070
        if sys.version_info < (3, 0):
            encoding = self._get_encoding()

        if encoding is not None:
            cmd = [item.encode(encoding, 'ignore') for item in cmd]

        self._proc = subprocess.Popen(cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      startupinfo=info)
def extract_data(stdout):
    """Extract data from youtube-dl stdout.

    The line is split on single spaces and classified by its first token
    ('[download]', '[ffmpeg]' or anything else). Lines that are shorter
    than expected yield only the keys that could be read instead of
    raising IndexError.

    Args:
        stdout (string): String that contains the youtube-dl stdout.

    Returns:
        Python dictionary. For available keys check self._data under
        YoutubeDLDownloader.__init__(). The dictionary is empty when
        stdout is empty or contains only spaces.

    """
    data_dictionary = {}

    if not stdout:
        return data_dictionary

    tokens = [token for token in stdout.split(' ') if token != '']

    # A line made only of spaces leaves nothing to inspect
    if not tokens:
        return data_dictionary

    # Progress lines can start with a carriage return
    tokens[0] = tokens[0].lstrip('\r')

    if tokens[0] == '[download]':
        data_dictionary['status'] = 'Downloading'

        # The token after the tag decides what kind of line this is
        second = tokens[1] if len(tokens) > 1 else ''

        # Get filename
        if second == 'Destination:':
            data_dictionary['filename'] = ' '.join(tokens[2:])

        # Get progress info
        if '%' in second:
            if second == '100%':
                data_dictionary['speed'] = ''
                data_dictionary['eta'] = ''
            else:
                data_dictionary['percent'] = second
                # Positions of the remaining fields on a progress line;
                # fields missing from a truncated line are skipped
                for index, key in ((3, 'filesize'), (5, 'speed'), (7, 'eta')):
                    if index < len(tokens):
                        data_dictionary[key] = tokens[index]

        # Get playlist info
        if second == 'Downloading' and len(tokens) > 5 and tokens[2] == 'video':
            data_dictionary['playlist_index'] = tokens[3]
            data_dictionary['playlist_size'] = tokens[5]

        # Get file already downloaded status
        if tokens[-1] == 'downloaded':
            data_dictionary['status'] = 'Already Downloaded'

        # Get filesize abort status
        if tokens[-1] == 'Aborting.':
            data_dictionary['status'] = 'Filesize Abort'

    elif tokens[0] == '[ffmpeg]':
        data_dictionary['status'] = 'Post Processing'

    else:
        data_dictionary['status'] = 'Pre Processing'

    return data_dictionary