Merge remote-tracking branch 'origin/master'

11 years ago · 7394b8db3b
6 changed files with 154 additions and 0 deletions
--- a/test/test_write_annotations.py
+++ b/test/test_write_annotations.py
@ -0,0 +1,82 @@
 #!/usr/bin/env python
 # coding: utf-8
 import xml.etree.ElementTree
 import os
 import sys
 import unittest
 # Allow direct execution
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import youtube_dl.YoutubeDL
 import youtube_dl.extractor
 from youtube_dl.utils import *
 from .helper import try_rm
 PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
 # General configuration (from __init__, not very elegant...)
 jar = compat_cookiejar.CookieJar()
 cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
 proxy_handler = compat_urllib_request.ProxyHandler()
 opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
 compat_urllib_request.install_opener(opener)
 class YoutubeDL(youtube_dl.YoutubeDL):
    def __init__(self, *args, **kwargs):
        super(YoutubeDL, self).__init__(*args, **kwargs)
        self.to_stderr = self.to_screen
 with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
    params = json.load(pf)
 params['writeannotations'] = True
 params['skip_download'] = True
 params['writeinfojson'] = False
 params['format'] = 'flv'
 TEST_ID = 'gr51aVj-mLg'
 ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
 EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
 class TestAnnotations(unittest.TestCase):
    def setUp(self):
        # Clear old files
        self.tearDown()
    def test_info_json(self):
        expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have the same text.
        ie = youtube_dl.extractor.YoutubeIE()
        ydl = YoutubeDL(params)
        ydl.add_info_extractor(ie)
        ydl.download([TEST_ID])
        self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
        annoxml = None
        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
                annoxml = xml.etree.ElementTree.parse(annof)
        self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
        root = annoxml.getroot()
        self.assertEqual(root.tag, 'document')
        annotationsTag = root.find('annotations')
        self.assertEqual(annotationsTag.tag, 'annotations')
        annotations = annotationsTag.findall('annotation')
        #Not all the annotations have TEXT children and the annotations are returned unsorted.
        for a in annotations:
                self.assertEqual(a.tag, 'annotation')
                if a.get('type') == 'text':
                        textTag = a.find('TEXT')
                        text = textTag.text
                        self.assertTrue(text in expected) #assertIn only added in python 2.7
                        #remove the first occurance, there could be more than one annotation with the same text
                        expected.remove(text)
        #We should have seen (and removed) all the expected annotation texts.
        self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
    def tearDown(self):
        try_rm(ANNOTATIONS_FILE)
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -71,6 +71,7 @@ class YoutubeDL(object):
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
@ -258,6 +259,10 @@ class YoutubeDL(object):
        """ Report that the metadata file has been written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
    def report_writeannotations(self, annofn):
        """ Report that the annotations file has been written. """
        self.to_screen(u'[info] Writing video annotations to: ' + annofn)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
@ -522,6 +527,18 @@ class YoutubeDL(object):
                self.report_error(u'Cannot write description file ' + descfn)
                return
        if self.params.get('writeannotations', False):
            try:
               annofn = filename + u'.annotations.xml'
               self.report_writeannotations(annofn)
               with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                   annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning(u'There are no annotations to write.')
            except (OSError, IOError):
                 self.report_error(u'Cannot write annotations file: ' + annofn)
                 return
        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -339,6 +339,9 @@ def parseOpts(overrideArguments=None):
    filesystem.add_option('--write-info-json',
            action='store_true', dest='writeinfojson',
            help='write video metadata to a .info.json file', default=False)
    filesystem.add_option('--write-annotations',
            action='store_true', dest='writeannotations',
            help='write video annotations to a .annotation file', default=False)
    filesystem.add_option('--write-thumbnail',
            action='store_true', dest='writethumbnail',
            help='write thumbnail image to disk', default=False)
@ -601,6 +604,7 @@ def _real_main(argv=None):
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeannotations': opts.writeannotations,
        'writeinfojson': opts.writeinfojson,
        'writethumbnail': opts.writethumbnail,
        'writesubtitles': opts.writesubtitles,
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -131,6 +131,7 @@ from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .wat import WatIE
--- a/youtube_dl/extractor/videopremium.py
+++ b/youtube_dl/extractor/videopremium.py
@ -0,0 +1,40 @@
 import re
 import random
 from .common import InfoExtractor
 class VideoPremiumIE(InfoExtractor):
    _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
    _TEST = {
        u'url': u'http://videopremium.tv/4w7oadjsf156',
        u'file': u'4w7oadjsf156.f4v',
        u'info_dict': {
            u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
        },
        u'params': {
            u'skip_download': True,
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage_url = 'http://videopremium.tv/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)
        self.report_extraction(video_id)
        video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<',
            webpage, u'video title')
        return [{
            'id':          video_id,
            'url':         "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
            'play_path':   "mp4:%s.f4v" % video_id,
            'page_url':    "http://videopremium.tv/" + video_id,
            'player_url':  "http://videopremium.tv/uplayer/uppod.swf",
            'ext':         'f4v',
            'title':       video_title,
        }]
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -1250,6 +1250,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            url_map[itag] = format_url
        return url_map
    def _extract_annotations(self, video_id):
        url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
    def _real_extract(self, url):
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
@ -1382,6 +1386,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        else:
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
                video_annotations = self._extract_annotations(video_id)
        # Decide which formats to download
        try:
@ -1495,6 +1504,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'subtitles':    video_subtitles,
                'duration':     video_duration,
                'age_limit':    18 if age_gate else 0,
                'annotations':  video_annotations
            })
        return results