Merge branch 'master' into opener-to-ydl

11 years ago · e03db0a077
20 changed files with 464 additions and 117 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -100,6 +100,7 @@ class TestAllURLsMatching(unittest.TestCase):
    def test_keywords(self):
        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
        self.assertMatch(':ythistory', ['youtube:history'])
        self.assertMatch(':thedailyshow', ['ComedyCentral'])
        self.assertMatch(':tds', ['ComedyCentral'])
        self.assertMatch(':colbertreport', ['ComedyCentral'])
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -102,7 +102,7 @@ class TestPlaylists(unittest.TestCase):
        result = ie.extract('http://bambuser.com/channel/pixelversity')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], u'pixelversity')
        self.assertTrue(len(result['entries']) >= 66)
        self.assertTrue(len(result['entries']) >= 60)
    def test_bandcamp_album(self):
        dl = FakeYDL()
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
    def test_youtube_playlist(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], 'ytdl test PL')
        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
    def test_issue_673(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('PLBB231211A4F62143')[0]
        result = ie.extract('PLBB231211A4F62143')
        self.assertTrue(len(result['entries']) > 25)
    def test_youtube_playlist_long(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
        self.assertIsPlaylist(result)
        self.assertTrue(len(result['entries']) >= 799)
@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
        #651
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
        self.assertFalse('pElCt5oNDuI' in ytie_results)
        self.assertFalse('KdPEApIVdWM' in ytie_results)
@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
    def test_youtube_playlist_empty(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
        self.assertIsPlaylist(result)
        self.assertEqual(len(result['entries']), 0)
@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        # TODO find a > 100 (paginating?) videos course
        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
        entries = result['entries']
        self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
        self.assertEqual(len(entries), 25)
@ -84,22 +84,22 @@ class TestYoutubeLists(unittest.TestCase):
        dl = FakeYDL()
        ie = YoutubeChannelIE(dl)
        #test paginated channel
        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
        self.assertTrue(len(result['entries']) > 90)
        #test autogenerated channel
        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
        self.assertTrue(len(result['entries']) >= 18)
    def test_youtube_user(self):
        dl = FakeYDL()
        ie = YoutubeUserIE(dl)
        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
        self.assertTrue(len(result['entries']) >= 320)
    def test_youtube_safe_search(self):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
        self.assertEqual(len(result['entries']), 2)
    def test_youtube_show(self):
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -104,6 +104,7 @@ class YoutubeDL(object):
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
@ -204,7 +205,9 @@ class YoutubeDL(object):
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        if not self.params.get('quiet', False):
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not self.params.get('quiet', False):
            terminator = [u'\n', u''][skip_eol]
            output = message + terminator
            write_string(output, self._screen_file)
@ -212,10 +215,13 @@ class YoutubeDL(object):
    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        output = message + u'\n'
        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding())
        sys.stderr.write(output)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            output = message + u'\n'
            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
                output = output.encode(preferredencoding())
            sys.stderr.write(output)
    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
@ -370,15 +376,17 @@ class YoutubeDL(object):
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
@ -389,8 +397,8 @@ class YoutubeDL(object):
            if age_limit < info_dict.get('age_limit', 0):
                return u'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return (u'%(title)s has already been recorded in archive'
                    % info_dict)
            return (u'%s has already been recorded in archive'
                    % info_dict.get('title', info_dict.get('id', u'video')))
        return None
    @staticmethod
@ -469,7 +477,7 @@ class YoutubeDL(object):
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'playlist':
            self.add_extra_info(ie_result, extra_info)
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@ -499,6 +507,12 @@ class YoutubeDL(object):
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                }
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)
                    continue
                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
@ -654,7 +668,7 @@ class YoutubeDL(object):
        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
            compat_print(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            compat_print(info_dict['id'])
        if self.params.get('forceurl', False):
@ -825,7 +839,16 @@ class YoutubeDL(object):
        fn = self.params.get('download_archive')
        if fn is None:
            return False
        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
        extractor = info_dict.get('extractor_id')
        if extractor is None:
            if 'id' in info_dict:
                extractor = info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
            return False  # Incomplete video information
        # Future-proof against any change in case
        # and backwards compatibility with prior versions
        extractor = extractor.lower()
        vid_id = extractor + u' ' + info_dict['id']
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@ -35,6 +35,7 @@ __authors__  = (
    'Jelle van der Waa',
    'Marcin Cieślak',
    'Anton Larionov',
    'Takuya Tsuchida',
 )
 __license__ = 'Public Domain'
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -20,6 +20,7 @@ from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
@ -98,6 +99,7 @@ from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .newgrounds import NewgroundsIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
@ -156,6 +158,7 @@ from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
 from .wat import WatIE
 from .websurg import WeBSurgIE
@ -183,6 +186,7 @@ from .youtube import (
    YoutubeTruncatedURLIE,
    YoutubeWatchLaterIE,
    YoutubeFavouritesIE,
    YoutubeHistoryIE,
 )
 from .zdf import ZDFIE
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@ -20,28 +20,6 @@ class BandcampIE(InfoExtractor):
            u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
        },
        u'skip': u'There is a limit of 200 free downloads / month for the test song'
    }, {
        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
        u'playlist': [
            {
                u'file': u'1353101989.mp3',
                u'md5': u'39bc1eded3476e927c724321ddf116cf',
                u'info_dict': {
                    u'title': u'Intro',
                }
            },
            {
                u'file': u'38097443.mp3',
                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
                u'info_dict': {
                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
                }
            },
        ],
        u'params': {
            u'playlistend': 2
        },
        u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
    }]
    def _real_extract(self, url):
@ -56,20 +34,17 @@ class BandcampIE(InfoExtractor):
            json_code = m_trackinfo.group(1)
            data = json.loads(json_code)
            entries = []
            for d in data:
                formats = [{
                    'format_id': 'format_id',
                    'url': format_url,
                    'ext': format_id.partition('-')[0]
                } for format_id, format_url in sorted(d['file'].items())]
                entries.append({
                return {
                    'id': compat_str(d['id']),
                    'title': d['title'],
                    'formats': formats,
                })
            return self.playlist_result(entries, title, title)
                }
        else:
            raise ExtractorError(u'No free songs found')
@ -112,6 +87,30 @@ class BandcampAlbumIE(InfoExtractor):
    IE_NAME = u'Bandcamp:album'
    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
    _TEST = {
        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
        u'playlist': [
            {
                u'file': u'1353101989.mp3',
                u'md5': u'39bc1eded3476e927c724321ddf116cf',
                u'info_dict': {
                    u'title': u'Intro',
                }
            },
            {
                u'file': u'38097443.mp3',
                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
                u'info_dict': {
                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
                }
            },
        ],
        u'params': {
            u'playlistend': 2
        },
        u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -75,16 +75,22 @@ class BrightcoveIE(InfoExtractor):
        params = {'flashID': object_doc.attrib['id'],
                  'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
                  }
        playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
        def find_param(name):
            node = find_xpath_attr(object_doc, './param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return None
        playerKey = find_param('playerKey')
        # Not all pages define this value
        if playerKey is not None:
            params['playerKey'] = playerKey.attrib['value']
        videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
            params['playerKey'] = playerKey
        # The three fields hold the id of the video
        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
        if videoPlayer is not None:
            params['@videoPlayer'] = videoPlayer.attrib['value']
        linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
            params['@videoPlayer'] = videoPlayer
        linkBase = find_param('linkBaseURL')
        if linkBase is not None:
            params['linkBaseURL'] = linkBase.attrib['value']
            params['linkBaseURL'] = linkBase
        data = compat_urllib_parse.urlencode(params)
        return cls._FEDERATED_URL_TEMPLATE % data
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@ -0,0 +1,53 @@
 import re
 import time
 import xml.etree.ElementTree
 from .common import InfoExtractor
 class ClipfishIE(InfoExtractor):
    """Extract a single clipfish.de video through its XML video-info page."""

    IE_NAME = u'clipfish'
    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
        u'file': u'4028320.f4v',
        u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
        u'info_dict': {
            u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
            u'duration': 399,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, url, thumbnail, duration) for url."""
        # The numeric id is the only capture group of _VALID_URL.
        video_id = re.match(self._VALID_URL, url).group('id')

        # The current Unix timestamp is sent along as the "ts" query parameter.
        info_url = 'http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % (
            video_id, int(time.time()))
        info_xml = self._download_webpage(
            info_url, video_id, note=u'Downloading info page')
        doc = xml.etree.ElementTree.fromstring(info_xml)

        title = doc.find('title').text
        video_url = doc.find('filename').text
        thumbnail = doc.find('imageurl').text
        duration_str = doc.find('duration').text

        # The duration is given as hours:minutes:seconds:ms; only whole
        # seconds are kept.  An unparseable value yields no duration at all.
        duration = None
        parts = re.match(
            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
            duration_str)
        if parts is not None:
            hours = int(parts.group('hours'))
            minutes = int(parts.group('minutes'))
            seconds = int(parts.group('seconds'))
            duration = hours * 60 * 60 + minutes * 60 + seconds

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
        }
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@ -1,5 +1,4 @@
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        metaXml = self._download_webpage(xmlUrl, video_id,
        mdoc = self._download_xml(xmlUrl, video_id,
                                         u'Downloading info XML',
                                         u'Unable to download video info XML')
        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            youtubeIdNode = videoNode.find('./youtubeID')
@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):
        if next_url.endswith(u'manifest.f4m'):
            manifest_url = next_url + '?hdcore=2.10.3'
            manifestXml = self._download_webpage(manifest_url, video_id,
            adoc = self._download_xml(manifest_url, video_id,
                                         u'Downloading XML manifest',
                                         u'Unable to download video info XML')
            adoc = xml.etree.ElementTree.fromstring(manifestXml)
            try:
                video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
            except IndexError:
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -4,6 +4,7 @@ import re
 import socket
 import sys
 import netrc
 import xml.etree.ElementTree
 from ..utils import (
    compat_http_client,
@ -208,6 +209,11 @@ class InfoExtractor(object):
        """ Returns the data of the page as a string """
        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
    def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
        """Return the xml as an xml.etree.ElementTree.Element"""
        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
    def to_screen(self, msg):
        """Print msg to screen, prefixing it with '[ie_name]'"""
        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@ -229,12 +235,14 @@ class InfoExtractor(object):
        self.to_screen(u'Logging in')
    #Methods for following #608
    def url_result(self, url, ie=None):
    def url_result(self, url, ie=None, video_id=None):
        """Returns a url that points to a page that should be processed"""
        #TODO: ie should be the class used for getting the info
        video_info = {'_type': 'url',
                      'url': url,
                      'ie_key': ie}
        if video_id is not None:
            video_info['id'] = video_id
        return video_info
    def playlist_result(self, entries, playlist_id=None, playlist_title=None):
        """Returns a playlist"""
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
    _TEST = {
        u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
        u'file': u'390161.mp4',
        u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
        u'md5': u'8b743df908c42f60cf6496586c7f12c3',
        u'info_dict': {
            u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.", 
            u"title": u"How to Tie a Square Knot Properly"
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
            'title': info['name'],
            'url': final_song_url,
            'ext': 'mp3',
            'description': info['description'],
            'description': info.get('description'),
            'thumbnail': info['pictures'].get('extra_large'),
            'uploader': info['user']['name'],
            'uploader_id': info['user']['username'],
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@ -0,0 +1,131 @@
 # encoding: utf-8
 import re
 import socket
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    compat_http_client,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    compat_str,
    ExtractorError,
    unified_strdate,
 )
 class NiconicoIE(InfoExtractor):
    """Extractor for nicovideo.jp watch pages; it logs in before extracting."""

    IE_NAME = u'niconico'
    IE_DESC = u'ニコニコ動画'

    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = True

    _TEST = {
        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
        u'file': u'sm22312215.mp4',
        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
        u'info_dict': {
            u'title': u'Big Buck Bunny',
            u'uploader': u'takuya0301',
            u'uploader_id': u'2698420',
            u'upload_date': u'20131123',
            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
        u'params': {
            u'username': u'ydl.niconico@gmail.com',
            u'password': u'youtube-dl',
        },
    }

    def _real_initialize(self):
        """Log in as part of extractor initialization."""
        self._login()

    def _login(self):
        """Post the credentials to the login form.

        Returns True after a login attempt whose response does not contain
        the "Log in error" heading, and False when that heading is present or
        when no username is available and login is optional.  Raises
        ExtractorError when no username is available and _LOGIN_REQUIRED is
        set.
        """
        username, password = self._get_login_info()

        if username is None:
            # No authentication to be performed
            if not self._LOGIN_REQUIRED:
                return False
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        credentials = {
            u'mail': username,
            u'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        encoded_form = dict(
            (key.encode('utf-8'), value.encode('utf-8'))
            for key, value in credentials.items())
        post_data = compat_urllib_parse.urlencode(encoded_form).encode('utf-8')
        login_request = compat_urllib_request.Request(
            u'https://secure.nicovideo.jp/secure/login', post_data)
        response = self._download_webpage(
            login_request, u'', note=u'Logging in', errnote=u'Unable to log in')

        login_failed = re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', response) is not None
        if login_failed:
            self._downloader.report_warning(u'unable to log in: bad username or password')
        return not login_failed

    def _real_extract(self, url):
        """Return the info dict for the watch page at url."""
        video_id = re.match(self._VALID_URL, url).group(1)

        # Get video webpage. We are not actually interested in it, but need
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

        thumbinfo_page = self._download_webpage(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note=u'Downloading video info page')

        # The flv info response is a query string whose "url" entry is the media URL.
        flv_info_page = self._download_webpage(
            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
            video_id, u'Downloading flv info')
        real_url = compat_urlparse.parse_qs(flv_info_page)['url'][0]

        video_info = xml.etree.ElementTree.fromstring(thumbinfo_page)

        def node_text(tag):
            # Text of the first descendant element named `tag`.
            return video_info.find('.//' + tag).text

        title = node_text('title')
        extension = node_text('movie_type')
        format_name = extension.upper()
        thumbnail = node_text('thumbnail_url')
        description = node_text('description')
        uploader_id = node_text('user_id')
        # Only the part before the first '+' is handed to unified_strdate.
        upload_date = unified_strdate(node_text('first_retrieve').split('+')[0])
        view_count = node_text('view_counter')
        webpage_url = node_text('watch_url')

        # The uploader falls back to the numeric user id when the user info
        # page cannot be downloaded.
        uploader = uploader_id
        user_info_url = 'http://seiga.nicovideo.jp/api/user/info?id=' + uploader_id
        try:
            user_info_page = self._download_webpage(
                user_info_url, video_id, note=u'Downloading user information')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
        else:
            user_info = xml.etree.ElementTree.fromstring(user_info_page)
            uploader = user_info.find('.//nickname').text

        return {
            'id': video_id,
            'url': real_url,
            'title': title,
            'ext': extension,
            'format': format_name,
            'thumbnail': thumbnail,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'webpage_url': webpage_url,
        }
--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@ -21,6 +21,7 @@ class StreamcloudIE(InfoExtractor):
            u'title': u'youtube-dl test video  \'/\\ ä ↭',
            u'duration': 9,
        },
        u'skip': u'Only available from the EU'
    }
    def _real_extract(self, url):
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@ -0,0 +1,91 @@
 import re
 from ..utils import (
    unified_strdate,
 )
 from .subtitles import SubtitlesInfoExtractor
 class VikiIE(SubtitlesInfoExtractor):
    """Extractor for viki.com video pages, including their subtitle tracks."""

    IE_NAME = u'viki'
    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at url.

        Returns None instead when the 'listsubtitles' parameter is set; in
        that case the available subtitles are only listed.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        uploader = self._html_search_regex(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
            u'uploader')
        if uploader is not None:
            uploader = uploader.strip()
        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        # Maps the rating label shown on the page to an age limit; labels
        # not listed here (including a missing rating) give None.
        RATINGS = {
            'G': 0,
            'PG': 10,
            'PG-13': 13,
            'R': 16,
            'NC': 18,
        }
        age_limit = RATINGS.get(rating_str)
        # The player fragment carries the media source, the creation date
        # and the subtitle tracks.
        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(info_url, video_id)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )
        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return
        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Return a dict mapping language code to subtitle URL.

        The URLs come from the <track src="..."/> tags of info_webpage; the
        language code is the lowercase file name of each ".vtt" URL.  Tracks
        whose URL does not match that shape are skipped.
        """
        res = {}
        # re.findall needs the text to search as its second argument; without
        # it the call raises TypeError before any track is examined.
        for sturl in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -1510,7 +1510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            })
        return results
 class YoutubePlaylistIE(InfoExtractor):
 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
@ -1526,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
    IE_NAME = u'youtube:playlist'
    @classmethod
@ -1535,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def _real_initialize(self):
        self._login()
    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@ -1548,45 +1552,28 @@ class YoutubePlaylistIE(InfoExtractor):
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        # Download playlist videos from API
        videos = []
        # Extract the video ids from the playlist pages
        ids = []
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            url = self._TEMPLATE_URL % (playlist_id, page_num)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            # The ids are duplicated
            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
            ids.extend(new_ids)
            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break
            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))
        playlist_title = self._og_search_title(page)
        videos = [v[1] for v in sorted(videos)]
        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]
        return self.playlist_result(url_results, playlist_id, playlist_title)
 class YoutubeChannelIE(InfoExtractor):
@ -1640,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor):
        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
 class YoutubeUserIE(InfoExtractor):
@ -1706,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break
        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
        url_results = [
            self.url_result(video_id, 'Youtube', video_id=video_id)
            for video_id in video_ids]
        return self.playlist_result(url_results, playlist_title=username)
 class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
@ -1749,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
 class YoutubeSearchDateIE(YoutubeSearchIE):
@ -1809,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            feed_entries.extend(
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in ids)
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
@ -1834,6 +1826,20 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    _PAGING_STEP = 100
    _PERSONAL_FEED = True
 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the YouTube watch history.

    Matches the history feed URL as well as the ``:ythistory`` keyword.
    The paging step is not a fixed constant for this feed; it is read from
    the history page before the inherited extraction runs.
    """

    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    _VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory'
    _PLAYLIST_TITLE = u'Youtube Watch History'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True

    def _real_extract(self, url):
        """Read the paging step from the history page, then defer to the base class."""
        history_page = self._download_webpage(
            'https://www.youtube.com/feed/history', u'History')
        paging_value = self._search_regex(
            r'data-paging="(\d+)"', history_page, u'data-paging')
        # The step is a very large number (e.g. 1374343569725646), so it is
        # taken from the page and stored on the instance before delegating.
        self._PAGING_STEP = int(paging_value)
        return super(YoutubeHistoryIE, self)._real_extract(url)
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@ -41,6 +41,7 @@ def rsa_verify(message, signature, key):
    if signature != sha256(message).digest(): return False
    return True
 def update_self(to_screen, verbose):
    """Update the program file with the latest version from the repository"""
@ -82,6 +83,13 @@ def update_self(to_screen, verbose):
        return
    version_id = versions_info['latest']
    def version_tuple(version_str):
        """Turn a dotted version string like '2013.11.24.1' into a tuple of ints."""
        # Tuples of ints compare component by component, unlike raw strings.
        return tuple(int(component) for component in version_str.split('.'))
    if version_tuple(__version__) >= version_tuple(version_id):
        to_screen(u'youtube-dl is up to date (%s)' % __version__)
        return
    to_screen(u'Updating to version ' + version_id + '...')
    version = versions_info['versions'][version_id]
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -12,6 +12,7 @@ import os
 import pipes
 import platform
 import re
 import ssl
 import socket
 import sys
 import traceback
@ -536,12 +537,29 @@ def formatSeconds(secs):
        return '%d' % secs
 def make_HTTPS_handler(opts_no_check_certificate):
    if sys.version_info < (3,2):
        # Python's 2.x handler is very simplistic
        return compat_urllib_request.HTTPSHandler()
    if sys.version_info < (3, 2):
        import httplib
        class HTTPSConnectionV3(httplib.HTTPSConnection):
            """HTTPS connection that tries SSLv3 first and falls back to SSLv23."""
            def __init__(self, *args, **kwargs):
                # Plain pass-through to the base class constructor.
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
            def connect(self):
                """Open the TCP connection and wrap it in an SSL socket.

                The wrapped socket is stored in ``self.sock``.
                """
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if self._tunnel_host:
                    # A tunnel is configured: expose the raw socket and run
                    # the tunnel setup before the SSL layer is added.
                    self.sock = sock
                    self._tunnel()
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError as e:
                    # SSLv3 wrapping failed; retry on the same socket object
                    # with PROTOCOL_SSLv23. The caught error is discarded.
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            """HTTPS handler that opens requests through HTTPSConnectionV3."""
            def https_open(self, req):
                # Hand the custom connection class to the generic opener.
                connection_factory = HTTPSConnectionV3
                return self.do_open(connection_factory, req)
        return HTTPSHandlerV3()
    else:
        import ssl
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
        context.set_default_verify_paths()
        context.verify_mode = (ssl.CERT_NONE
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@
 __version__ = '2013.11.22'
 __version__ = '2013.11.24.1'