Merge branch 'master' into subtitles_rework

11 years ago · cf1dd0c59e
12 changed files with 158 additions and 20 deletions
--- a/devscripts/gh-pages/add-version.py
+++ b/devscripts/gh-pages/add-version.py
@ -3,7 +3,8 @@
 import json
 import sys
 import hashlib
 import urllib.request
 import os.path
 if len(sys.argv) <= 1:
    print('Specify the version number as parameter')
@ -25,6 +26,7 @@ filenames = {
    'tar': 'youtube-dl-%s.tar.gz' % version}
 build_dir = os.path.join('..', '..', 'build', version)
 for key, filename in filenames.items():
    url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
    fn = os.path.join(build_dir, filename)
    with open(fn, 'rb') as f:
        data = f.read()
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@ -14,7 +14,7 @@ def main():
        template = tmplf.read()
    ie_htmls = []
    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
        try:
            ie_html += ': {}'.format(ie.IE_DESC)
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@ -23,9 +23,9 @@ tests = [
    # 86 - vfluy6kdb 2013/09/06
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
     "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
    # 85
    # 85 - vflkuzxcs 2013/09/11
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
     "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"),
    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -72,10 +72,13 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
    def test_youtube_extract(self):
        self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
    def test_no_duplicates(self):
        ies = gen_extractors()
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@ -66,7 +66,7 @@ class FileDownloader(object):
    @staticmethod
    def format_seconds(seconds):
        (mins, secs) = divmod(seconds, 60)
        (hours, eta_mins) = divmod(mins, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -29,6 +29,10 @@ from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .facebook import FacebookIE
 from .flickr import FlickrIE
 from .francetv import (
    PluzzIE,
    FranceTvInfoIE,
 )
 from .freesound import FreesoundIE
 from .funnyordie import FunnyOrDieIE
 from .gamespot import GameSpotIE
@ -75,6 +79,7 @@ from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@ -5,7 +5,7 @@ from .common import InfoExtractor
 class Canalc2IE(InfoExtractor):
    _IE_NAME = 'canalc2.tv'
    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
    _TEST = {
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@ -0,0 +1,77 @@
 # encoding: utf-8
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
 )
 class FranceTVBaseInfoExtractor(InfoExtractor):
    def _extract_video(self, video_id):
        xml_desc = self._download_webpage(
            'http://www.francetvinfo.fr/appftv/webservices/video/'
            'getInfosOeuvre.php?id-diffusion='
            + video_id, video_id, 'Downloading XML config')
        info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
        manifest_url = info.find('videos/video/url').text
        video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
        video_url = video_url.replace('/z/', '/i/')
        thumbnail_path = info.find('image').text
        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': info.find('titre').text,
                'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
                'description': info.find('synopsis').text,
                }
 class PluzzIE(FranceTVBaseInfoExtractor):
    IE_NAME = u'pluzz.francetv.fr'
    _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
    _TEST = {
        u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
        u'file': u'88439064.mp4',
        u'info_dict': {
            u'title': u'Allô Rufo',
            u'description': u'md5:d909f1ebdf963814b65772aea250400e',
        },
        u'params': {
            u'skip_download': True,
        },
    }
    def _real_extract(self, url):
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        video_id = self._search_regex(
            r'data-diffusion="(\d+)"', webpage, 'ID')
        return self._extract_video(video_id)
 class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    IE_NAME = u'francetvinfo.fr'
    _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
    _TEST = {
        u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
        u'file': u'84981923.mp4',
        u'info_dict': {
            u'title': u'Soir 3',
        },
        u'params': {
            u'skip_download': True,
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
        return self._extract_video(video_id)
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@ -0,0 +1,47 @@
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    ExtractorError,
 )
 class SlideshareIE(InfoExtractor):
    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
    _TEST = {
        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
        u'file': u'25665706.mp4',
        u'info_dict': {
            u'title': u'Managing Scale and Complexity',
            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        slideshare_obj = self._search_regex(
            r'var slideshare_object =  ({.*?}); var user_info =',
            webpage, u'slideshare object')
        info = json.loads(slideshare_obj)
        if info['slideshow']['type'] != u'video':
            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
        doc = info['doc']
        bucket = info['jsplayer']['video_bucket']
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
        return {
            '_type': 'video',
            'id': info['slideshow']['id'],
            'title': info['slideshow']['title'],
            'ext': ext,
            'url': video_url,
            'thumbnail': info['slideshow']['pin_image_url'],
            'description': self._og_search_description(webpage),
        }
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@ -8,7 +8,7 @@ from ..utils import ExtractorError
 class SohuIE(InfoExtractor):
    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
    _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
    _TEST = {
        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
@ -21,8 +21,11 @@ class SohuIE(InfoExtractor):
    def _real_extract(self, url):
        def _fetch_data(vid_id):
            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
        def _fetch_data(vid_id, mytv=False):
            if mytv:
                base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
            else:
                base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
            data_url = base_data_url + str(vid_id)
            data_json = self._download_webpage(
                data_url, video_id,
@ -31,15 +34,16 @@ class SohuIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        mytv = mobj.group('mytv') is not None
        webpage = self._download_webpage(url, video_id)
        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
                                            webpage, u'video title')
        title = raw_title.partition('-')[0].strip()
        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
        vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
                                      u'video path')
        data = _fetch_data(vid)
        data = _fetch_data(vid, mytv)
        QUALITIES = ('ori', 'super', 'high', 'nor')
        vid_ids = [data['data'][q + 'Vid']
@ -51,7 +55,7 @@ class SohuIE(InfoExtractor):
        # For now, we just pick the highest available quality
        vid_id = vid_ids[-1]
        format_data = data if vid == vid_id else _fetch_data(vid_id)
        format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
        part_count = format_data['data']['totalBlocks']
        allot = format_data['allot']
        prot = format_data['prot']
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -209,7 +209,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
@ -484,7 +484,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
        elif len(s) == 86:
            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
        elif len(s) == 85:
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
            return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
        elif len(s) == 84:
            return s[81:36:-1] + s[0] + s[35:2:-1]
        elif len(s) == 83:
@ -583,7 +583,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
        return url_map
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@
 __version__ = '2013.09.06.1'
 __version__ = '2013.11.09'