[comcarcoff] (#4454)

10 years ago · dc5596ff54
3 changed files with 53 additions and 47 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -48,7 +48,6 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
 from .ccc import ComCarCoffIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
@@ -66,6 +65,7 @@ from .cnn import (
 )
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -1,46 +0,0 @@
 # encoding: utf-8
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    unified_strdate,
 )
 class ComCarCoffIE(InfoExtractor):
    """Extractor for episode pages on comediansincarsgettingcoffee.com."""

    # The URL slug after the host name is captured as the ``id`` group.
    _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]+)/?'

    _TESTS = [{
        'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
        'info_dict': {
            'id': 'miranda-sings-happy-thanksgiving-miranda',
            'upload_date': '20141127',
            'title': 'Happy Thanksgiving Miranda',
            'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the episode page at ``url``.

        The page is downloaded, the JSON payload of its ``videoData``
        script element is decoded, and the entry named by
        ``activeVideo.video`` is turned into the returned dictionary
        (id, display_id, title, description, upload_date, thumbnail,
        formats).  Every field is read with a plain subscript, so a
        missing key raises ``KeyError``.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The site embeds its video catalogue as JSON in a <script> element.
        raw_json = self._search_regex(
            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
            page, 'json')
        catalogue = json.loads(raw_json)

        active_id = catalogue['activeVideo']['video']
        entry = catalogue['videos'][active_id]

        # Read the fields in the same order as the result keys below.
        title = entry['title']
        description = entry['description']
        # XXX: pubDate is a full ISO timestamp; reducing it to a bare date
        # throws information away.
        upload_date = unified_strdate(entry['pubDate'])
        # XXX: entry['images']['poster'] is available as well but is not used.
        thumbnail = entry['images']['thumb']
        formats = self._extract_m3u8_formats(entry['mediaUrl'], active_id)

        return {
            'id': active_id,
            'display_id': slug,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            'formats': formats,
        }
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -0,0 +1,52 @@
 # encoding: utf-8
 import json
 from .common import InfoExtractor
 from ..utils import parse_iso8601
 class ComCarCoffIE(InfoExtractor):
    """Extractor for episode pages on comediansincarsgettingcoffee.com.

    The episode metadata is read from a JSON document embedded in the page
    inside ``<script type="application/json" id="videoData">``.
    """

    # The URL slug after the host name is captured as the ``id`` group.
    _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]+)/?'

    _TESTS = [{
        'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
        'info_dict': {
            'id': 'miranda-sings-happy-thanksgiving-miranda',
            'ext': 'mp4',
            'upload_date': '20141127',
            'timestamp': 1417107600,
            'title': 'Happy Thanksgiving Miranda',
            'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
        },
        'params': {
            'skip_download': 'requires ffmpeg',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the episode page at ``url``.

        Returns a dictionary with ``id``, ``display_id``, ``title``,
        ``description``, ``timestamp``, ``thumbnails`` and ``formats``.

        ``activeVideo``, the selected ``videos`` entry, ``title`` and
        ``mediaUrl`` are required and raise ``KeyError`` when absent.
        ``description``, ``pubDate`` and the thumbnail images are optional
        metadata and are tolerated when missing.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        full_data = json.loads(self._search_regex(
            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
            webpage, 'full data json'))

        # The page lists several videos; 'activeVideo' names the one shown.
        video_id = full_data['activeVideo']['video']
        video_data = full_data['videos'][video_id]

        # Thumbnails are optional: keep only the image URLs that are present
        # instead of failing the whole extraction on a missing key.
        images = video_data.get('images') or {}
        thumbnails = [
            {'url': images[key]}
            for key in ('thumb', 'poster')
            if images.get(key)
        ]

        formats = self._extract_m3u8_formats(
            video_data['mediaUrl'], video_id, ext='mp4')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': video_data['title'],
            'description': video_data.get('description'),
            'timestamp': parse_iso8601(video_data.get('pubDate')),
            'thumbnails': thumbnails,
            'formats': formats,
        }