[massengeschmacktv] Add support for Massengeschmack TV (replaces Fernseh Kritik TV)

7 years ago · cd9ff4ec5b
3 changed files with 78 additions and 52 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -348,7 +348,6 @@ from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
 from .fivetv import FiveTVIE
 from .fktv import FKTVIE
 from .flickr import FlickrIE
 from .flipagram import FlipagramIE
 from .folketinget import FolketingetIE
@ -572,6 +571,7 @@ from .mangomolo import (
    MangomoloLiveIE,
 )
 from .manyvids import ManyVidsIE
 from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .mediaset import MediasetIE
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@ -1,51 +0,0 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    determine_ext,
    js_to_json,
 )
 class FKTVIE(InfoExtractor):
    """Information extractor for fernsehkritik.tv ``folge-<number>`` pages."""

    IE_NAME = 'fernsehkritik.tv'
    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
    _TEST = {
        'url': 'http://fernsehkritik.tv/folge-1',
        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Folge 1 vom 10. April 2007',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode referenced by ``url``.

        The episode number is taken from the URL, the matching ``/play``
        page is downloaded, and the title, poster and media sources are
        scraped from that page.
        """
        video_id = self._match_id(url)
        play_page_url = 'http://fernsehkritik.tv/folge-%s/play' % video_id
        page = self._download_webpage(play_page_url, video_id)

        # The title lives in the first <h3> element of the play page.
        raw_title = self._html_search_regex('<h3>([^<]+)</h3>', page, 'title')
        video_title = clean_html(raw_title)

        # The poster is optional, so a missing match is not fatal.
        poster = self._search_regex(
            r'POSTER\s*=\s*"([^"]+)', page, 'thumbnail', fatal=False)

        # MEDIA is a JavaScript array literal; js_to_json makes it parseable.
        media_literal = self._search_regex(
            r'(?s)MEDIA\s*=\s*(\[.+?\]);', page, 'media')
        media_entries = self._parse_json(media_literal, video_id, js_to_json)

        # Entries without a 'src' value are skipped.
        formats = [
            {
                'url': src,
                'format_id': determine_ext(src),
            }
            for src in (entry.get('src') for entry in media_entries)
            if src
        ]
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnail': poster,
        }
        return info
--- a/youtube_dl/extractor/massengeschmacktv.py
+++ b/youtube_dl/extractor/massengeschmacktv.py
@ -0,0 +1,77 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    determine_ext,
    int_or_none,
    js_to_json,
    mimetype2ext,
    parse_filesize,
 )
 class MassengeschmackTVIE(InfoExtractor):
    """Information extractor for massengeschmack.tv ``/play/<id>`` pages."""

    IE_NAME = 'massengeschmack.tv'
    _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'
    _TEST = {
        'url': 'https://massengeschmack.tv/play/fktv202',
        'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
        'info_dict': {
            'id': 'fktv202',
            'ext': 'mp4',
            'title': 'Fernsehkritik-TV - Folge 202',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode referenced by ``url``.

        Formats are collected from two places on the play page: the
        JavaScript ``MEDIA`` array (player sources, possibly HLS) and the
        HTML download links that carry a label, an optional resolution and
        a file size.
        """
        episode = self._match_id(url)

        webpage = self._download_webpage(url, episode)
        title = clean_html(self._html_search_regex(
            '<h3>([^<]+)</h3>', webpage, 'title'))
        # The poster is optional, so a missing match is not fatal.
        thumbnail = self._search_regex(
            r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)

        # MEDIA is a JavaScript array literal; js_to_json makes it parseable.
        sources = self._parse_json(
            self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'),
            episode, js_to_json)

        formats = []
        for source in sources:
            furl = source.get('src')
            if not furl:
                continue
            furl = self._proto_relative_url(furl)
            # determine_ext() returns a non-empty placeholder by default when
            # the URL has no recognisable extension, which would make the
            # MIME type fallback unreachable; ask for None instead.
            ext = determine_ext(furl, None) or mimetype2ext(source.get('type'))
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    furl, episode, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': furl,
                    # Reuse the resolved extension so the MIME type fallback
                    # is reflected in the format id as well.
                    'format_id': ext,
                })

        for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
                                   <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
                                   <strong>(?P<format_id>.+?)</strong>.*?
                                   <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
                                ''', webpage):
            formats.append({
                # The pattern also accepts scheme-less '//host/...' links, so
                # normalise them the same way as the MEDIA sources above.
                'url': self._proto_relative_url(durl),
                'format_id': format_id,
                'width': int_or_none(width),
                'height': int_or_none(height),
                'filesize': parse_filesize(filesize),
                # Labels starting with 'Audio' are marked as having no video.
                'vcodec': 'none' if format_id.startswith('Audio') else None,
            })

        self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))

        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }