Sergey M․
6 years ago
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
3 changed files with 134 additions and 92 deletions
Split View
Diff Options
-
2youtube_dl/extractor/extractors.py
-
133youtube_dl/extractor/tvnet.py
-
91youtube_dl/extractor/vtv.py
@ -0,0 +1,133 @@ |
|||
# coding: utf-8 |
|||
from __future__ import unicode_literals |
|||
|
|||
import re |
|||
|
|||
from .common import InfoExtractor |
|||
from ..compat import compat_str |
|||
from ..utils import ( |
|||
int_or_none, |
|||
unescapeHTML, |
|||
) |
|||
|
|||
|
|||
class TVNetIE(InfoExtractor): |
|||
_VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?P<id>[0-9]+)' |
|||
_TESTS = [{ |
|||
# video |
|||
'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h', |
|||
'md5': 'b4d7abe0252c9b47774760b7519c7558', |
|||
'info_dict': { |
|||
'id': '109788', |
|||
'ext': 'mp4', |
|||
'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang', |
|||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', |
|||
'is_live': False, |
|||
'view_count': int, |
|||
}, |
|||
}, { |
|||
# audio |
|||
'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi', |
|||
'md5': 'b5875ce9b0a2eecde029216d0e6db2ae', |
|||
'info_dict': { |
|||
'id': '27017', |
|||
'ext': 'm4a', |
|||
'title': 'VOV1 - Bản tin chiều (10/06/2018)', |
|||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', |
|||
'is_live': False, |
|||
}, |
|||
}, { |
|||
# live stream |
|||
'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1', |
|||
'info_dict': { |
|||
'id': '1011', |
|||
'ext': 'mp4', |
|||
'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', |
|||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', |
|||
'is_live': True, |
|||
}, |
|||
'params': { |
|||
'skip_download': True, |
|||
}, |
|||
}, { |
|||
# radio live stream |
|||
'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014', |
|||
'info_dict': { |
|||
'id': '1014', |
|||
'ext': 'm4a', |
|||
'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', |
|||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', |
|||
'is_live': True, |
|||
}, |
|||
'params': { |
|||
'skip_download': True, |
|||
}, |
|||
}] |
|||
|
|||
def _real_extract(self, url): |
|||
video_id = self._match_id(url) |
|||
|
|||
webpage = self._download_webpage(url, video_id) |
|||
|
|||
title = self._og_search_title( |
|||
webpage, default=None) or self._html_search_meta( |
|||
'title', webpage, default=None) or self._search_regex( |
|||
r'<title>([^<]+)<', webpage, 'title') |
|||
title = re.sub(r'\s*-\s*TV Net\s*$', '', title) |
|||
|
|||
if '/video/' in url or '/radio/' in url: |
|||
is_live = False |
|||
elif '/kenh-truyen-hinh/' in url: |
|||
is_live = True |
|||
else: |
|||
is_live = None |
|||
|
|||
data_file = unescapeHTML(self._search_regex( |
|||
r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage, |
|||
'data file', group='url')) |
|||
|
|||
stream_urls = set() |
|||
formats = [] |
|||
for stream in self._download_json(data_file, video_id): |
|||
if not isinstance(stream, dict): |
|||
continue |
|||
stream_url = stream.get('url') |
|||
if (stream_url in stream_urls or not stream_url or |
|||
not isinstance(stream_url, compat_str)): |
|||
continue |
|||
stream_urls.add(stream_url) |
|||
formats.extend(self._extract_m3u8_formats( |
|||
stream_url, video_id, 'mp4', |
|||
entry_protocol='m3u8' if is_live else 'm3u8_native', |
|||
m3u8_id='hls', fatal=False)) |
|||
self._sort_formats(formats) |
|||
|
|||
# better support for radio streams |
|||
if title.startswith('VOV'): |
|||
for f in formats: |
|||
f.update({ |
|||
'ext': 'm4a', |
|||
'vcodec': 'none', |
|||
}) |
|||
|
|||
thumbnail = self._og_search_thumbnail( |
|||
webpage, default=None) or unescapeHTML( |
|||
self._search_regex( |
|||
r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage, |
|||
'thumbnail', default=None, group='url')) |
|||
|
|||
if is_live: |
|||
title = self._live_title(title) |
|||
|
|||
view_count = int_or_none(self._search_regex( |
|||
r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>', |
|||
webpage, 'view count', default=None)) |
|||
|
|||
return { |
|||
'id': video_id, |
|||
'title': title, |
|||
'thumbnail': thumbnail, |
|||
'is_live': is_live, |
|||
'view_count': view_count, |
|||
'formats': formats, |
|||
} |
@ -1,91 +0,0 @@ |
|||
# coding: utf-8 |
|||
from __future__ import unicode_literals |
|||
|
|||
from .common import InfoExtractor |
|||
|
|||
import re |
|||
|
|||
from ..utils import extract_attributes |
|||
|
|||
class VTVIE(InfoExtractor): |
|||
_VALID_URL = r'https?://(au|ca|cz|de|jp|kr|tw|us|vn)\.tvnet\.gov\.vn/[^/]*/(?P<id>[0-9]+)/?' |
|||
_TESTS = [{ |
|||
# Livestream. Channel: VTV 1 |
|||
'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1', |
|||
'info_dict': { |
|||
'id': '1011', |
|||
'ext': 'mp4', |
|||
'title': r're:^VTV1 | LiveTV - TV Net [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', |
|||
'thumbnail': r're:https?://.*\.png$', |
|||
} |
|||
}, { |
|||
# Downloading a video. |
|||
'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h', |
|||
'md5': '5263c63d738569ed507980f1e49ebc03', |
|||
'info_dict': { |
|||
'id': '109788', |
|||
'ext': 'mp4', |
|||
'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang - TV Net', |
|||
'thumbnail': r're:https?://.*\.JPG$', |
|||
} |
|||
}, { |
|||
# Radio live stream. Channel: VOV 1 |
|||
'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014', |
|||
'info_dict': { |
|||
'id': '1014', |
|||
'ext': 'm4a', |
|||
'vcodec': 'none', |
|||
'title': r're:VOV1 | LiveTV - TV Net [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', |
|||
'thumbnail': r're:https?://.*\.png$', |
|||
} |
|||
|
|||
}] |
|||
|
|||
def _real_extract(self, url): |
|||
video_id = self._match_id(url) |
|||
webpage = self._download_webpage(url, video_id) |
|||
|
|||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title', default=None, fatal=False) |
|||
if title is None: |
|||
title = self._og_search_title(webpage) |
|||
title.strip() |
|||
|
|||
mediaplayer_div = self._search_regex(r'(<div[^>]*id="mediaplayer"[^>]*>)', webpage, 'mediaplayer element') |
|||
mediaplayer_div_attributes = extract_attributes(mediaplayer_div) |
|||
|
|||
thumbnail = mediaplayer_div_attributes.get("data-image") |
|||
|
|||
json_url = mediaplayer_div_attributes["data-file"] |
|||
video_streams = self._download_json(json_url, video_id) |
|||
|
|||
|
|||
# get any working playlist from streams. Currently there's 2 and the first always works, |
|||
# but you never know in the future |
|||
for stream in video_streams: |
|||
formats = self._extract_m3u8_formats(stream.get("url"), video_id, ext="mp4", fatal=False) |
|||
if formats: |
|||
break |
|||
|
|||
# better support radio streams |
|||
if title.startswith("VOV"): |
|||
for f in formats: |
|||
f["ext"] = "m4a" |
|||
f["vcodec"] = "none" |
|||
|
|||
if "/video/" in url or "/radio/" in url: |
|||
is_live = False |
|||
elif "/kenh-truyen-hinh/" in url: |
|||
is_live = True |
|||
else: |
|||
is_live = None |
|||
|
|||
if is_live: |
|||
title = self._live_title(title) |
|||
|
|||
return { |
|||
'id': video_id, |
|||
'title': title, |
|||
'thumbnail': thumbnail, |
|||
'formats': formats, |
|||
'is_live': is_live, |
|||
} |
Write
Preview
Loading…
Cancel
Save