Browse Source

[biobiochiletv] Fix extraction, extract m3u8 formats and overall improve (Closes #7314)

master
Sergey M․ 8 years ago
parent
commit
fa023ccb2c
3 changed files with 87 additions and 75 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 86
      youtube_dl/extractor/biobiochiletv.py
  3. 75
      youtube_dl/extractor/biobiotv.py

1
youtube_dl/extractor/__init__.py

@ -72,6 +72,7 @@ from .bet import BetIE
from .bigflix import BigflixIE from .bigflix import BigflixIE
from .bild import BildIE from .bild import BildIE
from .bilibili import BiliBiliIE from .bilibili import BiliBiliIE
from .biobiochiletv import BioBioChileTVIE
from .bleacherreport import ( from .bleacherreport import (
BleacherReportIE, BleacherReportIE,
BleacherReportCMSIE, BleacherReportCMSIE,

86
youtube_dl/extractor/biobiochiletv.py

@ -0,0 +1,86 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import remove_end
class BioBioChileTVIE(InfoExtractor):
    """Extractor for video notes published under tv.biobiochile.cl/notas/.

    The note page is downloaded and scanned for the ``loadFWPlayerVideo(...)``
    call, whose second argument is the media file path. That path is joined
    with a base URL (taken from the page when present, otherwise a hard-coded
    default) to build both an HLS playlist URL and a direct HTTP URL.
    """

    _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
    _TESTS = [{
        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
        'md5': '26f51f03cf580265defefb4518faec09',
        'info_dict': {
            'id': 'sobre-camaras-y-camarillas-parlamentarias',
            'ext': 'mp4',
            'title': 'Sobre Cámaras y camarillas parlamentarias',
            # Raw string: '\.' is not a valid escape in a plain string literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Fernando Atria',
        },
    }, {
        # different uploader layout
        'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
        'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
        'info_dict': {
            'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
            'ext': 'mp4',
            'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Piangella Obrador',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
        'only_matching': True,
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict (id, title, thumbnail, uploader, formats) for a note URL."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The Open Graph title carries a site suffix that is not part of the
        # actual video title.
        title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')

        # Second argument of loadFWPlayerVideo(...) is the media file path.
        file_url = self._search_regex(
            r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
            webpage, 'file url', group='url')

        # The player config concatenates a base URL with fileURL; fall back to
        # the known default host when that snippet is not found in the page.
        base_url = self._search_regex(
            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
            'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
            group='url')

        # HLS extraction is best-effort (fatal=False); the direct HTTP format
        # appended below is always present.
        formats = self._extract_m3u8_formats(
            '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
        f = {
            'url': '%s%s' % (base_url, file_url),
            'format_id': 'http',
            'protocol': 'http',
            'preference': 1,
        }
        if formats:
            # Start from a copy of the last HLS format so the HTTP entry
            # inherits its remaining fields, then override the HTTP-specific
            # keys. NOTE(review): presumably the last entry is the best
            # quality variant - confirm against _extract_m3u8_formats.
            f_copy = formats[-1].copy()
            f_copy.update(f)
            f = f_copy
        formats.append(f)
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        # Uploader is optional: a missing author link must not abort extraction.
        uploader = self._html_search_regex(
            r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
            webpage, 'uploader', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'formats': formats,
        }

75
youtube_dl/extractor/biobiotv.py

@ -1,75 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class BioBioTVIE(InfoExtractor):
    """Extractor for dated video notes on tv.biobiochile.cl.

    The year and slug are read from the page URL; the media file name is read
    from the ``loadFWPlayerVideo("player_0", ...)`` call in the page and
    combined with the URL's year to form a direct MP4 URL.
    """

    _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?P<year>\d{4})/\d{2}/\d{2}/(?P<id>[\w-]+)(?:\.shtml)?'
    _TESTS = [{
        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
        'md5': '26f51f03cf580265defefb4518faec09',
        'info_dict': {
            'id': 'col_c266',
            'display_id': 'sobre-camaras-y-camarillas-parlamentarias',
            'ext': 'mp4',
            'title': 'Sobre Cámaras y camarillas parlamentarias - BioBioChile TV',
            'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/atria-2010-730x350.jpg',
            'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/col_c266.mp4',
            'uploader': 'Fernando Atria',
        }
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
        'md5': 'a8c868e6b5f6c17d56873d5633204f84',
        'info_dict': {
            'id': 'col_c270',
            'display_id': 'ninos-transexuales-de-quien-es-la-decision',
            'ext': 'mp4',
            'title': 'Niños transexuales: ¿De quién es la decisión? - BioBioChile TV',
            'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/samantha-2210-730x350.jpg',
            'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/col_c270.mp4',
            'uploader': 'Samantha Morán',
        }
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
        'md5': 'c8369b50d42ff0a4f6b969fbd1a7c32d',
        'info_dict': {
            'id': 'Keno_Pinto',
            'display_id': 'exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo',
            'ext': 'mp4',
            'title': 'Exclusivo: Héctor Pinto, formador de “Chupete”, revela versión del ex delantero albo - BioBioChile TV',
            'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/pinto-730x350.jpg',
            'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/Keno_Pinto.mp4',
            'uploader': 'Juan Pablo Echenique',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a note URL, using a single direct MP4 URL."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        year = mobj.group('year')

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_meta(
            'og:title', webpage, 'title', fatal=True)

        thumbnail = self._html_search_meta(
            'og:image', webpage, 'thumbnail', fatal=True)

        # The file name (without the leading year directory and the .mp4
        # extension) doubles as the video id. The field name passed here is
        # what shows up in the error message when the pattern is not found.
        video_id = self._html_search_regex(
            r'loadFWPlayerVideo\(\"player_0\", \"\d{4}/(.+)\.mp4\"\)', webpage, 'video id')

        # Kept in its own name so the ``url`` argument is not overwritten.
        # NOTE(review): the year comes from the page URL, not from the player
        # call - confirm the two always agree.
        video_url = 'http://unlimited2-cl.digitalproserver.com/bbtv/' + year + '/' + video_id + '.mp4'

        # Uploader is optional; a missing author link does not abort extraction.
        uploader = self._search_regex(
            r'biobiochile\.cl/author[^"]+"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'display_id': display_id,
            'thumbnail': thumbnail,
            'uploader': uploader,
        }
Loading…
Cancel
Save