Browse Source

Merge remote-tracking branch 'soult/br'

master
Philipp Hagemeister 10 years ago
parent
commit
1052d2bfec
2 changed files with 83 additions and 0 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 82
      youtube_dl/extractor/br.py

1
youtube_dl/extractor/__init__.py

@@ -19,6 +19,7 @@ from .bbccouk import BBCCoUkIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .c56 import C56IE

82
youtube_dl/extractor/br.py

@@ -0,0 +1,82 @@
# coding: utf-8
from .common import InfoExtractor
class BRIE(InfoExtractor):
    """Information extractor for the Bayerischer Rundfunk Mediathek.

    The video page embeds the site-relative path of an XML document
    (the ``dataURL`` player setting). That document is downloaded and its
    ``video`` elements supply the metadata, the downloadable formats and
    the thumbnails.
    """

    IE_DESC = u"Bayerischer Rundfunk Mediathek"
    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-]+\.html)$"
    # Prefix for the site-relative paths found in the page and in the XML.
    _BASE_URL = u"http://www.br.de"

    _TESTS = [
        {
            u"url": u"http://www.br.de/mediathek/video/anselm-gruen-114.html",
            u"file": u"2c8d81c5-6fb7-4a74-88d4-e768e5856532.mp4",
            u"md5": u"c4f83cf0f023ba5875aba0bf46860df2",
            u"info_dict": {
                u"title": u"Feiern und Verzichten",
                u"description": u"Anselm Grün: Feiern und Verzichten",
                u"uploader": u"BR/Birgit Baier",
                u"upload_date": u"20140301"
            }
        }
    ]

    def _real_extract(self, url):
        """Return the info dict of the first video described for *url*.

        Every ``video`` element of the XML document is converted to an
        info dict; only the first one is returned. A warning is reported
        through the downloader when more than one is present.

        Raises IndexError if the XML document contains no ``video``
        element.
        """
        page = self._download_webpage(url, None)
        xml_url = self._search_regex(
            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);",
            page, "XMLURL")
        xml = self._download_xml(self._BASE_URL + xml_url, None)

        videos = []
        for xml_video in xml.findall("video"):
            # A video without teaser images yields an empty list here, so
            # the missing thumbnail must not abort the whole extraction.
            thumbnails = self._extract_thumbnails(
                xml_video.find("teaserImage/variants"))
            # The dot-separated components of broadcastDate are reversed
            # and concatenated (the test above expects e.g. "20140301").
            date_parts = xml_video.find("broadcastDate").text.split(".")
            videos.append({
                "id": xml_video.get("externalId"),
                "title": xml_video.find("title").text,
                "formats": self._extract_formats(xml_video.find("assets")),
                "thumbnails": thumbnails,
                "thumbnail": thumbnails[0]["url"] if thumbnails else None,
                # Collapse line breaks in the share title to single spaces.
                "description": " ".join(
                    xml_video.find("shareTitle").text.splitlines()),
                "uploader": xml_video.find("author").text,
                "upload_date": "".join(reversed(date_parts)),
                "webpage_url": xml_video.find("permalink").text,
            })

        if len(videos) > 1:
            # The trailing space keeps the two literals from fusing into
            # "pleasereport" when they are concatenated.
            self._downloader.report_warning(
                u'found multiple videos; please '
                u'report this with the video URL to http://yt-dl.org/bug')
        return videos[0]

    def _extract_formats(self, assets):
        """Build the list of format dicts from an ``assets`` element.

        ``asset`` children without a ``downloadUrl`` child are skipped.
        The resulting list is passed to ``self._sort_formats`` before it
        is returned.
        """
        vformats = []
        for asset in assets.findall("asset"):
            download_url = asset.find("downloadUrl")
            if download_url is None:
                continue

            container = asset.find("mediaType").text
            width = int(asset.find("frameWidth").text)
            height = int(asset.find("frameHeight").text)
            tbr = int(asset.find("bitrateVideo").text)
            vcodec = asset.find("codecVideo").text

            vformats.append({
                "url": download_url.text,
                "ext": container,
                "format_id": asset.get("type"),
                "width": width,
                "height": height,
                "resolution": "%ix%i" % (width, height),
                "tbr": tbr,
                "abr": int(asset.find("bitrateAudio").text),
                "vcodec": vcodec,
                "container": container,
                "filesize": int(asset.find("size").text),
                "preference": -1,
                "quality": -1,
                "format": "%s container with %i Kbps %s" % (container, tbr, vcodec),
            })

        self._sort_formats(vformats)
        return vformats

    def _extract_thumbnails(self, variants):
        """Build the list of thumbnail dicts from a ``variants`` element.

        Returns the thumbnails ordered by pixel area (width * height),
        largest first. Returns an empty list when *variants* is None,
        i.e. when the lookup of the element found nothing.
        """
        if variants is None:
            return []

        thumbnails = []
        for variant in variants.findall("variant"):
            width = int(variant.find("width").text)
            height = int(variant.find("height").text)
            thumbnails.append({
                # The XML carries site-relative thumbnail paths.
                "url": self._BASE_URL + variant.find("url").text,
                "width": width,
                "height": height,
                "resolution": "%ix%i" % (width, height),
            })

        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
        return thumbnails
Loading…
Cancel
Save