Browse Source

Merge branch 'master' of github.com:rg3/youtube-dl

master
Sergey M․ 9 years ago
parent
commit
4aa353673b
6 changed files with 121 additions and 23 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 12
      youtube_dl/extractor/channel9.py
  3. 4
      youtube_dl/extractor/criterion.py
  4. 41
      youtube_dl/extractor/fczenit.py
  5. 84
      youtube_dl/extractor/fivemin.py
  6. 2
      youtube_dl/extractor/vimeo.py

1
youtube_dl/extractor/__init__.py

@ -167,6 +167,7 @@ from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
from .fc2 import FC2IE
from .fczenit import FczenitIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE

12
youtube_dl/extractor/channel9.py

@ -158,7 +158,7 @@ class Channel9IE(InfoExtractor):
def _extract_session_day(self, html):
m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
return m.group('day') if m is not None else None
return m.group('day').strip() if m is not None else None
def _extract_session_room(self, html):
m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
@ -224,12 +224,12 @@ class Channel9IE(InfoExtractor):
if contents is None:
return contents
authors = self._extract_authors(html)
if len(contents) > 1:
raise ExtractorError('Got more than one entry')
result = contents[0]
result['authors'] = self._extract_authors(html)
for content in contents:
content['authors'] = authors
return contents
return result
def _extract_session(self, html, content_path):
contents = self._extract_content(html, content_path)

4
youtube_dl/extractor/criterion.py

@ -27,9 +27,7 @@ class CriterionIE(InfoExtractor):
final_url = self._search_regex(
r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
title = self._og_search_title(webpage)
description = self._html_search_regex(
r'<meta name="description" content="(.+?)" />',
webpage, 'video description')
description = self._html_search_meta('description', webpage)
thumbnail = self._search_regex(
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
webpage, 'thumbnail url')

41
youtube_dl/extractor/fczenit.py

@ -0,0 +1,41 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class FczenitIE(InfoExtractor):
    """Extractor for video pages under fc-zenit.ru/video/gl<id>."""

    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://fc-zenit.ru/video/gl6785/',
        'md5': '458bacc24549173fe5a5aa29174a5606',
        'info_dict': {
            'id': '6785',
            'ext': 'mp4',
            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
        },
    }

    def _real_extract(self, url):
        """Download the video page and build the info dict.

        Returns a dict with the video ``id``, ``title`` and the list of
        ``formats`` (one entry per url/bitrate pair found in the page).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title')

        # The line following "bitrates:" up to the closing bracket holds the
        # url/bitrate pairs.
        bitrates_raw = self._html_search_regex(
            r'bitrates:.*\n(.*)\]', webpage, 'video URL')
        # Capture the whole run of digits: the previous "{3}?" was an exact
        # count of three, which truncated longer bitrates and skipped
        # shorter ones.
        bitrates = re.findall(
            r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]+)', bitrates_raw)

        formats = [{
            'url': furl,
            # Convert the captured text to a number so that sorting and
            # display code receive an integer bitrate, not a string.
            'tbr': int(tbr),
        } for furl, tbr in bitrates]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
        }

84
youtube_dl/extractor/fivemin.py

@ -2,11 +2,15 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_parse_qs,
compat_urllib_parse_urlparse,
compat_urlparse,
)
from ..utils import (
ExtractorError,
parse_duration,
replace_extension,
)
@ -28,6 +32,7 @@ class FiveMinIE(InfoExtractor):
'id': '518013791',
'ext': 'mp4',
'title': 'iPad Mini with Retina Display Review',
'duration': 177,
},
},
{
@ -38,9 +43,52 @@ class FiveMinIE(InfoExtractor):
'id': '518086247',
'ext': 'mp4',
'title': 'How to Make a Next-Level Fruit Salad',
'duration': 184,
},
},
]
_ERRORS = {
'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.',
'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.',
'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.',
'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.',
'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
}
_QUALITIES = {
1: {
'width': 640,
'height': 360,
},
2: {
'width': 854,
'height': 480,
},
4: {
'width': 1280,
'height': 720,
},
8: {
'width': 1920,
'height': 1080,
},
16: {
'width': 640,
'height': 360,
},
32: {
'width': 854,
'height': 480,
},
64: {
'width': 1280,
'height': 720,
},
128: {
'width': 640,
'height': 360,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
@ -59,26 +107,36 @@ class FiveMinIE(InfoExtractor):
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
video_id)
if not response['success']:
err_msg = response['errorMessage']
if err_msg == 'ErrorVideoUserNotGeo':
msg = 'Video not available from your location'
else:
msg = 'Aol said: %s' % err_msg
raise ExtractorError(msg, expected=True, video_id=video_id)
raise ExtractorError(
'%s said: %s' % (
self.IE_NAME,
self._ERRORS.get(response['errorMessage'], response['errorMessage'])),
expected=True)
info = response['binding'][0]
second_id = compat_str(int(video_id[:-2]) + 1)
formats = []
for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]:
if any(r['ID'] == quality for r in info['Renditions']):
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
for rendition in info['Renditions']:
if rendition['RenditionType'] == 'm3u8':
formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
elif rendition['RenditionType'] == 'aac':
continue
else:
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
quality = self._QUALITIES.get(rendition['ID'], {})
formats.append({
'format_id': compat_str(quality),
'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
'height': height,
'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']),
'url': rendition_url,
'width': quality.get('width'),
'height': quality.get('height'),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': info['Title'],
'thumbnail': info.get('ThumbURL'),
'duration': parse_duration(info.get('Duration')),
'formats': formats,
}

2
youtube_dl/extractor/vimeo.py

@ -212,7 +212,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = url.replace('http://', 'https://')
password_request = compat_urllib_request.Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'clip_v=1; vuid=%s' % vuid)
password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)
password_request.add_header('Referer', url)
return self._download_webpage(
password_request, video_id,

Loading…
Cancel
Save