Browse Source

Merge branch 'jukebox' of https://github.com/remitamine/youtube-dl into remitamine-jukebox

master
remitamine 9 years ago
parent
commit
06e4874c99
4 changed files with 66 additions and 116 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 6
      youtube_dl/extractor/generic.py
  3. 59
      youtube_dl/extractor/jukebox.py
  4. 116
      youtube_dl/extractor/ultimedia.py

1
youtube_dl/extractor/__init__.py

@ -302,7 +302,6 @@ from .izlesene import IzleseneIE
from .jadorecettepub import JadoreCettePubIE from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE from .jove import JoveIE
from .jukebox import JukeboxIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE from .kaltura import KalturaIE

6
youtube_dl/extractor/generic.py

@ -56,6 +56,7 @@ from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE from .pladform import PladformIE
from .googledrive import GoogleDriveIE from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .ultimedia import UltimediaIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1807,6 +1808,11 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia') return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
# Look for Ulltimedia embeds
ultimedia_url = UltimediaIE._extract_url(webpage)
if ultimedia_url:
return self.url_result(self._proto_relative_url(ultimedia_url), 'Ultimedia')
# Look for AdobeTVVideo embeds # Look for AdobeTVVideo embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',

59
youtube_dl/extractor/jukebox.py

@ -1,59 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
RegexNotFoundError,
unescapeHTML,
)
class JukeboxIE(InfoExtractor):
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<id>[a-z0-9\-]+)\.html'
_TEST = {
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
'info_dict': {
'id': 'r303r',
'ext': 'flv',
'title': 'Kosheen-En Vivo Pride',
'uploader': 'Kosheen',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
html = self._download_webpage(url, video_id)
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
if re.search(r'class="jkb_waiting"', iframe_html) is not None:
raise ExtractorError('Video is not available(in your country?)!')
self.report_extraction(video_id)
try:
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
iframe_html, 'video url')
video_url = unescapeHTML(video_url).replace('\/', '/')
except RegexNotFoundError:
youtube_url = self._search_regex(
r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
iframe_html, 'youtube url')
youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
self.to_screen('Youtube video detected')
return self.url_result(youtube_url, ie='Youtube')
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
html, 'title')
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
html, 'artist')
return {
'id': video_id,
'url': video_url,
'title': artist + '-' + title,
'uploader': artist,
}

116
youtube_dl/extractor/ultimedia.py

@ -4,17 +4,30 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
qualities,
unified_strdate,
clean_html,
)
from ..utils import int_or_none
class UltimediaIE(InfoExtractor): class UltimediaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
_VALID_URL = r'''(?x)
https?://(?:www\.)?ultimedia\.com/
(?:
deliver/
(?P<embed_type>
generic|
musique
)
(?:/[^/]+)*/
(?:
src|
article
)|
default/index/video
(?P<site_type>
generic|
music
)
/id
)/(?P<id>[\d+a-z]+)'''
_TESTS = [{ _TESTS = [{
# news # news
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r', 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
@ -23,9 +36,11 @@ class UltimediaIE(InfoExtractor):
'id': 's8uk0r', 'id': 's8uk0r',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées', 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
'thumbnail': 're:^https?://.*\.jpg', 'thumbnail': 're:^https?://.*\.jpg',
'duration': 74,
'upload_date': '20150317', 'upload_date': '20150317',
'timestamp': 1426604939,
'uploader_id': '3fszv',
}, },
}, { }, {
# music # music
@ -34,72 +49,61 @@ class UltimediaIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'xvpfp8', 'id': 'xvpfp8',
'ext': 'mp4', 'ext': 'mp4',
'title': "Two - C'est la vie (Clip)",
'description': 'Two',
'title': 'Two - C\'est La Vie (clip)',
'thumbnail': 're:^https?://.*\.jpg', 'thumbnail': 're:^https?://.*\.jpg',
'duration': 233,
'upload_date': '20150224', 'upload_date': '20150224',
'timestamp': 1424760500,
'uploader_id': '3rfzk',
}, },
}] }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
webpage)
if mobj:
return mobj.group('url')
deliver_url = self._proto_relative_url(self._search_regex(
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_type = mobj.group('embed_type') or mobj.group('site_type')
if video_type == 'music':
video_type = 'musique'
deliver_page = self._download_webpage(
deliver_url, video_id, 'Downloading iframe page')
deliver_info = self._download_json(
'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
video_id)
if '>This video is currently not available' in deliver_page:
raise ExtractorError(
'Video %s is currently not available' % video_id, expected=True)
yt_id = deliver_info.get('yt_id')
if yt_id:
return self.url_result(yt_id, 'Youtube')
player = self._parse_json(
self._search_regex(
r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
deliver_page, 'player'),
video_id)
jwconf = deliver_info['jwconf']
quality = qualities(['flash', 'html5'])
formats = [] formats = []
for mode in player['modes']:
video_url = mode.get('config', {}).get('file')
if not video_url:
continue
if re.match(r'https?://www\.youtube\.com/.+?', video_url):
return self.url_result(video_url, 'Youtube')
for source in jwconf['playlist'][0]['sources']:
formats.append({ formats.append({
'url': video_url,
'format_id': mode.get('type'),
'quality': quality(mode.get('type')),
'url': source['file'],
'format_id': source.get('label'),
}) })
self._sort_formats(formats)
thumbnail = player.get('image')
title = clean_html((
self._html_search_regex(
r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
webpage, 'title', default=None) or
self._search_regex(
r"var\s+nameVideo\s*=\s*'([^']+)'",
deliver_page, 'title')))
description = clean_html(self._html_search_regex(
r'(?s)<span>Description</span>(.+?)</p>', webpage,
'description', fatal=False))
self._sort_formats(formats)
upload_date = unified_strdate(self._search_regex(
r'Ajouté le\s*<span>([^<]+)', webpage,
'upload date', fatal=False))
title = deliver_info['title']
thumbnail = jwconf.get('image')
duration = int_or_none(deliver_info.get('duration'))
timestamp = int_or_none(deliver_info.get('release_time'))
uploader_id = deliver_info.get('owner_id')
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'upload_date': upload_date,
'duration': duration,
'timestamp': timestamp,
'uploader_id': uploader_id,
'formats': formats, 'formats': formats,
} }
Loading…
Cancel
Save