Browse Source

Add VevoIE

master
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
70d1924f8b
3 changed files with 53 additions and 0 deletions
  1. 11
      test/tests.json
  2. 2
      youtube_dl/extractor/__init__.py
  3. 40
      youtube_dl/extractor/vevo.py

11
test/tests.json

@ -649,5 +649,16 @@
"info_dict": {
"title": "When Girls Act Like D-Bags"
}
},
{
"name": "Vevo",
"url": "http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280",
"file": "GB1101300280.mp4",
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
"info_dict": {
"title": "Somebody To Die For",
"upload_date": "20130624",
"uploader": "Hurts"
}
}
]

2
youtube_dl/extractor/__init__.py

@ -44,6 +44,7 @@ from .ted import TEDIE
from .tumblr import TumblrIE
from .ustream import UstreamIE
from .vbox7 import Vbox7IE
from .vevo import VevoIE
from .vimeo import VimeoIE
from .vine import VineIE
from .worldstarhiphop import WorldStarHipHopIE
@ -125,6 +126,7 @@ def gen_extractors():
GametrailersIE(),
StatigramIE(),
BreakIE(),
VevoIE(),
GenericIE()
]

40
youtube_dl/extractor/vevo.py

@ -0,0 +1,40 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
unified_strdate,
ExtractorError,
)
class VevoIE(InfoExtractor):
_VALID_URL = r'http://www.vevo.com/watch/.*?/.*?/(?P<id>.*)$'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
json_url = 'http://www.vevo.com/data/video/%s' % video_id
base_url = 'http://smil.lvl3.vevo.com'
videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower())
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls')
self.report_extraction(video_id)
video_info = json.loads(info_json)
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage))
if m_urls is None or len(m_urls) == 0:
raise ExtractorError(u'Unable to extract video url')
# They are sorted from worst to best quality
m_url = m_urls[-1]
video_url = base_url + m_url.group('url')
ext = m_url.group('ext')
return {'url': video_url,
'ext': ext,
'id': video_id,
'title': video_info['title'],
'thumbnail': video_info['img'],
'upload_date': video_info['launchDate'].replace('/',''),
'uploader': video_info['Artists'][0]['title'],
}
Loading…
Cancel
Save