Browse Source

Merge remote-tracking branch 'origin/master'

master
Philipp Hagemeister 11 years ago
parent
commit
204da0d3e3
2 changed files with 77 additions and 0 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 76
      youtube_dl/extractor/mit.py

1
youtube_dl/extractor/__init__.py

@ -50,6 +50,7 @@ from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
from .metacafe import MetacafeIE
from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mtv import MTVIE
from .muzu import MuzuTVIE

76
youtube_dl/extractor/mit.py

@ -0,0 +1,76 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_id,
)
class TechTVMITIE(InfoExtractor):
IE_NAME = u'techtv.mit.edu'
_VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
_TEST = {
u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
u'file': u'25418.mp4',
u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
u'info_dict': {
u'title': u'MIT DNA Learning Center Set',
u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
embed_page = self._download_webpage(
'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
note=u'Downloading embed page')
base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
embed_page, u'base url')
formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
u'video formats')
formats = json.loads(formats_json)
formats = sorted(formats, key=lambda f: f['bitrate'])
title = get_element_by_id('edit-title', webpage)
description = clean_html(get_element_by_id('edit-description', webpage))
thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
embed_page, u'thumbnail', flags=re.DOTALL)
return {'id': video_id,
'title': title,
'url': base_url + formats[-1]['url'].replace('mp4:', ''),
'ext': 'mp4',
'description': description,
'thumbnail': thumbnail,
}
class MITIE(TechTVMITIE):
IE_NAME = u'video.mit.edu'
_VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
_TEST = {
u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
u'file': u'21783.mp4',
u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
u'info_dict': {
u'title': u'The Government is Profiling You',
u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
u'embed url')
return self.url_result(embed_url, ie='TechTVMIT')
Loading…
Cancel
Save