Browse Source

Add an extractor for NBC news (closes #1320)

master
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
0bc56fa66a
2 changed files with 34 additions and 0 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 33
      youtube_dl/extractor/nbc.py

1
youtube_dl/extractor/__init__.py

@ -54,6 +54,7 @@ from .muzu import MuzuTVIE
from .myspass import MySpassIE from .myspass import MySpassIE
from .myvideo import MyVideoIE from .myvideo import MyVideoIE
from .nba import NBAIE from .nba import NBAIE
from .nbc import NBCNewsIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .pbs import PBSIE from .pbs import PBSIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE

33
youtube_dl/extractor/nbc.py

@ -0,0 +1,33 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
u'file': u'52753292.flv',
u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
u'info_dict': {
u'title': u'Crew emerges after four-month Mars food study',
u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
return {'id': video_id,
'title': info.find('headline').text,
'ext': 'flv',
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
'description': compat_str(info.find('caption').text),
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}
Loading…
Cancel
Save