Browse Source

Merge branch 'MLB' of https://github.com/chaochichen/youtube-dl into chaochichen-MLB

master
Sergey M․ 10 years ago
parent
commit
1aa42fedee
2 changed files with 75 additions and 0 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 74
      youtube_dl/extractor/mlb.py

1
youtube_dl/extractor/__init__.py

@ -170,6 +170,7 @@ from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mixcloud import MixcloudIE
from .mlb import MlbIE
from .mpora import MporaIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE

74
youtube_dl/extractor/mlb.py

@ -0,0 +1,74 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class MlbIE(InfoExtractor):
_VALID_URL = r'http?://m\.mlb\.com/video/topic/[0-9]+/v(?P<id>n?\d+)/.*$'
_TEST = {
'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
'md5': u'd9c022c10d21f849f49c05ae12a8a7e9',
'info_dict': {
'id': '34496663',
'ext': 'mp4',
'format': 'mp4',
'description': "7/11/14: Giancarlo Stanton practices for the Home Run Derby prior to the game against the Mets",
'title': "Stanton prepares for Derby",
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage, default=video_id)
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)"/>', webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(r'<meta itemprop="image" (?:content|value)="(.*?)" />', webpage, 'image', fatal=False)
# use the video_id to find the Media detail XML
id_len = len(video_id)
_mediadetail_url = 'http://m.mlb.com/gen/multimedia/detail/'+video_id[id_len-3]+'/'+video_id[id_len-2]+'/'+video_id[id_len-1]+'/'+video_id+'.xml'
mediadetails = self._download_xml(_mediadetail_url, video_id, "Downloading media detail...")
has1500K = 0
has1200K = 0
has600K = 0
# loop through the list of url's and only get the highest quality MP4 content
for element in mediadetails.findall('url'):
scenario = element.attrib['playback_scenario']
if scenario.startswith(u'FLASH'):
if scenario.startswith(u'FLASH_1800K'):
video_url = element.text
# 1800K is the current highest quality video on MLB.com
break
else:
if scenario.startswith(u'FLASH_1500K'):
video_url = element.text
has1500K = 1
else:
if (scenario.startswith(u'FLASH_1200K') and not has1500K):
video_url = element.text
has1200K = 1
else:
if (scenario.startswith(u'FLASH_600K') and not has1200K):
video_url = element.text
has600K = 1
else:
if (scenario.startswith(u'FLASH_300K') and not has600K):
video_url = element.text
return {
'id': video_id,
'url': video_url,
'extractor': 'mlb',
'webpage_url': url,
'title': title,
'ext': 'mp4',
'format': 'mp4',
'description': description,
'thumbnail': thumbnail,
}
Loading…
Cancel
Save