Browse Source

Merge pull request #1231 from yasoob/master

Added an IE for hark.com
master
Philipp Hagemeister 11 years ago
parent
commit
6dc6302599
2 changed files with 36 additions and 0 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 35
      youtube_dl/extractor/hark.py

1
youtube_dl/extractor/__init__.py

@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
from .generic import GenericIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE

35
youtube_dl/extractor/hark.py

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
import re
from .common import InfoExtractor
from ..utils import determine_ext
class HarkIE(InfoExtractor):
    """Information extractor for audio clips hosted on hark.com.

    A clip URL looks like ``http://www.hark.com/clips/<id>-<slug>``; the
    clip id is everything between ``/clips/`` and the first hyphen.
    """

    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
        }
    }

    def _real_extract(self, url):
        """Extract the media URL and title for a single hark.com clip.

        Downloads the clip's ``homepage_embed`` page, takes the first
        ``src="....mp3"`` attribute as the media URL and the page's
        ``<title>`` (with the site's boilerplate suffix removed) as the
        clip title.

        Args:
            url: A clip URL matching ``_VALID_URL``.

        Returns:
            A dict with the keys ``id``, ``url``, ``title`` and ``ext``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        embed_url = "http://www.hark.com/clips/%s/homepage_embed" % video_id
        webpage = self._download_webpage(embed_url, video_id)

        # The dot before "mp3" is escaped so that only a genuine ".mp3"
        # suffix is accepted; the capture group stops right before it, hence
        # the extension is appended again afterwards.
        final_url = self._search_regex(
            r'src="(.+?)\.mp3"', webpage, 'video url') + '.mp3'

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'video title')
        # Drop the site's boilerplate <title> suffixes, in this order. Only
        # the listed text is removed, so whitespace that preceded the suffix
        # stays in the title (NOTE(review): presumably why the expected title
        # in _TEST ends with a space -- confirm before stripping it).
        for boilerplate in (
                ' Sound Clip and Quote - Hark',
                'Sound Clip , Quote, MP3, and Ringtone - Hark'):
            title = title.replace(boilerplate, '')

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'ext': determine_ext(final_url),
        }
Loading…
Cancel
Save