@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import re
import re
import json
from .common import InfoExtractor
from .common import InfoExtractor
from ..utils import determine_ext
from ..utils import determine_ext
@ -12,24 +13,25 @@ class HarkIE(InfoExtractor):
u ' file ' : u ' mmbzyhkgny.mp3 ' ,
u ' file ' : u ' mmbzyhkgny.mp3 ' ,
u ' md5 ' : u ' 6783a58491b47b92c7c1af5a77d4cbee ' ,
u ' md5 ' : u ' 6783a58491b47b92c7c1af5a77d4cbee ' ,
u ' info_dict ' : {
u ' info_dict ' : {
u " title " : u " Obama: ' Beyond The Afghan Theater, We Only Target Al Qaeda ' On May 23, 2013 " ,
u ' title ' : u " Obama: ' Beyond The Afghan Theater, We Only Target Al Qaeda ' on May 23, 2013 " ,
u ' description ' : u ' President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists. ' ,
u ' duration ' : 11 ,
}
}
}
}
def _real_extract(self, url):
    """Extract the metadata of a single Hark clip from its JSON endpoint.

    The clip id is taken from the first capture group of ``_VALID_URL``.
    All metadata comes from ``http://www.hark.com/clips/<id>.json``; the
    embed page is no longer downloaded or scraped, because every value
    that was scraped from it was subsequently overwritten by the JSON data.

    Args:
        url: The clip URL; it must match ``self._VALID_URL``.

    Returns:
        A dict with the keys ``id``, ``url``, ``title``, ``ext``,
        ``description``, ``thumbnail`` and ``duration``. The last three
        are ``None`` when the JSON document does not provide them.

    Raises:
        KeyError: If the JSON document lacks the ``url`` or ``name`` key.
        ValueError: If the downloaded document is not valid JSON.
    """
    mobj = re.match(self._VALID_URL, url)
    video_id = mobj.group(1)

    json_url = "http://www.hark.com/clips/%s.json" % video_id
    info_json = self._download_webpage(json_url, video_id)
    info = json.loads(info_json)

    # The media URL and the name are mandatory: without them there is
    # nothing to download, so a missing key is allowed to raise.
    final_url = info['url']

    return {
        'id': video_id,
        'url': final_url,
        'title': info['name'],
        'ext': determine_ext(final_url),
        # Optional metadata: a clip that lacks any of these is still
        # downloadable, so fall back to None instead of raising.
        'description': info.get('description'),
        'thumbnail': info.get('image_original'),
        'duration': info.get('duration'),
    }