Browse Source

[thisamericanlife] Add a new extractor

master
Eric Wong 9 years ago
committed by Sergey M․
parent
commit
2a46a27e6c
2 changed files with 33 additions and 0 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 32
      youtube_dl/extractor/thisamericanlife.py

1
youtube_dl/extractor/__init__.py

@@ -569,6 +569,7 @@ from .tf1 import TF1IE
from .theonion import TheOnionIE
from .theplatform import ThePlatformIE
from .thesixtyone import TheSixtyOneIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE

32
youtube_dl/extractor/thisamericanlife.py

@@ -0,0 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class ThisAmericanLifeIE(InfoExtractor):
    """Extractor for This American Life radio-archive episode pages.

    Matches ``thisamericanlife.org/radio-archives/episode/<number>`` URLs
    (with or without ``www.``; anything after the number is not examined)
    and reports the episode's ``_64k.m3u8`` stream on
    ``stream.thisamericanlife.org`` as the media URL.
    """

    # The numeric episode number is captured as the ``id`` group.
    _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/radio-archives/episode/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
        'md5': '5cda28076c9f9d1fd0b0f5cff5959948',
        'info_dict': {
            'id': '487',
            'title': '487: Harper High School, Part One',
            'url': 'http://stream.thisamericanlife.org/487/stream/487_64k.m3u8',
            'ext': 'aac',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode page at *url*.

        The episode number is taken from *url*, the page is downloaded, and
        the title is searched for inside the page's ``<h1>`` element. The
        stream URL is not read from the page; it is assembled from the
        episode number alone.

        Returns a dict with the keys ``id``, ``title``, ``url`` and ``ext``.
        """
        episode_id = self._match_id(url)
        page = self._download_webpage(url, episode_id)
        heading = self._html_search_regex(r'<h1[^>]*>(.*?)</h1>', page, 'title')

        # The episode number appears twice in the stream path:
        # once as a directory and once as the file-name prefix.
        stream_url = ''.join((
            'http://stream.thisamericanlife.org/',
            episode_id,
            '/stream/',
            episode_id,
            '_64k.m3u8',
        ))

        return {
            'id': episode_id,
            'title': heading,
            'url': stream_url,
            'ext': 'aac',
        }
Loading…
Cancel
Save