2 changed files with 141 additions and 0 deletions
Split View
Diff Options
@ -0,0 +1,140 @@ |
|||
# coding: utf-8 |
|||
from __future__ import unicode_literals |
|||
|
|||
import re |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ( |
|||
clean_html, |
|||
determine_ext, |
|||
ExtractorError, |
|||
float_or_none, |
|||
get_element_by_class, |
|||
get_element_by_id, |
|||
parse_duration, |
|||
remove_end, |
|||
urlencode_postdata, |
|||
urljoin, |
|||
) |
|||
|
|||
|
|||
class TeamTreeHouseIE(InfoExtractor):
    """Extractor for teamtreehouse.com ``/library/<id>`` pages.

    A page that embeds HTML5 media is returned as a single video.  Any other
    page is returned as a playlist whose entries are the links found either
    in the ``workshop-videos`` list or in the separately downloaded syllabus
    stages page.
    """

    _VALID_URL = r'https?://(?:www\.)?teamtreehouse\.com/library/(?P<id>[^/]+)'
    _TESTS = [{
        # Course
        'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php',
        'info_dict': {
            'id': 'introduction-to-user-authentication-in-php',
            'title': 'Introduction to User Authentication in PHP',
            'description': 'md5:405d7b4287a159b27ddf30ca72b5b053',
        },
        'playlist_mincount': 24,
    }, {
        # WorkShop
        'url': 'https://teamtreehouse.com/library/deploying-a-react-app',
        'info_dict': {
            'id': 'deploying-a-react-app',
            'title': 'Deploying a React App',
            'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921',
        },
        'playlist_mincount': 4,
    }, {
        # Video
        'url': 'https://teamtreehouse.com/library/application-overview-2',
        'info_dict': {
            'id': 'application-overview-2',
            'ext': 'mp4',
            'title': 'Application Overview',
            'description': 'md5:4b0a234385c27140a4378de5f1e15127',
        },
        'expected_warnings': ['This is just a preview'],
    }]
    _NETRC_MACHINE = 'teamtreehouse'

    def _real_initialize(self):
        """Sign in with the configured credentials, if any.

        Returns without doing anything when no login info is available.

        Raises:
            ExtractorError: if the response to the login request contains an
                ``error-message`` element; its cleaned text is the message.
        """
        email, password = self._get_login_info()
        if email is None:
            return

        signin_page = self._download_webpage(
            'https://teamtreehouse.com/signin',
            None, 'Downloading signin page')
        # Start from the hidden inputs of the sign-in form and add the
        # credentials on top of them.
        login_form = self._form_hidden_inputs('new_user_session', signin_page)
        login_form.update({
            'user_session[email]': email,
            'user_session[password]': password,
        })
        login_response = self._download_webpage(
            'https://teamtreehouse.com/person_session',
            None, 'Logging in', data=urlencode_postdata(login_form))
        error_message = get_element_by_class('error-message', login_response)
        if error_message:
            raise ExtractorError(clean_html(error_message), expected=True)

    def _real_extract(self, url):
        """Return a video info dict or a playlist result for ``url``.

        The title and description are read from the page's meta tags.  The
        page is handled as a video when HTML5 media entries are found in it,
        and as a playlist otherwise.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
        description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], webpage)

        media_entries = self._parse_html5_media_entries(url, webpage, display_id)
        if media_entries:
            return self._build_video_info(
                media_entries[0], webpage, display_id, title, description)
        return self._build_playlist(
            url, webpage, display_id, title, description)

    def _build_video_info(self, info, webpage, display_id, title, description):
        """Complete the first HTML5 media entry into the final video info dict."""
        # Give every subtitle track an explicit extension, falling back to
        # 'srt' when none can be determined from its URL.
        for subtitle_tracks in info.get('subtitles', {}).values():
            for track in subtitle_tracks:
                track['ext'] = determine_ext(track['url'], 'srt')

        if 'data-preview="true"' in webpage:
            self.report_warning(
                'This is just a preview. You need to be signed in with a '
                'Basic account to download the entire video.', display_id)
            # Previews get a fixed duration (presumably 30 seconds).
            duration = 30
        else:
            # The duration is optional metadata: a missing data-duration
            # attribute must not abort extraction, and must let the
            # 'video-duration' element below serve as a fallback.
            # The value is scaled down by 1000 (presumably ms -> s).
            duration = float_or_none(self._search_regex(
                r'data-duration="(\d+)"', webpage, 'duration',
                default=None), 1000)
            if not duration:
                duration = parse_duration(get_element_by_id(
                    'video-duration', webpage))

        info.update({
            'id': display_id,
            'title': title,
            'description': description,
            'duration': duration,
        })
        return info

    def _build_playlist(self, url, webpage, display_id, title, description):
        """Build a playlist from a workshop page or a course page."""
        entries = []

        workshop_videos = self._search_regex(
            r'(?s)<ul[^>]+id="workshop-videos"[^>]*>(.+?)</ul>',
            webpage, 'workshop videos', default=None)
        if workshop_videos:
            entries.extend(self._link_entries(url, workshop_videos))
        else:
            # This lookup is fatal: without a stages URL there is nothing
            # left to build a playlist from.
            stages_path = self._search_regex(
                r'(?s)<div[^>]+id="syllabus-stages"[^>]+data-url="([^"]+)"',
                webpage, 'stages path')
            stages_page = self._download_webpage(
                urljoin(url, stages_path), display_id,
                'Downloading stages page')
            # Each <h2> heading followed by a <ul> is one chapter; chapters
            # are numbered from 1 in page order.
            chapters = re.findall(
                r'(?s)<h2[^>]*>\s*(.+?)\s*</h2>.+?<ul[^>]*>(.+?)</ul>',
                stages_page)
            for chapter_number, (chapter, steps_list) in enumerate(chapters, 1):
                entries.extend(self._link_entries(url, steps_list, {
                    'chapter': chapter,
                    'chapter_number': chapter_number,
                }))
            title = remove_end(title, ' Course')

        return self.playlist_result(
            entries, display_id, title, description)

    def _link_entries(self, base_url, html, extract_info=None):
        """Return one ``url_transparent`` entry per ``<a href>`` in ``html``.

        Links are resolved against ``base_url``.  ``extract_info``, when
        given, is merged into every entry (used for chapter metadata).
        """
        entries = []
        for path in re.findall(r'<a[^>]+href="([^"]+)"', html):
            page_url = urljoin(base_url, path)
            entry = {
                '_type': 'url_transparent',
                'id': self._match_id(page_url),
                'url': page_url,
                # 'ie_key' is the key that routes the URL back to this
                # extractor; the former 'id_key' spelling was ignored.
                'ie_key': self.ie_key(),
            }
            if extract_info:
                entry.update(extract_info)
            entries.append(entry)
        return entries
Write
Preview
Loading…
Cancel
Save