You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

102 lines
3.5 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. from .common import InfoExtractor
  5. from ..utils import ExtractorError
  6. class LyndaIE(InfoExtractor):
  7. IE_NAME = 'lynda'
  8. IE_DESC = 'lynda.com videos'
  9. _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
  10. _TEST = {
  11. 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
  12. 'file': '114408.mp4',
  13. 'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
  14. u"info_dict": {
  15. 'title': 'Using the exercise files',
  16. 'duration': 68
  17. }
  18. }
  19. def _real_extract(self, url):
  20. mobj = re.match(self._VALID_URL, url)
  21. video_id = mobj.group(1)
  22. page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
  23. video_id, 'Downloading video JSON')
  24. video_json = json.loads(page)
  25. if 'Status' in video_json and video_json['Status'] == 'NotFound':
  26. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  27. if video_json['HasAccess'] is False:
  28. raise ExtractorError('Video %s is only available for members' % video_id, expected=True)
  29. video_id = video_json['ID']
  30. duration = video_json['DurationInSeconds']
  31. title = video_json['Title']
  32. formats = [{'url': fmt['Url'],
  33. 'ext': fmt['Extension'],
  34. 'width': fmt['Width'],
  35. 'height': fmt['Height'],
  36. 'filesize': fmt['FileSize'],
  37. 'format_id': fmt['Resolution']
  38. } for fmt in video_json['Formats']]
  39. self._sort_formats(formats)
  40. return {
  41. 'id': video_id,
  42. 'title': title,
  43. 'duration': duration,
  44. 'formats': formats
  45. }
  46. class LyndaCourseIE(InfoExtractor):
  47. IE_NAME = 'lynda:course'
  48. IE_DESC = 'lynda.com online courses'
  49. # Course link equals to welcome/introduction video link of same course
  50. # We will recognize it as course link
  51. _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
  52. def _real_extract(self, url):
  53. mobj = re.match(self._VALID_URL, url)
  54. course_path = mobj.group('coursepath')
  55. course_id = mobj.group('courseid')
  56. page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
  57. course_id, 'Downloading course JSON')
  58. course_json = json.loads(page)
  59. if 'Status' in course_json and course_json['Status'] == 'NotFound':
  60. raise ExtractorError('Course %s does not exist' % course_id, expected=True)
  61. unaccessible_videos = 0
  62. videos = []
  63. for chapter in course_json['Chapters']:
  64. for video in chapter['Videos']:
  65. if video['HasAccess'] is not True:
  66. unaccessible_videos += 1
  67. continue
  68. videos.append(video['ID'])
  69. if unaccessible_videos > 0:
  70. self._downloader.report_warning('%s videos are only available for members and will not be downloaded' % unaccessible_videos)
  71. entries = [
  72. self.url_result('http://www.lynda.com/%s/%s-4.html' %
  73. (course_path, video_id),
  74. 'Lynda')
  75. for video_id in videos]
  76. course_title = course_json['Title']
  77. return self.playlist_result(entries, course_id, course_title)