You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

98 lines
3.3 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. from ..utils import (
  4. ExtractorError,
  5. unescapeHTML,
  6. unified_strdate,
  7. US_RATINGS,
  8. )
  9. from .subtitles import SubtitlesInfoExtractor
  10. class VikiIE(SubtitlesInfoExtractor):
  11. IE_NAME = 'viki'
  12. _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
  13. _TEST = {
  14. 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
  15. 'md5': 'a21454021c2646f5433514177e2caa5f',
  16. 'info_dict': {
  17. 'id': '1023585v',
  18. 'ext': 'mp4',
  19. 'title': 'Heirs Episode 14',
  20. 'uploader': 'SBS',
  21. 'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
  22. 'upload_date': '20131121',
  23. 'age_limit': 13,
  24. },
  25. 'skip': 'Blocked in the US',
  26. }
  27. def _real_extract(self, url):
  28. mobj = re.match(self._VALID_URL, url)
  29. video_id = mobj.group(1)
  30. webpage = self._download_webpage(url, video_id)
  31. title = self._og_search_title(webpage)
  32. description = self._og_search_description(webpage)
  33. thumbnail = self._og_search_thumbnail(webpage)
  34. uploader_m = re.search(
  35. r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
  36. if uploader_m is None:
  37. uploader = None
  38. else:
  39. uploader = uploader_m.group(1).strip()
  40. rating_str = self._html_search_regex(
  41. r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
  42. 'rating information', default='').strip()
  43. age_limit = US_RATINGS.get(rating_str)
  44. info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
  45. info_webpage = self._download_webpage(
  46. info_url, video_id, note='Downloading info page')
  47. if re.match(r'\s*<div\s+class="video-error', info_webpage):
  48. raise ExtractorError(
  49. 'Video %s is blocked from your location.' % video_id,
  50. expected=True)
  51. video_url = self._html_search_regex(
  52. r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
  53. upload_date_str = self._html_search_regex(
  54. r'"created_at":"([^"]+)"', info_webpage, 'upload date')
  55. upload_date = (
  56. unified_strdate(upload_date_str)
  57. if upload_date_str is not None
  58. else None
  59. )
  60. # subtitles
  61. video_subtitles = self.extract_subtitles(video_id, info_webpage)
  62. if self._downloader.params.get('listsubtitles', False):
  63. self._list_available_subtitles(video_id, info_webpage)
  64. return
  65. return {
  66. 'id': video_id,
  67. 'title': title,
  68. 'url': video_url,
  69. 'description': description,
  70. 'thumbnail': thumbnail,
  71. 'age_limit': age_limit,
  72. 'uploader': uploader,
  73. 'subtitles': video_subtitles,
  74. 'upload_date': upload_date,
  75. }
  76. def _get_available_subtitles(self, video_id, info_webpage):
  77. res = {}
  78. for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
  79. sturl = unescapeHTML(sturl_html)
  80. m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
  81. if not m:
  82. continue
  83. res[m.group('lang')] = sturl
  84. return res