You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

96 lines
3.2 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. from ..utils import (
  4. ExtractorError,
  5. unescapeHTML,
  6. unified_strdate,
  7. US_RATINGS,
  8. )
  9. from .subtitles import SubtitlesInfoExtractor
  class VikiIE(SubtitlesInfoExtractor):
      """Information extractor for viki.com video pages.

      Handles URLs of the form ``http(s)://[www.]viki.com/videos/<digits>v...``.
      Metadata is scraped from the public video page; the media URL, the
      creation date and the subtitle tracks come from a separate
      ``player5_fragment`` page.
      """

      IE_NAME = 'viki'
      _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
      _TEST = {
          'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
          'info_dict': {
              'id': '1023585v',
              'ext': 'mp4',
              'title': 'Heirs Episode 14',
              'uploader': 'SBS',
              'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
              'upload_date': '20131121',
              'age_limit': 13,
          },
          'skip': 'Blocked in the US',
      }

      def _real_extract(self, url):
          """Extract the info dict for a single viki.com video URL.

          Returns a dict with the keys ``id``, ``title``, ``url``,
          ``description``, ``thumbnail``, ``age_limit``, ``uploader``,
          ``subtitles`` and ``upload_date``, or ``None`` when the
          ``listsubtitles`` downloader option is set (the available
          subtitles are listed instead).

          Raises ExtractorError (``expected=True``) when the player fragment
          starts with a ``video-error`` block, which this extractor reports
          as the video being blocked from the user's location.
          """
          video_id = self._match_id(url)

          # Main page: OpenGraph metadata plus a few scraped fields.
          webpage = self._download_webpage(url, video_id)
          title = self._og_search_title(webpage)
          description = self._og_search_description(webpage)
          thumbnail = self._og_search_thumbnail(webpage)

          # The broadcast network is optional; absence maps to None.
          uploader_m = re.search(
              r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
          uploader = (
              uploader_m.group(1).strip() if uploader_m is not None else None)

          # An unknown or missing rating yields age_limit None via dict.get.
          rating_str = self._html_search_regex(
              r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
              'rating information', default='').strip()
          age_limit = US_RATINGS.get(rating_str)

          # Player fragment: media URL, creation date and subtitle tracks.
          info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
          info_webpage = self._download_webpage(
              info_url, video_id, note='Downloading info page')
          if re.match(r'\s*<div\s+class="video-error', info_webpage):
              raise ExtractorError(
                  'Video %s is blocked from your location.' % video_id,
                  expected=True)

          # The media URL is mandatory, so this lookup stays fatal.
          video_url = self._html_search_regex(
              r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')

          # The upload date is optional metadata: with fatal=False a missing
          # "created_at" field gives None instead of aborting the extraction,
          # which is what the None guard below was written for.
          upload_date_str = self._html_search_regex(
              r'"created_at":"([^"]+)"', info_webpage, 'upload date',
              fatal=False)
          upload_date = (
              unified_strdate(upload_date_str)
              if upload_date_str is not None
              else None
          )

          # subtitles
          video_subtitles = self.extract_subtitles(video_id, info_webpage)
          if self._downloader.params.get('listsubtitles', False):
              self._list_available_subtitles(video_id, info_webpage)
              return

          return {
              'id': video_id,
              'title': title,
              'url': video_url,
              'description': description,
              'thumbnail': thumbnail,
              'age_limit': age_limit,
              'uploader': uploader,
              'subtitles': video_subtitles,
              'upload_date': upload_date,
          }

      def _get_available_subtitles(self, video_id, info_webpage):
          """Map language codes to subtitle URLs found in the player fragment.

          Scans ``info_webpage`` for ``<track src="..."/>`` elements,
          HTML-unescapes each ``src`` and takes the language from the
          ``/<lang>.vtt`` part of the URL. Tracks whose URL does not match
          that pattern are skipped. If a language occurs more than once, the
          last track wins. ``video_id`` is not used here.
          """
          res = {}
          for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
              sturl = unescapeHTML(sturl_html)
              m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
              if not m:
                  continue
              res[m.group('lang')] = sturl
          return res