You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

97 lines
3.1 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. from ..compat import compat_urlparse
  4. from ..utils import (
  5. ExtractorError,
  6. unescapeHTML,
  7. unified_strdate,
  8. US_RATINGS,
  9. )
  10. from .common import InfoExtractor
  class VikiIE(InfoExtractor):
      """Extractor for single video pages on viki.com.

      Handles URLs of the form ``http(s)://[www.]viki.com/videos/<digits>v...``;
      the ``<digits>v`` part is used as the video id.
      """

      IE_NAME = 'viki'

      _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
      _TEST = {
          'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
          'info_dict': {
              'id': '1023585v',
              'ext': 'mp4',
              'title': 'Heirs Episode 14',
              'uploader': 'SBS',
              'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
              'upload_date': '20131121',
              'age_limit': 13,
          },
          'skip': 'Blocked in the US',
      }

      def _real_extract(self, url):
          """Collect metadata, the media URL and subtitles for one video.

          Two pages are downloaded: the public video page (title,
          description, thumbnail, broadcast network, rating) and the
          ``player5_fragment`` page (media URL, creation date, subtitle
          tracks).

          Returns a dict with the keys ``id``, ``title``, ``url``,
          ``description``, ``thumbnail``, ``age_limit``, ``uploader``,
          ``subtitles`` and ``upload_date``.

          Raises ExtractorError when the fragment page starts with a
          ``video-error`` div, which this extractor reports as the video
          being blocked from the user's location.
          """
          video_id = self._match_id(url)

          webpage = self._download_webpage(url, video_id)
          title = self._og_search_title(webpage)
          description = self._og_search_description(webpage)
          thumbnail = self._og_search_thumbnail(webpage)

          # The broadcast network is optional; leave the uploader unset when
          # the page does not mention one.
          uploader_m = re.search(
              r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
          uploader = (
              uploader_m.group(1).strip() if uploader_m is not None else None)

          # An absent or unrecognised rating maps to age_limit=None via
          # dict.get on the ratings table.
          rating_str = self._html_search_regex(
              r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
              'rating information', default='').strip()
          age_limit = US_RATINGS.get(rating_str)

          info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
          info_webpage = self._download_webpage(
              info_url, video_id, note='Downloading info page')
          if re.match(r'\s*<div\s+class="video-error', info_webpage):
              raise ExtractorError(
                  'Video %s is blocked from your location.' % video_id,
                  expected=True)

          video_url = self._html_search_regex(
              r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')

          # The upload date is optional metadata. Without fatal=False a
          # missing "created_at" field would abort the whole extraction and
          # the None check below could never take its fallback branch.
          upload_date_str = self._html_search_regex(
              r'"created_at":"([^"]+)"', info_webpage, 'upload date',
              fatal=False)
          upload_date = (
              unified_strdate(upload_date_str)
              if upload_date_str is not None
              else None
          )

          # subtitles
          video_subtitles = self.extract_subtitles(video_id, info_webpage)

          return {
              'id': video_id,
              'title': title,
              'url': video_url,
              'description': description,
              'thumbnail': thumbnail,
              'age_limit': age_limit,
              'uploader': uploader,
              'subtitles': video_subtitles,
              'upload_date': upload_date,
          }

      def _get_subtitles(self, video_id, info_webpage):
          """Build the subtitles mapping from ``<track src="...">`` tags.

          Each track URL is HTML-unescaped and its language is taken from
          the ``/<lang>.vtt`` part of the URL; tracks whose URL does not
          contain such a part are skipped.

          Returns a dict mapping each language code to a one-element list
          holding the absolute track URL and the ``vtt`` extension. If two
          tracks share a language code, the later one wins.
          """
          res = {}
          for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
              sturl = unescapeHTML(sturl_html)
              m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
              if not m:
                  continue
              res[m.group('lang')] = [{
                  # Track URLs may be relative; resolve them against the site.
                  'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
                  'ext': 'vtt',
              }]
          return res