You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

147 lines
5.0 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. from ..compat import (
  4. compat_urlparse,
  5. compat_urllib_request,
  6. )
  7. from ..utils import (
  8. ExtractorError,
  9. unescapeHTML,
  10. unified_strdate,
  11. US_RATINGS,
  12. determine_ext,
  13. mimetype2ext,
  14. )
  15. from .common import InfoExtractor
  16. class VikiIE(InfoExtractor):
  17. IE_NAME = 'viki'
  18. # iPad2
  19. _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
  20. _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
  21. _TESTS = [{
  22. 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
  23. 'info_dict': {
  24. 'id': '1023585v',
  25. 'ext': 'mp4',
  26. 'title': 'Heirs Episode 14',
  27. 'uploader': 'SBS',
  28. 'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
  29. 'upload_date': '20131121',
  30. 'age_limit': 13,
  31. },
  32. 'skip': 'Blocked in the US',
  33. }, {
  34. 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
  35. 'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
  36. 'info_dict': {
  37. 'id': '1067139v',
  38. 'ext': 'mp4',
  39. 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
  40. 'upload_date': '20150430',
  41. 'title': '\'The Avengers: Age of Ultron\' Press Conference',
  42. }
  43. }, {
  44. 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
  45. 'info_dict': {
  46. 'id': '1048879v',
  47. 'ext': 'mp4',
  48. 'upload_date': '20140820',
  49. 'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
  50. 'title': 'Ankhon Dekhi',
  51. },
  52. 'params': {
  53. # requires ffmpeg
  54. 'skip_download': True,
  55. }
  56. }]
  57. def _real_extract(self, url):
  58. video_id = self._match_id(url)
  59. webpage = self._download_webpage(url, video_id)
  60. title = self._og_search_title(webpage)
  61. description = self._og_search_description(webpage)
  62. thumbnail = self._og_search_thumbnail(webpage)
  63. uploader_m = re.search(
  64. r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
  65. if uploader_m is None:
  66. uploader = None
  67. else:
  68. uploader = uploader_m.group(1).strip()
  69. rating_str = self._html_search_regex(
  70. r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
  71. 'rating information', default='').strip()
  72. age_limit = US_RATINGS.get(rating_str)
  73. req = compat_urllib_request.Request(
  74. 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
  75. req.add_header('User-Agent', self._USER_AGENT)
  76. info_webpage = self._download_webpage(
  77. req, video_id, note='Downloading info page')
  78. err_msg = self._html_search_regex(r'<div[^>]+class="video-error[^>]+>(.+)</div>', info_webpage, 'error message', default=None)
  79. if err_msg:
  80. if 'not available in your region' in err_msg:
  81. raise ExtractorError(
  82. 'Video %s is blocked from your location.' % video_id,
  83. expected=True)
  84. else:
  85. raise ExtractorError('Viki said: ' + err_msg)
  86. mobj = re.search(
  87. r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
  88. if not mobj:
  89. raise ExtractorError('Unable to find video URL')
  90. video_url = unescapeHTML(mobj.group('url'))
  91. video_ext = mimetype2ext(mobj.group('mime_type'))
  92. if determine_ext(video_url) == 'm3u8':
  93. formats = self._extract_m3u8_formats(
  94. video_url, video_id, ext=video_ext)
  95. else:
  96. formats = [{
  97. 'url': video_url,
  98. 'ext': video_ext,
  99. }]
  100. upload_date_str = self._html_search_regex(
  101. r'"created_at":"([^"]+)"', info_webpage, 'upload date')
  102. upload_date = (
  103. unified_strdate(upload_date_str)
  104. if upload_date_str is not None
  105. else None
  106. )
  107. # subtitles
  108. video_subtitles = self.extract_subtitles(video_id, info_webpage)
  109. return {
  110. 'id': video_id,
  111. 'title': title,
  112. 'formats': formats,
  113. 'description': description,
  114. 'thumbnail': thumbnail,
  115. 'age_limit': age_limit,
  116. 'uploader': uploader,
  117. 'subtitles': video_subtitles,
  118. 'upload_date': upload_date,
  119. }
  120. def _get_subtitles(self, video_id, info_webpage):
  121. res = {}
  122. for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
  123. sturl = unescapeHTML(sturl_html)
  124. m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
  125. if not m:
  126. continue
  127. res[m.group('lang')] = [{
  128. 'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
  129. 'ext': 'vtt',
  130. }]
  131. return res