You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

358 lines
13 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
6 years ago
6 years ago
  1. #!/usr/bin/env python
  2. from __future__ import unicode_literals
  3. # Allow direct execution
  4. import os
  5. import sys
  6. import unittest
  7. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  8. from test.helper import FakeYDL, md5
  9. from youtube_dl.extractor import (
  10. YoutubeIE,
  11. DailymotionIE,
  12. TEDIE,
  13. VimeoIE,
  14. WallaIE,
  15. CeskaTelevizeIE,
  16. LyndaIE,
  17. NPOIE,
  18. ComedyCentralIE,
  19. NRKTVIE,
  20. RaiPlayIE,
  21. VikiIE,
  22. ThePlatformIE,
  23. ThePlatformFeedIE,
  24. RTVEALaCartaIE,
  25. FunnyOrDieIE,
  26. DemocracynowIE,
  27. )
  28. class BaseTestSubtitles(unittest.TestCase):
  29. url = None
  30. IE = None
  31. def setUp(self):
  32. self.DL = FakeYDL()
  33. self.ie = self.IE()
  34. self.DL.add_info_extractor(self.ie)
  35. def getInfoDict(self):
  36. info_dict = self.DL.extract_info(self.url, download=False)
  37. return info_dict
  38. def getSubtitles(self):
  39. info_dict = self.getInfoDict()
  40. subtitles = info_dict['requested_subtitles']
  41. if not subtitles:
  42. return subtitles
  43. for sub_info in subtitles.values():
  44. if sub_info.get('data') is None:
  45. uf = self.DL.urlopen(sub_info['url'])
  46. sub_info['data'] = uf.read().decode('utf-8')
  47. return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
  48. class TestYoutubeSubtitles(BaseTestSubtitles):
  49. url = 'QRS8MkLhQmM'
  50. IE = YoutubeIE
  51. def test_youtube_allsubtitles(self):
  52. self.DL.params['writesubtitles'] = True
  53. self.DL.params['allsubtitles'] = True
  54. subtitles = self.getSubtitles()
  55. self.assertEqual(len(subtitles.keys()), 13)
  56. self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
  57. self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
  58. for lang in ['fr', 'de']:
  59. self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
  60. def test_youtube_subtitles_ttml_format(self):
  61. self.DL.params['writesubtitles'] = True
  62. self.DL.params['subtitlesformat'] = 'ttml'
  63. subtitles = self.getSubtitles()
  64. self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
  65. def test_youtube_subtitles_vtt_format(self):
  66. self.DL.params['writesubtitles'] = True
  67. self.DL.params['subtitlesformat'] = 'vtt'
  68. subtitles = self.getSubtitles()
  69. self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
  70. def test_youtube_automatic_captions(self):
  71. self.url = '8YoUxe5ncPo'
  72. self.DL.params['writeautomaticsub'] = True
  73. self.DL.params['subtitleslangs'] = ['it']
  74. subtitles = self.getSubtitles()
  75. self.assertTrue(subtitles['it'] is not None)
  76. def test_youtube_translated_subtitles(self):
  77. # This video has a subtitles track, which can be translated
  78. self.url = 'Ky9eprVWzlI'
  79. self.DL.params['writeautomaticsub'] = True
  80. self.DL.params['subtitleslangs'] = ['it']
  81. subtitles = self.getSubtitles()
  82. self.assertTrue(subtitles['it'] is not None)
  83. def test_youtube_nosubtitles(self):
  84. self.DL.expect_warning('video doesn\'t have subtitles')
  85. self.url = 'n5BB19UTcdA'
  86. self.DL.params['writesubtitles'] = True
  87. self.DL.params['allsubtitles'] = True
  88. subtitles = self.getSubtitles()
  89. self.assertFalse(subtitles)
  90. class TestDailymotionSubtitles(BaseTestSubtitles):
  91. url = 'http://www.dailymotion.com/video/xczg00'
  92. IE = DailymotionIE
  93. def test_allsubtitles(self):
  94. self.DL.params['writesubtitles'] = True
  95. self.DL.params['allsubtitles'] = True
  96. subtitles = self.getSubtitles()
  97. self.assertTrue(len(subtitles.keys()) >= 6)
  98. self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
  99. self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
  100. for lang in ['es', 'fr', 'de']:
  101. self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
  102. def test_nosubtitles(self):
  103. self.DL.expect_warning('video doesn\'t have subtitles')
  104. self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
  105. self.DL.params['writesubtitles'] = True
  106. self.DL.params['allsubtitles'] = True
  107. subtitles = self.getSubtitles()
  108. self.assertFalse(subtitles)
  109. class TestTedSubtitles(BaseTestSubtitles):
  110. url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
  111. IE = TEDIE
  112. def test_allsubtitles(self):
  113. self.DL.params['writesubtitles'] = True
  114. self.DL.params['allsubtitles'] = True
  115. subtitles = self.getSubtitles()
  116. self.assertTrue(len(subtitles.keys()) >= 28)
  117. self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
  118. self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
  119. for lang in ['es', 'fr', 'de']:
  120. self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
  121. class TestVimeoSubtitles(BaseTestSubtitles):
  122. url = 'http://vimeo.com/76979871'
  123. IE = VimeoIE
  124. def test_allsubtitles(self):
  125. self.DL.params['writesubtitles'] = True
  126. self.DL.params['allsubtitles'] = True
  127. subtitles = self.getSubtitles()
  128. self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
  129. self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
  130. self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
  131. def test_nosubtitles(self):
  132. self.DL.expect_warning('video doesn\'t have subtitles')
  133. self.url = 'http://vimeo.com/56015672'
  134. self.DL.params['writesubtitles'] = True
  135. self.DL.params['allsubtitles'] = True
  136. subtitles = self.getSubtitles()
  137. self.assertFalse(subtitles)
  138. class TestWallaSubtitles(BaseTestSubtitles):
  139. url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
  140. IE = WallaIE
  141. def test_allsubtitles(self):
  142. self.DL.expect_warning('Automatic Captions not supported by this server')
  143. self.DL.params['writesubtitles'] = True
  144. self.DL.params['allsubtitles'] = True
  145. subtitles = self.getSubtitles()
  146. self.assertEqual(set(subtitles.keys()), set(['heb']))
  147. self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')
  148. def test_nosubtitles(self):
  149. self.DL.expect_warning('video doesn\'t have subtitles')
  150. self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
  151. self.DL.params['writesubtitles'] = True
  152. self.DL.params['allsubtitles'] = True
  153. subtitles = self.getSubtitles()
  154. self.assertFalse(subtitles)
  155. class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
  156. url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
  157. IE = CeskaTelevizeIE
  158. def test_allsubtitles(self):
  159. self.DL.expect_warning('Automatic Captions not supported by this server')
  160. self.DL.params['writesubtitles'] = True
  161. self.DL.params['allsubtitles'] = True
  162. subtitles = self.getSubtitles()
  163. self.assertEqual(set(subtitles.keys()), set(['cs']))
  164. self.assertTrue(len(subtitles['cs']) > 20000)
  165. def test_nosubtitles(self):
  166. self.DL.expect_warning('video doesn\'t have subtitles')
  167. self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
  168. self.DL.params['writesubtitles'] = True
  169. self.DL.params['allsubtitles'] = True
  170. subtitles = self.getSubtitles()
  171. self.assertFalse(subtitles)
  172. class TestLyndaSubtitles(BaseTestSubtitles):
  173. url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
  174. IE = LyndaIE
  175. def test_allsubtitles(self):
  176. self.DL.params['writesubtitles'] = True
  177. self.DL.params['allsubtitles'] = True
  178. subtitles = self.getSubtitles()
  179. self.assertEqual(set(subtitles.keys()), set(['en']))
  180. self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
  181. class TestNPOSubtitles(BaseTestSubtitles):
  182. url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
  183. IE = NPOIE
  184. def test_allsubtitles(self):
  185. self.DL.params['writesubtitles'] = True
  186. self.DL.params['allsubtitles'] = True
  187. subtitles = self.getSubtitles()
  188. self.assertEqual(set(subtitles.keys()), set(['nl']))
  189. self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
  190. class TestMTVSubtitles(BaseTestSubtitles):
  191. url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
  192. IE = ComedyCentralIE
  193. def getInfoDict(self):
  194. return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
  195. def test_allsubtitles(self):
  196. self.DL.params['writesubtitles'] = True
  197. self.DL.params['allsubtitles'] = True
  198. subtitles = self.getSubtitles()
  199. self.assertEqual(set(subtitles.keys()), set(['en']))
  200. self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
  201. class TestNRKSubtitles(BaseTestSubtitles):
  202. url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
  203. IE = NRKTVIE
  204. def test_allsubtitles(self):
  205. self.DL.params['writesubtitles'] = True
  206. self.DL.params['allsubtitles'] = True
  207. subtitles = self.getSubtitles()
  208. self.assertEqual(set(subtitles.keys()), set(['no']))
  209. self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
  210. class TestRaiPlaySubtitles(BaseTestSubtitles):
  211. url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
  212. IE = RaiPlayIE
  213. def test_allsubtitles(self):
  214. self.DL.params['writesubtitles'] = True
  215. self.DL.params['allsubtitles'] = True
  216. subtitles = self.getSubtitles()
  217. self.assertEqual(set(subtitles.keys()), set(['it']))
  218. self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
  219. class TestVikiSubtitles(BaseTestSubtitles):
  220. url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
  221. IE = VikiIE
  222. def test_allsubtitles(self):
  223. self.DL.params['writesubtitles'] = True
  224. self.DL.params['allsubtitles'] = True
  225. subtitles = self.getSubtitles()
  226. self.assertEqual(set(subtitles.keys()), set(['en']))
  227. self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
  228. class TestThePlatformSubtitles(BaseTestSubtitles):
  229. # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
  230. # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
  231. url = 'theplatform:JFUjUE1_ehvq'
  232. IE = ThePlatformIE
  233. def test_allsubtitles(self):
  234. self.DL.params['writesubtitles'] = True
  235. self.DL.params['allsubtitles'] = True
  236. subtitles = self.getSubtitles()
  237. self.assertEqual(set(subtitles.keys()), set(['en']))
  238. self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
  239. class TestThePlatformFeedSubtitles(BaseTestSubtitles):
  240. url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
  241. IE = ThePlatformFeedIE
  242. def test_allsubtitles(self):
  243. self.DL.params['writesubtitles'] = True
  244. self.DL.params['allsubtitles'] = True
  245. subtitles = self.getSubtitles()
  246. self.assertEqual(set(subtitles.keys()), set(['en']))
  247. self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
  248. class TestRtveSubtitles(BaseTestSubtitles):
  249. url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
  250. IE = RTVEALaCartaIE
  251. def test_allsubtitles(self):
  252. print('Skipping, only available from Spain')
  253. return
  254. self.DL.params['writesubtitles'] = True
  255. self.DL.params['allsubtitles'] = True
  256. subtitles = self.getSubtitles()
  257. self.assertEqual(set(subtitles.keys()), set(['es']))
  258. self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
  259. class TestFunnyOrDieSubtitles(BaseTestSubtitles):
  260. url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
  261. IE = FunnyOrDieIE
  262. def test_allsubtitles(self):
  263. self.DL.params['writesubtitles'] = True
  264. self.DL.params['allsubtitles'] = True
  265. subtitles = self.getSubtitles()
  266. self.assertEqual(set(subtitles.keys()), set(['en']))
  267. self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
  268. class TestDemocracynowSubtitles(BaseTestSubtitles):
  269. url = 'http://www.democracynow.org/shows/2015/7/3'
  270. IE = DemocracynowIE
  271. def test_allsubtitles(self):
  272. self.DL.params['writesubtitles'] = True
  273. self.DL.params['allsubtitles'] = True
  274. subtitles = self.getSubtitles()
  275. self.assertEqual(set(subtitles.keys()), set(['en']))
  276. self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
  277. def test_subtitles_in_page(self):
  278. self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
  279. self.DL.params['writesubtitles'] = True
  280. self.DL.params['allsubtitles'] = True
  281. subtitles = self.getSubtitles()
  282. self.assertEqual(set(subtitles.keys()), set(['en']))
  283. self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
  284. if __name__ == '__main__':
  285. unittest.main()