8 changed files with 259 additions and 163 deletions
Split View
Diff Options
-
8.gitignore
-
1test/parameters.json
-
69test/test_dailymotion_subtitles.py
-
114test/test_youtube_subtitles.py
-
11youtube_dl/__init__.py
-
28youtube_dl/extractor/dailymotion.py
-
86youtube_dl/extractor/subtitles.py
-
105youtube_dl/extractor/youtube.py
@ -0,0 +1,69 @@ |
|||
#!/usr/bin/env python |
|||
|
|||
import sys |
|||
import unittest |
|||
import json |
|||
import io |
|||
import hashlib |
|||
|
|||
# Allow direct execution |
|||
import os |
|||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|||
|
|||
from youtube_dl.extractor import DailymotionIE |
|||
from youtube_dl.utils import * |
|||
from helper import FakeYDL |
|||
|
|||
def md5(s):
    """Return the hexadecimal MD5 digest of the text *s*, encoded as UTF-8."""
    return hashlib.md5(s.encode('utf-8')).hexdigest()
|||
|
|||
class TestDailymotionSubtitles(unittest.TestCase):
    """Subtitle-extraction tests driven through ``DailymotionIE``.

    Each test tweaks the downloader parameters on a fresh ``FakeYDL`` and
    then inspects the ``'subtitles'`` entry of the first result returned by
    the extractor for ``self.url``.
    """

    def setUp(self):
        # A new fake downloader per test keeps parameter changes isolated.
        self.DL = FakeYDL()
        self.url = 'http://www.dailymotion.com/video/xczg00'

    def getInfoDict(self):
        """Run ``DailymotionIE`` on ``self.url`` and return its raw result."""
        IE = DailymotionIE(self.DL)
        return IE.extract(self.url)

    def getSubtitles(self):
        """Return the ``'subtitles'`` value of the first extracted entry."""
        info_dict = self.getInfoDict()
        return info_dict[0]['subtitles']

    def test_no_writesubtitles(self):
        # With no subtitle option set, no subtitles are expected at all.
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # assertEqual on the length reports the actual count on failure.
        self.assertEqual(len(subtitles), 5)

    def test_list_subtitles(self):
        # When only listing subtitles the extractor is expected to return
        # nothing instead of an info dict.
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        self.DL.params['writeautomaticsub'] = True
        # Use the same plural option key as the other tests; the previous
        # 'subtitleslang' spelling is not a key any other test here sets.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_nosubtitles(self):
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|||
|
|||
# Allow the test module to be executed directly as a script.
if __name__ == '__main__':
    unittest.main()
@ -0,0 +1,86 @@ |
|||
from .common import InfoExtractor |
|||
|
|||
from ..utils import ( |
|||
compat_str, |
|||
ExtractorError, |
|||
) |
|||
|
|||
|
|||
class SubtitlesInfoExtractor(InfoExtractor):
    """Shared subtitle handling for extractors whose site offers subtitles.

    Subclasses must override ``_get_available_subtitles`` and may override
    ``_request_automatic_caption``. ``extract_subtitles`` is the entry point
    that consults the downloader parameters and dispatches accordingly.
    """

    def _list_available_subtitles(self, video_id):
        """Print the subtitle languages available for *video_id*."""
        # Tolerate a falsy result (None or {}) from the lookup so listing
        # never fails with AttributeError.
        sub_lang_list = self._get_available_subtitles(video_id) or {}
        sub_lang = ",".join(sub_lang_list)
        self.to_screen(u'%s: Available subtitles for video: %s' %
                       (video_id, sub_lang))

    def _extract_subtitles(self, video_id):
        """Return ``{sub_lang: sub}`` for the requested languages.

        Returns ``{}`` when no subtitles are available or when neither the
        ``allsubtitles`` nor the ``writesubtitles`` parameter is set.
        """
        available_subs_list = self._get_available_subtitles(video_id)
        if not available_subs_list:  # error, it didn't get the available subtitles
            return {}

        params = self._downloader.params
        if params.get('allsubtitles', False):
            sub_lang_list = available_subs_list
        elif params.get('writesubtitles', False):
            requested_langs = params.get('subtitleslangs')
            if not requested_langs:
                # No explicit choice: prefer English, otherwise fall back to
                # the first language the site reports.
                if 'en' in available_subs_list:
                    requested_langs = ['en']
                else:
                    requested_langs = [next(iter(available_subs_list))]

            sub_lang_list = {}
            for sub_lang in requested_langs:
                if sub_lang not in available_subs_list:
                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                    continue
                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
        else:
            # Nothing was requested; previously this path left a local
            # unbound and raised UnboundLocalError.
            sub_lang_list = {}

        subtitles = {}
        for sub_lang, url in sub_lang_list.items():
            subtitle = self._request_subtitle_url(sub_lang, url)
            if subtitle:
                # Failed or empty downloads are skipped; a warning has
                # already been reported for them.
                subtitles[sub_lang] = subtitle
        return subtitles

    def _request_subtitle_url(self, sub_lang, url):
        """Download the subtitle at *url*.

        Returns the downloaded content, or ``None`` (after reporting a
        warning) when the download fails or yields nothing.
        """
        try:
            sub = self._download_webpage(url, None, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return None
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return None
        return sub

    def _get_available_subtitles(self, video_id):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses
        """
        # The base implementation knows no subtitles; return the documented
        # empty dict rather than falling through to None.
        return {}

    def _request_automatic_caption(self, video_id, webpage):
        """
        returns {sub_lang: sub} or {} if not available
        Must be redefined by the subclasses that support automatic captions,
        otherwise it will return {}
        """
        self._downloader.report_warning(u'Automatic Captions not supported by this server')
        return {}

    def extract_subtitles(self, video_id, video_webpage=None):
        """
        Extract the subtitles and/or the automatic captions if requested.
        Returns None or a dictionary in the format {sub_lang: sub}
        """
        params = self._downloader.params
        video_subtitles = None
        # Regular subtitles take precedence over automatic captions.
        if params.get('writesubtitles', False) or params.get('allsubtitles', False):
            video_subtitles = self._extract_subtitles(video_id)
        elif params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
        return video_subtitles
Write
Preview
Loading…
Cancel
Save