6 changed files with 355 additions and 0 deletions
Split View
Diff Options
-
1AUTHORS
-
19youtube_dl/extractor/__init__.py
-
76youtube_dl/extractor/gogoanime.py
-
149youtube_dl/extractor/play44.py
-
74youtube_dl/extractor/soulanime.py
-
36youtube_dl/extractor/videofun.py
@ -0,0 +1,76 @@ |
|||
from __future__ import unicode_literals |
|||
|
|||
import re |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ( |
|||
ExtractorError, |
|||
compat_urllib_parse, |
|||
get_element_by_attribute, |
|||
unescapeHTML |
|||
) |
|||
|
|||
|
|||
class GoGoAnimeIE(InfoExtractor):
    """Extractor for gogoanime.com post pages.

    A post page embeds one or more third-party players through iframes.
    The iframe URLs are handed back to youtube-dl (as a playlist or as a
    single ``url`` result) so that the matching embed extractor handles them.
    """
    IE_NAME = 'gogoanime'
    IE_DESC = 'GoGoAnime'
    # Dots are escaped so that they only match a literal '.' in the host name.
    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'

    _TEST = {
        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-movie-1'
        },
        'playlist_count': 3
    }

    def _real_extract(self, url):
        """Return a playlist or a single ``url`` result for the post at *url*.

        Raises ExtractorError when the site reports a missing page, when the
        post content cannot be located, or when no usable iframe is found.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        if 'Oops! Page Not Found</font>' in page:
            raise ExtractorError('Video does not exist', expected=True)

        content = get_element_by_attribute("class", "postcontent", page)
        if content is None:
            # Without this guard re.findall() below would fail with a
            # TypeError that hides the real cause.
            raise ExtractorError('Unable to locate the post content')

        iframe_urls = re.findall(
            r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
        # Embeds whose URL mentions "videofun" are skipped.
        vids = [
            unescapeHTML(compat_urllib_parse.unquote(x))
            for x in iframe_urls if 'videofun' not in x]

        # A post whose content starts with "<p><iframe ...></iframe><br />"
        # is returned as a playlist of all collected embeds.
        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
            return self.playlist_result([self.url_result(vid) for vid in vids], video_id)

        if not vids:
            raise ExtractorError('No supported embedded video found', expected=True)

        title = self._html_search_regex(
            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')

        return {
            '_type': 'url',
            'id': video_id,
            'url': vids[0],
            'title': title,
        }
|||
|
|||
|
|||
class GoGoAnimeSearchIE(InfoExtractor):
    """Extractor turning a gogoanime.com search URL into a playlist.

    The search term (the ``s`` query parameter) is used as the playlist id.
    """
    IE_NAME = 'gogoanime:search'
    IE_DESC = 'GoGoAnime Search'

    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
    _TEST = {
        'url': 'http://www.gogoanime.com/?s=bokusatsu',
        'info_dict': {
            'id': 'bokusatsu'
        },
        'playlist_count': 6
    }

    def _real_extract(self, url):
        """Collect the first link of every ``postlist`` entry on the page."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # The pattern has a single group, so findall() yields plain URL strings.
        post_link_re = r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"'
        entries = []
        for post_url in re.findall(post_link_re, webpage):
            entries.append(self.url_result(post_url))

        return self.playlist_result(entries, playlist_id)
@ -0,0 +1,149 @@ |
|||
from __future__ import unicode_literals |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ( |
|||
compat_urllib_parse |
|||
) |
|||
|
|||
|
|||
class Play44IE(InfoExtractor):
    """Extractor for play44.net embed pages.

    Also serves as the base class for the sibling embed hosts in this module,
    which only override ``_VALID_URL`` and ``_TESTS``.
    """
    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'

    _TESTS = [{
        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-07',
            'ext': 'flv',
            'title': 'mahou-shoujo-madoka-magica-07',
        }
    }]

    def _real_extract(self, url):
        """Return id, url and title for the video embedded at *url*.

        The media URL is read from a ``_url = "..."`` assignment in the page
        and percent-decoded.  The title, which is also used as the id, is the
        part of the media URL between a slash and the following dot.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        video_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'_url = "(https?://[^"]+?)";', page, 'url'))
        # The trailing dot is escaped so it matches the extension separator
        # rather than any character, which could cut the last letter off.
        title = self._search_regex(r'.*/(?P<title>[^.]*)\.', video_url, 'title')

        return {
            'id': title,
            'url': video_url,
            'title': title,
        }
|||
|
|||
|
|||
class ByZooIE(Play44IE):
    """byzoo.org embed pages; extraction is inherited from Play44IE."""

    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
            'md5': '455c83dabe2cd9fd74a87612b01fe017',
            'info_dict': {
                'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
                'ext': 'mp4',
                'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
            },
        },
    ]
|||
|
|||
|
|||
class Video44IE(Play44IE):
    """video44.net embed pages; extraction is inherited from Play44IE."""

    # The id is the ``file`` parameter up to its extension dot or the next
    # '&'.  The previous group ``[^&].`` captured only two characters.
    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&.]+)'

    _TESTS = [{
        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
        'info_dict': {
            'id': 'chaoshead-12',
            'ext': 'mp4',
            'title': 'chaoshead-12',
        }
    }]
|||
|
|||
|
|||
class VideoWingIE(Play44IE):
    """videowing embed pages; extraction is inherited from Play44IE.

    Two URL shapes are accepted: a ``video=`` query parameter or an
    ``embed/<id>`` path.
    """

    # Verbose-mode pattern: whitespace inside it is ignored by the regex engine.
    _VALID_URL = r'''(?x)
        http://[w.]*videowing\.[^/]*/
        (?:
            .*video=/*
            |embed/
        )
        (?P<id>[^&?.]+)
    '''

    _TESTS = [
        {
            'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
            'md5': '4ed320e353ed26c742c4f12a9c210b60',
            'info_dict': {
                'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
                'ext': 'mp4',
                'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
            },
        },
        {
            'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438',
            'md5': '33fdd71581357018c226f95c5cedcfd7',
            'info_dict': {
                'id': 'mahoushoujomadokamagicamovie1part1',
                'ext': 'flv',
                'title': 'mahoushoujomadokamagicamovie1part1',
            },
        },
    ]
|||
|
|||
|
|||
class PlayPandaIE(Play44IE):
    """playpanda embed pages; extraction is inherited from Play44IE."""

    # The id is the ``vid`` parameter up to its extension dot or the next
    # '&'.  The previous group ``[^&].`` captured only two characters.
    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&.]+)'

    # No 'description' is expected here: the inherited _real_extract only
    # returns id, url and title.
    _TESTS = [{
        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
|||
|
|||
|
|||
class VideoZooIE(Play44IE):
    """videozoo embed pages; extraction is inherited from Play44IE."""

    # The id is the ``vid`` parameter up to its extension dot or the next
    # '&'.  The previous group ``[^&].`` captured only two characters.
    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&.]+)'

    _TESTS = [{
        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
|||
|
|||
|
|||
class PlayBBIE(Play44IE):
    """playbb embed pages; extraction is inherited from Play44IE."""

    # The id is the ``vid`` parameter up to its extension dot or the next
    # '&'.  The previous group ``[^&].`` captured only two characters.
    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&.]+)'

    _TESTS = [{
        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
|||
|
|||
|
|||
class EasyVideoIE(Play44IE):
    """easyvideo embed pages; extraction is inherited from Play44IE."""

    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)'

    _TESTS = [
        {
            'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1',
            'md5': '26178b57629b7650106d72b191137176',
            'info_dict': {
                'id': 'bokuwatomodachigasukunai-04',
                'ext': 'mp4',
                'title': 'bokuwatomodachigasukunai-04',
            },
            # The test is marked as skipped with this reason.
            'skip': 'Blocked in Germany',
        },
    ]
@ -0,0 +1,74 @@ |
|||
from __future__ import unicode_literals |
|||
|
|||
import re |
|||
|
|||
from .common import InfoExtractor |
|||
|
|||
|
|||
class SoulAnimeWatchingIE(InfoExtractor):
    """Extractor for single-episode "watching" pages on soul-anime."""
    IE_NAME = "soulanime:watching"
    IE_DESC = "SoulAnime video"
    _TEST = {
        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
        'md5': '05fae04abf72298098b528e98abf4298',
        'info_dict': {
            'id': 'seirei-tsukai-no-blade-dance-episode-9',
            'ext': 'mp4',
            'title': 'seirei-tsukai-no-blade-dance-episode-9',
            'description': 'seirei-tsukai-no-blade-dance-episode-9'
        }
    }
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'

    def _real_extract(self, url):
        """Return the info dict for the episode page at *url*.

        The download link found in the page is site-relative, so it is
        prefixed with the host rebuilt from the matched domain suffix.  The
        extension is taken from the Content-Type of the media response and is
        omitted when that header is missing or has no subtype.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        domain = mobj.group('domain')

        page = self._download_webpage(url, video_id)

        video_path = self._html_search_regex(
            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
        video_url = "http://www.soul-anime." + domain + video_path

        vid = self._request_webpage(video_url, video_id)
        # gettype() only exists on Python 2 message objects; get() is
        # available on both Python 2 and Python 3 header objects.
        content_type = vid.info().get('Content-Type') or ''
        # Drop parameters such as "; charset=..." before splitting.
        mime_type = content_type.split(';')[0].strip().lower()

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_id,
            'description': video_id
        }
        if '/' in mime_type:
            info['ext'] = mime_type.split('/')[1]
        return info
|||
|
|||
|
|||
class SoulAnimeSeriesIE(InfoExtractor):
    """Extractor turning a soul-anime series page into a playlist of episodes."""
    IE_NAME = "soulanime:series"
    IE_DESC = "SoulAnime Series"

    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'

    # Captures the site-relative episode path from each <option> element.
    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'

    _TEST = {
        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
        'info_dict': {
            'id': 'black-rock-shooter-tv'
        },
        'playlist_count': 8
    }

    def _real_extract(self, url):
        """Build a playlist with one url result per episode option."""
        url_match = re.match(self._VALID_URL, url)
        domain = url_match.group('domain')
        series_id = url_match.group('id')

        episode_re = re.compile(self._EPISODE_REGEX)

        page = self._download_webpage(url, series_id, "Downloading series page")

        # Episode paths are site-relative; prefix them with the matched host.
        entries = []
        for episode_path in episode_re.findall(page):
            entries.append(
                self.url_result("http://www.soul-anime." + domain + episode_path))

        return self.playlist_result(entries, series_id)
@ -0,0 +1,36 @@ |
|||
from __future__ import unicode_literals |
|||
|
|||
from .common import InfoExtractor |
|||
from ..utils import ( |
|||
compat_urllib_parse |
|||
) |
|||
|
|||
|
|||
class VideoFunIE(InfoExtractor):
    """Extractor for videofun.me embed pages."""

    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)'

    _TEST = {
        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'Mahou-Shoujo-Madoka-Magica-07',
            'ext': 'flv',
            'title': 'Mahou-Shoujo-Madoka-Magica-07',
        },
    }

    def _real_extract(self, url):
        """Return id, url and title for the video embedded at *url*.

        The title, which is also used as the id, is the part of the decoded
        media URL between a slash and the following dot.
        """
        embed_id = self._match_id(url)
        webpage = self._download_webpage(
            url, embed_id, 'Downloading video page')

        # The gateway URL appears percent-encoded inside the page.
        quoted_url = self._html_search_regex(
            r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url')
        media_url = compat_urllib_parse.unquote(quoted_url)
        name = self._html_search_regex(r'.*/([^.]*)\.', media_url, 'title')

        info = {'id': name}
        info['url'] = media_url
        info['title'] = name
        return info
Write
Preview
Loading…
Cancel
Save