from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    compat_urllib_parse,
    get_element_by_attribute,
    unescapeHTML,
)


class GoGoAnimeIE(InfoExtractor):
    IE_NAME = 'gogoanime'
    IE_DESC = 'GoGoAnime'
    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'

    _TEST = {
        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-movie-1',
        },
        'playlist_count': 3,
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)

        page = self._download_webpage(url, video_id)

        # Missing videos are served as a regular page containing this marker
        # rather than an HTTP error
        if 'Oops! Page Not Found</font>' in page:
            raise ExtractorError('Video does not exist', expected=True)

        # Collect the embedded player iframes, dropping any URL that points
        # at videofun
        content = get_element_by_attribute('class', 'postcontent', page)
        vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
        vids = [
            unescapeHTML(compat_urllib_parse.unquote(x))
            for x in vids if not re.search(r'.*videofun.*', x)]

        # If the post body starts with a bare iframe, return every embedded
        # URL as a playlist
        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
            return self.playlist_result(
                [self.url_result(vid) for vid in vids], video_id)

        title = self._html_search_regex(
            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')

        # Single video: defer to the extractor for the embedded host
        return {
            '_type': 'url',
            'id': video_id,
            'url': vids[0],
            'title': title,
        }


class GoGoAnimeSearchIE(InfoExtractor):
    IE_NAME = 'gogoanime:search'
    IE_DESC = 'GoGoAnime Search'
    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'

    _TEST = {
        'url': 'http://www.gogoanime.com/?s=bokusatsu',
        'info_dict': {
            'id': 'bokusatsu',
        },
        'playlist_count': 6,
    }
    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # Each search result is a "postlist" block whose first link is the
        # URL of the video page
        posts = re.findall(
            r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
            webpage)

        return self.playlist_result(
            [self.url_result(p) for p in posts], playlist_id)
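
# A minimal usage sketch, assuming both classes are registered in
# youtube-dl's extractor list: the test URLs above can then be fetched
# from the command line, e.g.
#
#   youtube-dl 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1'
#   youtube-dl 'http://www.gogoanime.com/?s=bokusatsu'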