You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

52 lines
1.6 KiB

  1. from __future__ import unicode_literals
  2. import json
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. )
  8. class LiveLeakIE(InfoExtractor):
  9. _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
  10. _TEST = {
  11. 'url': 'http://www.liveleak.com/view?i=757_1364311680',
  12. 'file': '757_1364311680.mp4',
  13. 'md5': '0813c2430bea7a46bf13acf3406992f4',
  14. 'info_dict': {
  15. 'description': 'extremely bad day for this guy..!',
  16. 'uploader': 'ljfriel2',
  17. 'title': 'Most unlucky car accident'
  18. }
  19. }
  20. def _real_extract(self, url):
  21. mobj = re.match(self._VALID_URL, url)
  22. video_id = mobj.group('video_id')
  23. webpage = self._download_webpage(url, video_id)
  24. sources_raw = self._search_regex(
  25. r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs')
  26. sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
  27. sources = json.loads(sources_json)
  28. formats = [{
  29. 'format_note': s.get('label'),
  30. 'url': s['file'],
  31. } for s in sources]
  32. self._sort_formats(formats)
  33. video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
  34. video_description = self._og_search_description(webpage)
  35. video_uploader = self._html_search_regex(
  36. r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
  37. return {
  38. 'id': video_id,
  39. 'title': video_title,
  40. 'description': video_description,
  41. 'uploader': video_uploader,
  42. 'formats': formats,
  43. }