You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

54 lines
2.1 KiB

10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import unified_strdate
  5. class ElPaisIE(InfoExtractor):
  6. _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
  7. IE_DESC = 'El País'
  8. _TEST = {
  9. 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
  10. 'md5': '98406f301f19562170ec071b83433d55',
  11. 'info_dict': {
  12. 'id': 'tiempo-nuevo-recetas-viejas',
  13. 'ext': 'mp4',
  14. 'title': 'Tiempo nuevo, recetas viejas',
  15. 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
  16. 'upload_date': '20140206',
  17. }
  18. }
  19. def _real_extract(self, url):
  20. video_id = self._match_id(url)
  21. webpage = self._download_webpage(url, video_id)
  22. prefix = self._html_search_regex(
  23. r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
  24. video_suffix = self._search_regex(
  25. r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
  26. video_url = prefix + video_suffix
  27. thumbnail_suffix = self._search_regex(
  28. r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
  29. fatal=False)
  30. thumbnail = (
  31. None if thumbnail_suffix is None
  32. else prefix + thumbnail_suffix)
  33. title = self._html_search_regex(
  34. '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
  35. webpage, 'title')
  36. date_str = self._search_regex(
  37. r'<p class="date-header date-int updated"\s+title="([^"]+)">',
  38. webpage, 'upload date', fatal=False)
  39. upload_date = (None if date_str is None else unified_strdate(date_str))
  40. return {
  41. 'id': video_id,
  42. 'url': video_url,
  43. 'title': title,
  44. 'description': self._og_search_description(webpage),
  45. 'thumbnail': thumbnail,
  46. 'upload_date': upload_date,
  47. }