You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.9 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. int_or_none,
  7. parse_duration,
  8. parse_iso8601,
  9. unescapeHTML,
  10. )
  11. class RTSIE(InfoExtractor):
  12. IE_DESC = 'RTS.ch'
  13. _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
  14. _TEST = {
  15. 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
  16. 'md5': '753b877968ad8afaeddccc374d4256a5',
  17. 'info_dict': {
  18. 'id': '3449373',
  19. 'ext': 'mp4',
  20. 'duration': 1488,
  21. 'title': 'Les Enfants Terribles',
  22. 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
  23. 'uploader': 'Divers',
  24. 'upload_date': '19680921',
  25. 'timestamp': -40280400,
  26. },
  27. }
  28. def _real_extract(self, url):
  29. m = re.match(self._VALID_URL, url)
  30. video_id = m.group('id')
  31. all_info = self._download_json(
  32. 'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
  33. info = all_info['video']['JSONinfo']
  34. upload_timestamp = parse_iso8601(info.get('broadcast_date'))
  35. duration = parse_duration(info.get('duration'))
  36. thumbnail = unescapeHTML(info.get('preview_image_url'))
  37. formats = [{
  38. 'format_id': fid,
  39. 'url': furl,
  40. 'tbr': int_or_none(self._search_regex(
  41. r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
  42. } for fid, furl in info['streams'].items()]
  43. self._sort_formats(formats)
  44. return {
  45. 'id': video_id,
  46. 'formats': formats,
  47. 'title': info['title'],
  48. 'description': info.get('intro'),
  49. 'duration': duration,
  50. 'uploader': info.get('programName'),
  51. 'timestamp': upload_timestamp,
  52. }