You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

156 lines
6.0 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. clean_html,
  8. unified_strdate,
  9. int_or_none,
  10. )
  11. class RTLnowIE(InfoExtractor):
  12. """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
  13. _VALID_URL = r'''(?x)
  14. (?:https?://)?
  15. (?P<url>
  16. (?P<domain>
  17. rtl-now\.rtl\.de|
  18. rtl2now\.rtl2\.de|
  19. (?:www\.)?voxnow\.de|
  20. (?:www\.)?rtlnitronow\.de|
  21. (?:www\.)?superrtlnow\.de|
  22. (?:www\.)?n-tvnow\.de)
  23. /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
  24. (?:container_id|film_id)=(?P<video_id>[0-9]+)&
  25. player=1(?:&season=[0-9]+)?(?:&.*)?
  26. )'''
  27. _TESTS = [
  28. {
  29. 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
  30. 'info_dict': {
  31. 'id': '90419',
  32. 'ext': 'flv',
  33. 'title': 'Ahornallee - Folge 1 - Der Einzug',
  34. 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de',
  35. 'upload_date': '20070416',
  36. 'duration': 1685,
  37. },
  38. 'params': {
  39. 'skip_download': True,
  40. },
  41. 'skip': 'Only works from Germany',
  42. },
  43. {
  44. 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
  45. 'info_dict': {
  46. 'id': '69756',
  47. 'ext': 'flv',
  48. 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
  49. 'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0',
  50. 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
  51. 'upload_date': '20120519',
  52. 'duration': 1245,
  53. },
  54. 'params': {
  55. 'skip_download': True,
  56. },
  57. 'skip': 'Only works from Germany',
  58. },
  59. {
  60. 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
  61. 'info_dict': {
  62. 'id': '13883',
  63. 'ext': 'flv',
  64. 'title': 'Voxtours - Südafrika-Reporter II',
  65. 'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
  66. 'upload_date': '20090627',
  67. 'duration': 1800,
  68. },
  69. 'params': {
  70. 'skip_download': True,
  71. },
  72. },
  73. {
  74. 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
  75. 'info_dict': {
  76. 'id': '99205',
  77. 'ext': 'flv',
  78. 'title': 'Medicopter 117 - Angst!',
  79. 'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin',
  80. 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
  81. 'upload_date': '20080928',
  82. 'duration': 2691,
  83. },
  84. 'params': {
  85. 'skip_download': True,
  86. },
  87. },
  88. {
  89. 'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
  90. 'only_matching': True,
  91. },
  92. ]
  93. def _real_extract(self, url):
  94. mobj = re.match(self._VALID_URL, url)
  95. video_page_url = 'http://%s/' % mobj.group('domain')
  96. video_id = mobj.group('video_id')
  97. webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
  98. mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage)
  99. if mobj:
  100. raise ExtractorError(clean_html(mobj.group(1)), expected=True)
  101. title = self._og_search_title(webpage)
  102. description = self._og_search_description(webpage)
  103. thumbnail = self._og_search_thumbnail(webpage, default=None)
  104. upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
  105. mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
  106. duration = int(mobj.group('seconds')) if mobj else None
  107. playerdata_url = self._html_search_regex(
  108. r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')
  109. playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
  110. videoinfo = playerdata.find('./playlist/videoinfo')
  111. formats = []
  112. for filename in videoinfo.findall('filename'):
  113. mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
  114. if mobj:
  115. fmt = {
  116. 'url': mobj.group('url'),
  117. 'play_path': 'mp4:' + mobj.group('play_path'),
  118. 'page_url': video_page_url,
  119. 'player_url': video_page_url + 'includes/vodplayer.swf',
  120. }
  121. else:
  122. fmt = {
  123. 'url': filename.text,
  124. }
  125. fmt.update({
  126. 'width': int_or_none(filename.get('width')),
  127. 'height': int_or_none(filename.get('height')),
  128. 'vbr': int_or_none(filename.get('bitrate')),
  129. 'ext': 'flv',
  130. })
  131. formats.append(fmt)
  132. return {
  133. 'id': video_id,
  134. 'title': title,
  135. 'description': description,
  136. 'thumbnail': thumbnail,
  137. 'upload_date': upload_date,
  138. 'duration': duration,
  139. 'formats': formats,
  140. }