You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

148 lines
5.7 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. clean_html,
  7. ExtractorError,
  8. )
  9. class RTLnowIE(InfoExtractor):
  10. """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
  11. _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
  12. _TESTS = [{
  13. 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
  14. 'file': '90419.flv',
  15. 'info_dict': {
  16. 'upload_date': '20070416',
  17. 'title': 'Ahornallee - Folge 1 - Der Einzug',
  18. 'description': 'Folge 1 - Der Einzug',
  19. },
  20. 'params': {
  21. 'skip_download': True,
  22. },
  23. 'skip': 'Only works from Germany',
  24. },
  25. {
  26. 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
  27. 'file': '69756.flv',
  28. 'info_dict': {
  29. 'upload_date': '20120519',
  30. 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
  31. 'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
  32. 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
  33. },
  34. 'params': {
  35. 'skip_download': True,
  36. },
  37. 'skip': 'Only works from Germany',
  38. },
  39. {
  40. 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
  41. 'file': '13883.flv',
  42. 'info_dict': {
  43. 'upload_date': '20090627',
  44. 'title': 'Voxtours - Südafrika-Reporter II',
  45. 'description': 'Südafrika-Reporter II',
  46. },
  47. 'params': {
  48. 'skip_download': True,
  49. },
  50. },
  51. {
  52. 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
  53. 'file': '99205.flv',
  54. 'info_dict': {
  55. 'upload_date': '20080928',
  56. 'title': 'Medicopter 117 - Angst!',
  57. 'description': 'Angst!',
  58. 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
  59. },
  60. 'params': {
  61. 'skip_download': True,
  62. },
  63. },
  64. {
  65. 'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
  66. 'file': '124903.flv',
  67. 'info_dict': {
  68. 'upload_date': '20130101',
  69. 'title': 'Top Gear vom 01.01.2013',
  70. 'description': 'Episode 1',
  71. },
  72. 'params': {
  73. 'skip_download': True,
  74. },
  75. 'skip': 'Only works from Germany',
  76. }]
  77. def _real_extract(self, url):
  78. mobj = re.match(self._VALID_URL, url)
  79. webpage_url = 'http://' + mobj.group('url')
  80. video_page_url = 'http://' + mobj.group('domain') + '/'
  81. video_id = mobj.group('video_id')
  82. webpage = self._download_webpage(webpage_url, video_id)
  83. note_m = re.search(r'''(?sx)
  84. <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
  85. <div[ ]id="playerteaser">''', webpage)
  86. if note_m:
  87. msg = clean_html(note_m.group(1))
  88. raise ExtractorError(msg)
  89. video_title = self._html_search_regex(
  90. r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>',
  91. webpage, 'title')
  92. playerdata_url = self._html_search_regex(
  93. r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
  94. webpage, 'playerdata_url')
  95. playerdata = self._download_webpage(playerdata_url, video_id)
  96. mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata)
  97. if mobj:
  98. video_description = mobj.group('description')
  99. if mobj.group('upload_date_Y'):
  100. video_upload_date = mobj.group('upload_date_Y')
  101. elif mobj.group('upload_date_y'):
  102. video_upload_date = '20' + mobj.group('upload_date_y')
  103. else:
  104. video_upload_date = None
  105. if video_upload_date:
  106. video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d')
  107. else:
  108. video_description = None
  109. video_upload_date = None
  110. self._downloader.report_warning('Unable to extract description and upload date')
  111. # Thumbnail: not every video has an thumbnail
  112. mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
  113. if mobj:
  114. video_thumbnail = mobj.group('thumbnail')
  115. else:
  116. video_thumbnail = None
  117. mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
  118. if mobj is None:
  119. raise ExtractorError('Unable to extract media URL')
  120. video_url = mobj.group('url')
  121. video_play_path = 'mp4:' + mobj.group('play_path')
  122. video_player_url = video_page_url + 'includes/vodplayer.swf'
  123. return {
  124. 'id': video_id,
  125. 'url': video_url,
  126. 'play_path': video_play_path,
  127. 'page_url': video_page_url,
  128. 'player_url': video_player_url,
  129. 'ext': 'flv',
  130. 'title': video_title,
  131. 'description': video_description,
  132. 'upload_date': video_upload_date,
  133. 'thumbnail': video_thumbnail,
  134. }