You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

226 lines
7.7 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. compat_parse_qs,
  7. compat_urlparse,
  8. determine_ext,
  9. unified_strdate,
  10. )
  11. class WDRIE(InfoExtractor):
  12. _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
  13. _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX
  14. _TESTS = [
  15. {
  16. 'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
  17. 'info_dict': {
  18. 'id': 'mdb-362427',
  19. 'ext': 'flv',
  20. 'title': 'Servicezeit',
  21. 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
  22. 'upload_date': '20140310',
  23. },
  24. 'params': {
  25. 'skip_download': True,
  26. },
  27. },
  28. {
  29. 'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
  30. 'info_dict': {
  31. 'id': 'mdb-363194',
  32. 'ext': 'flv',
  33. 'title': 'Marga Spiegel ist tot',
  34. 'description': 'md5:2309992a6716c347891c045be50992e4',
  35. 'upload_date': '20140311',
  36. },
  37. 'params': {
  38. 'skip_download': True,
  39. },
  40. },
  41. {
  42. 'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
  43. 'md5': '83e9e8fefad36f357278759870805898',
  44. 'info_dict': {
  45. 'id': 'mdb-194332',
  46. 'ext': 'mp3',
  47. 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
  48. 'description': 'md5:2309992a6716c347891c045be50992e4',
  49. 'upload_date': '20091129',
  50. },
  51. },
  52. {
  53. 'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html',
  54. 'md5': '24e83813e832badb0a8d7d1ef9ef0691',
  55. 'info_dict': {
  56. 'id': 'mdb-463528',
  57. 'ext': 'mp3',
  58. 'title': 'Süpersong: Soul Bossa Nova',
  59. 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
  60. 'upload_date': '20140630',
  61. },
  62. },
  63. ]
  64. def _real_extract(self, url):
  65. mobj = re.match(self._VALID_URL, url)
  66. page_url = mobj.group('url')
  67. page_id = mobj.group('id')
  68. webpage = self._download_webpage(url, page_id)
  69. if mobj.group('player') is None:
  70. entries = [
  71. self.url_result(page_url + href, 'WDR')
  72. for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
  73. ]
  74. return self.playlist_result(entries, page_id)
  75. flashvars = compat_urlparse.parse_qs(
  76. self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
  77. page_id = flashvars['trackerClipId'][0]
  78. video_url = flashvars['dslSrc'][0]
  79. title = flashvars['trackerClipTitle'][0]
  80. thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
  81. if 'trackerClipAirTime' in flashvars:
  82. upload_date = flashvars['trackerClipAirTime'][0]
  83. else:
  84. upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')
  85. if upload_date:
  86. upload_date = unified_strdate(upload_date)
  87. if video_url.endswith('.f4m'):
  88. video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
  89. ext = 'flv'
  90. else:
  91. ext = determine_ext(video_url)
  92. description = self._html_search_meta('Description', webpage, 'description')
  93. return {
  94. 'id': page_id,
  95. 'url': video_url,
  96. 'ext': ext,
  97. 'title': title,
  98. 'description': description,
  99. 'thumbnail': thumbnail,
  100. 'upload_date': upload_date,
  101. }
  102. class WDRMobileIE(InfoExtractor):
  103. _VALID_URL = r'''(?x)
  104. https?://mobile-ondemand\.wdr\.de/
  105. .*?/fsk(?P<age_limit>[0-9]+)
  106. /[0-9]+/[0-9]+/
  107. (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
  108. IE_NAME = 'wdr:mobile'
  109. _TEST = {
  110. 'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
  111. 'info_dict': {
  112. 'title': '4283021',
  113. 'id': '421735',
  114. 'ext': 'mp4',
  115. 'age_limit': 0,
  116. },
  117. 'skip': 'Problems with loading data.'
  118. }
  119. def _real_extract(self, url):
  120. mobj = re.match(self._VALID_URL, url)
  121. return {
  122. 'id': mobj.group('id'),
  123. 'title': mobj.group('title'),
  124. 'age_limit': int(mobj.group('age_limit')),
  125. 'url': url,
  126. 'ext': determine_ext(url),
  127. 'user_agent': 'mobile',
  128. }
  129. class WDRMausIE(InfoExtractor):
  130. _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
  131. IE_DESC = 'Sendung mit der Maus'
  132. _TESTS = [{
  133. 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
  134. 'info_dict': {
  135. 'id': 'aktuelle-sendung',
  136. 'ext': 'mp4',
  137. 'thumbnail': 're:^http://.+\.jpg',
  138. 'upload_date': 're:^[0-9]{8}$',
  139. 'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
  140. }
  141. }, {
  142. 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
  143. 'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
  144. 'info_dict': {
  145. 'id': '40_jahre_maus',
  146. 'ext': 'mp4',
  147. 'thumbnail': 're:^http://.+\.jpg',
  148. 'upload_date': '20131007',
  149. 'title': '12.03.2011 - 40 Jahre Maus',
  150. }
  151. }]
  152. def _real_extract(self, url):
  153. mobj = re.match(self._VALID_URL, url)
  154. video_id = mobj.group('id')
  155. webpage = self._download_webpage(url, video_id)
  156. param_code = self._html_search_regex(
  157. r'<a href="\?startVideo=1&amp;([^"]+)"', webpage, 'parameters')
  158. title_date = self._search_regex(
  159. r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
  160. webpage, 'air date')
  161. title_str = self._html_search_regex(
  162. r'<h1>(.*?)</h1>', webpage, 'title')
  163. title = '%s - %s' % (title_date, title_str)
  164. upload_date = unified_strdate(
  165. self._html_search_meta('dc.date', webpage))
  166. fields = compat_parse_qs(param_code)
  167. video_url = fields['firstVideo'][0]
  168. thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
  169. formats = [{
  170. 'format_id': 'rtmp',
  171. 'url': video_url,
  172. }]
  173. jscode = self._download_webpage(
  174. 'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
  175. video_id, fatal=False,
  176. note='Downloading URL translation table',
  177. errnote='Could not download URL translation table')
  178. if jscode:
  179. for m in re.finditer(
  180. r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
  181. jscode):
  182. if video_url.startswith(m.group('stream')):
  183. http_url = video_url.replace(
  184. m.group('stream'), m.group('dl'))
  185. formats.append({
  186. 'format_id': 'http',
  187. 'url': http_url,
  188. })
  189. break
  190. self._sort_formats(formats)
  191. return {
  192. 'id': video_id,
  193. 'title': title,
  194. 'formats': formats,
  195. 'thumbnail': thumbnail,
  196. 'upload_date': upload_date,
  197. }
  198. # TODO test _1