Browse Source

[rds] Improve

master
Sergey M․ 9 years ago
parent
commit
b6ea9ef21a
3 changed files with 71 additions and 51 deletions
  1. 2
      youtube_dl/extractor/__init__.py
  2. 70
      youtube_dl/extractor/rds.py
  3. 50
      youtube_dl/extractor/rdsca.py

2
youtube_dl/extractor/__init__.py

@@ -460,7 +460,7 @@ from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
from .rbmaradio import RBMARadioIE
from .rdsca import RDScaIE
from .rds import RDSIE
from .redtube import RedTubeIE
from .restudy import RestudyIE
from .reverbnation import ReverbNationIE

70
youtube_dl/extractor/rds.py

@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
)
class RDSIE(InfoExtractor):
    """Information extractor for video pages on rds.ca.

    Everything is scraped from the video page itself. The media URL,
    upload date and duration are read from ``itemprop`` markup; title,
    description and thumbnail prefer OpenGraph tags and fall back to
    plain meta tags or ``itemprop`` markup.
    """

    IE_DESC = 'RDS.ca'
    # URL shape: /videos/<one or more path sections>/<display_id>-<id>,
    # where <id> is a dotted number such as "3.1132799".
    _VALID_URL = r'https?://(?:www\.)?rds\.ca/videos/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'

    _TEST = {
        'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
        'info_dict': {
            'id': '3.1132799',
            'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
            'ext': 'mp4',
            'title': 'Fowler Jr. prend la direction de Jacksonville',
            'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
            'timestamp': 1430397346,
            'upload_date': '20150430',
            'duration': 154.354,
            'age_limit': 0,
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Returns a dict with ``id``, ``display_id``, ``url``, ``title``,
        ``description``, ``thumbnail``, ``timestamp``, ``duration`` and
        ``age_limit``. The video URL and the title are looked up fatally;
        the remaining fields are looked up with ``fatal=False`` or via
        helpers and may end up empty.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # TODO: extract f4m from 9c9media.com
        video_url = self._search_regex(
            r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
            webpage, 'video url')

        # _og_search_title is fatal by default, which would raise before the
        # meta-tag fallback could ever be tried. default=None makes a missing
        # OpenGraph title return None so the (fatal) fallback actually runs.
        title = self._og_search_title(webpage, default=None) or self._html_search_meta(
            'title', webpage, 'title', fatal=True)
        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')
        # The thumbnail may be published either as a <link> or as a <span>.
        thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
            [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
             r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
            webpage, 'thumbnail', fatal=False)
        timestamp = parse_iso8601(self._search_regex(
            r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"',
            webpage, 'upload date', fatal=False))
        duration = parse_duration(self._search_regex(
            r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"',
            webpage, 'duration', fatal=False))
        age_limit = self._family_friendly_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'age_limit': age_limit,
        }

50
youtube_dl/extractor/rdsca.py

@@ -1,50 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
url_basename,
)
class RDScaIE(InfoExtractor):
    """Information extractor for video pages on rds.ca.

    Scrapes the page's ``itemprop`` spans for the media URL and metadata.
    Only the media URL lookup is mandatory; every other lookup uses
    ``default=None`` and therefore yields ``None`` when the tag is absent.
    """

    IE_NAME = 'RDS.ca'
    # Everything after /videos/ is captured as the id group; both http and
    # https page URLs are accepted.
    _VALID_URL = r'https?://(?:www\.)?rds\.ca/videos/(?P<id>.*)'

    _TESTS = [{
        'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
        'info_dict': {
            "ext": "mp4",
            "title": "Fowler Jr. prend la direction de Jacksonville",
            "description": "Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ",
            "timestamp": 1430397346,
        }
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        The video id is the basename of the URL (``url_basename``), not the
        ``id`` group of ``_VALID_URL``. Returns a dict with ``id``,
        ``title``, ``description``, ``thumbnail``, ``timestamp`` and a
        single-entry ``formats`` list holding the media URL.
        """
        video_id = url_basename(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'<span itemprop="name"[^>]*>([^\n]*)</span>', webpage, 'video title', default=None)
        video_url = self._search_regex(
            r'<span itemprop="contentURL" content="([^"]+)"', webpage, 'video URL')
        upload_date = parse_iso8601(self._search_regex(
            r'<span itemprop="uploadDate" content="([^"]+)"', webpage, 'upload date', default=None))
        description = self._search_regex(
            r'<span itemprop="description"[^>]*>([^\n]*)</span>', webpage, 'description', default=None)
        # The field label passed here was previously 'upload date'
        # (copy-paste slip); it now names the field actually searched for.
        thumbnail = self._search_regex(
            r'<span itemprop="thumbnailUrl" content="([^"]+)"', webpage, 'thumbnail', default=None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': upload_date,
            'formats': [{
                'url': video_url,
            }],
        }
Loading…
Cancel
Save