Browse Source

[radiojavan] Improve extraction (closes #17151)

master
Sergey M․ 6 years ago
parent
commit
93284ff2ea
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
1 changed files with 17 additions and 13 deletions
  1. 30
      youtube_dl/extractor/radiojavan.py

30
youtube_dl/extractor/radiojavan.py

@ -4,15 +4,16 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
unified_strdate,
parse_resolution,
str_to_int, str_to_int,
unified_strdate,
urlencode_postdata, urlencode_postdata,
urljoin,
) )
class RadioJavanIE(InfoExtractor): class RadioJavanIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
_HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host'
_TEST = { _TEST = {
'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
'md5': 'e85208ffa3ca8b83534fca9fe19af95b', 'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
@ -31,23 +32,26 @@ class RadioJavanIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
download_host = self._download_json( download_host = self._download_json(
self._HOST_TRACKER_URL,
video_id,
'https://www.radiojavan.com/videos/video_host', video_id,
data=urlencode_postdata({'id': video_id}), data=urlencode_postdata({'id': video_id}),
headers={ headers={
'Content-Type': 'application/x-www-form-urlencoded', 'Content-Type': 'application/x-www-form-urlencoded',
'Referer': url, 'Referer': url,
}
)['host']
}).get('host', 'https://host1.rjmusicmedia.com')
webpage = self._download_webpage(url, video_id)
formats = [{
'url': '%s/%s' % (download_host, video_path),
'format_id': '%sp' % height,
'height': int(height),
} for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
formats = []
for format_id, _, video_path in re.findall(
r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
webpage):
f = parse_resolution(format_id)
f.update({
'url': urljoin(download_host, video_path),
'format_id': format_id,
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
title = self._og_search_title(webpage) title = self._og_search_title(webpage)

Loading…
Cancel
Save