@@ -2,6 +2,7 @@
 from __future__ import unicode_literals

 import base64
+import functools
 import hashlib
 import itertools
 import json
@@ -16,11 +17,13 @@ from ..utils import (
     error_to_compat_str,
     ExtractorError,
     int_or_none,
+    mimetype2ext,
+    OnDemandPagedList,
     parse_iso8601,
     sanitized_Request,
     str_to_int,
     unescapeHTML,
-    mimetype2ext,
+    urlencode_postdata,
 )
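
The three new imports carry the whole rewrite: urlencode_postdata builds the OAuth POST body, functools.partial binds the playlist id and token into the page fetcher, and OnDemandPagedList turns that fetcher into a lazily-paged entry list. A toy illustration of the two utils, assuming they behave as on current master (the fetcher below is a stand-in, not Dailymotion code):

    import functools

    from youtube_dl.utils import OnDemandPagedList, urlencode_postdata

    # urlencode_postdata urlencodes a dict into ascii bytes, ready to be
    # sent as a POST body:
    assert urlencode_postdata({'grant_type': 'client_credentials'}) == \
        b'grant_type=client_credentials'

    # OnDemandPagedList wraps (page_fetcher, page_size) and calls the
    # fetcher only for the pages a requested slice actually touches,
    # passing it a zero-based page number:
    def fetch(token, page):
        return ['entry-%d-%d' % (page, i) for i in range(3)]

    pages = OnDemandPagedList(functools.partial(fetch, 'dummy-token'), 3)
    print(pages.getslice(0, 2))  # fetches page 0 only
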
@@ -343,58 +346,73 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
     IE_NAME = 'dailymotion:playlist'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
-    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
-    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
     _TESTS = [{
         'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
         'info_dict': {
             'title': 'SPORT',
-            'id': 'xv4bw_nqtv_sport',
+            'id': 'xv4bw',
         },
         'playlist_mincount': 20,
     }]
-
-    def _extract_entries(self, id):
-        video_ids = set()
-        processed_urls = set()
-        for pagenum in itertools.count(1):
-            page_url = self._PAGE_TEMPLATE % (id, pagenum)
-            webpage, urlh = self._download_webpage_handle_no_ff(
-                page_url, id, 'Downloading page %s' % pagenum)
-            if urlh.geturl() in processed_urls:
-                self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
-                    page_url, urlh.geturl()), id)
-                break
-
-            processed_urls.add(urlh.geturl())
-
-            for video_id in re.findall(r'data-xid="(.+?)"', webpage):
-                if video_id not in video_ids:
-                    yield self.url_result(
-                        'http://www.dailymotion.com/video/%s' % video_id,
-                        DailymotionIE.ie_key(), video_id)
-                    video_ids.add(video_id)
-
-            if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
-                break
+    _PAGE_SIZE = 100
+
+    def _fetch_page(self, playlist_id, authorization, page):
+        page += 1
+        videos = self._download_json(
+            'https://graphql.api.dailymotion.com',
+            playlist_id, 'Downloading page %d' % page,
+            data=json.dumps({
+                'query': '''{
+  collection(xid: "%s") {
+    videos(first: %d, page: %d) {
+      pageInfo {
+        hasNextPage
+        nextPage
+      }
+      edges {
+        node {
+          xid
+          url
+        }
+      }
+    }
+  }
+}''' % (playlist_id, self._PAGE_SIZE, page)
+            }).encode(), headers={
+                'Authorization': authorization,
+                'Origin': 'https://www.dailymotion.com',
+            })['data']['collection']['videos']
+        for edge in videos['edges']:
+            node = edge['node']
+            yield self.url_result(
+                node['url'], DailymotionIE.ie_key(), node['xid'])
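
For anyone who wants to poke at the endpoint outside of youtube-dl, a rough standalone replay of the request _fetch_page builds (Python 3 stdlib only; the xid and token below are placeholders, and the response shape is assumed to match what the code above unpacks):

    import json
    import urllib.request

    def fetch_collection_page(xid, authorization, page, page_size=100):
        # Same query as _fetch_page: one page of (xid, url) nodes plus
        # pageInfo for continuation.
        query = '''{
          collection(xid: "%s") {
            videos(first: %d, page: %d) {
              pageInfo { hasNextPage nextPage }
              edges { node { xid url } }
            }
          }
        }''' % (xid, page_size, page)
        req = urllib.request.Request(
            'https://graphql.api.dailymotion.com',
            data=json.dumps({'query': query}).encode(),
            headers={
                'Authorization': authorization,  # e.g. 'Bearer <access_token>'
                'Origin': 'https://www.dailymotion.com',
            })
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)['data']['collection']['videos']

    # videos = fetch_collection_page('xv4bw', 'Bearer ...', 1)
    # print([edge['node']['url'] for edge in videos['edges']])
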

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
         webpage = self._download_webpage(url, playlist_id)
-
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'title': self._og_search_title(webpage),
-            'entries': self._extract_entries(playlist_id),
-        }
-
-
-class DailymotionUserIE(DailymotionPlaylistIE):
+        api = self._parse_json(self._search_regex(
+            r'__PLAYER_CONFIG__\s*=\s*({.+?});',
+            webpage, 'player config'), playlist_id)['context']['api']
+        auth = self._download_json(
+            api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
+            playlist_id, data=urlencode_postdata({
+                'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
+                'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
+                'grant_type': 'client_credentials',
+            }))
+        authorization = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
+        entries = OnDemandPagedList(functools.partial(
+            self._fetch_page, playlist_id, authorization), self._PAGE_SIZE)
+        return self.playlist_result(
+            entries, playlist_id,
+            self._og_search_title(webpage))
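
Two things worth spelling out here. First, OnDemandPagedList hands _fetch_page a zero-based page number computed from slice offsets, while the GraphQL page argument is one-based, hence the page += 1 at the top of _fetch_page. Second, the token handshake is a plain client_credentials grant; a minimal stdlib sketch using the fallback endpoint and credentials from this hunk (the values scraped from __PLAYER_CONFIG__ take precedence when present):

    import json
    import urllib.parse
    import urllib.request

    def get_authorization(client_id='f1a362d288c1b98099c7',
                          client_secret='eea605b96e01c796ff369935357eca920c5da4c5'):
        # Returns the header value _real_extract stores as `authorization`,
        # e.g. 'Bearer <access_token>'.
        data = urllib.parse.urlencode({
            'client_id': client_id,
            'client_secret': client_secret,
            'grant_type': 'client_credentials',
        }).encode()
        with urllib.request.urlopen(
                'https://graphql.api.dailymotion.com/oauth/token', data) as resp:
            auth = json.load(resp)
        return '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
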
+
+
+class DailymotionUserIE(DailymotionBaseInfoExtractor):
     IE_NAME = 'dailymotion:user'
     _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
+    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
+    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
     _TESTS = [{
         'url': 'https://www.dailymotion.com/user/nqtv',
@@ -416,6 +434,30 @@ class DailymotionUserIE(DailymotionPlaylistIE):
         'skip': 'Takes too long time',
     }]

+    def _extract_entries(self, id):
+        video_ids = set()
+        processed_urls = set()
+        for pagenum in itertools.count(1):
+            page_url = self._PAGE_TEMPLATE % (id, pagenum)
+            webpage, urlh = self._download_webpage_handle_no_ff(
+                page_url, id, 'Downloading page %s' % pagenum)
+            if urlh.geturl() in processed_urls:
+                self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
+                    page_url, urlh.geturl()), id)
+                break
+
+            processed_urls.add(urlh.geturl())
+
+            for video_id in re.findall(r'data-xid="(.+?)"', webpage):
+                if video_id not in video_ids:
+                    yield self.url_result(
+                        'http://www.dailymotion.com/video/%s' % video_id,
+                        DailymotionIE.ie_key(), video_id)
+                    video_ids.add(video_id)
+
+            if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
+                break
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         user = mobj.group('user')
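
To smoke-test both classes locally, the usual per-extractor runs should cover this (test names follow the IE class names per the developer instructions; note the user test is marked skipped above):

    python test/test_download.py TestDownload.test_DailymotionPlaylist
    python test/test_download.py TestDownload.test_DailymotionUser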
|
|
|