|
@ -3181,54 +3181,94 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor): |
|
|
_MAX_RESULTS = float('inf') |
|
|
_MAX_RESULTS = float('inf') |
|
|
IE_NAME = 'youtube:search' |
|
|
IE_NAME = 'youtube:search' |
|
|
_SEARCH_KEY = 'ytsearch' |
|
|
_SEARCH_KEY = 'ytsearch' |
|
|
_EXTRA_QUERY_ARGS = {} |
|
|
|
|
|
|
|
|
_SEARCH_PARAMS = None |
|
|
_TESTS = [] |
|
|
_TESTS = [] |
|
|
|
|
|
|
|
|
def _get_n_results(self, query, n): |
|
|
|
|
|
"""Get a specified number of results for a query""" |
|
|
|
|
|
|
|
|
|
|
|
videos = [] |
|
|
|
|
|
limit = n |
|
|
|
|
|
|
|
|
|
|
|
url_query = { |
|
|
|
|
|
'search_query': query.encode('utf-8'), |
|
|
|
|
|
|
|
|
def _entries(self, query, n): |
|
|
|
|
|
data = { |
|
|
|
|
|
'context': { |
|
|
|
|
|
'client': { |
|
|
|
|
|
'clientName': 'WEB', |
|
|
|
|
|
'clientVersion': '2.20201021.03.00', |
|
|
|
|
|
} |
|
|
|
|
|
}, |
|
|
|
|
|
'query': query, |
|
|
} |
|
|
} |
|
|
url_query.update(self._EXTRA_QUERY_ARGS) |
|
|
|
|
|
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) |
|
|
|
|
|
|
|
|
|
|
|
for pagenum in itertools.count(1): |
|
|
|
|
|
data = self._download_json( |
|
|
|
|
|
result_url, video_id='query "%s"' % query, |
|
|
|
|
|
note='Downloading page %s' % pagenum, |
|
|
|
|
|
errnote='Unable to download API page', |
|
|
|
|
|
query={'spf': 'navigate'}) |
|
|
|
|
|
html_content = data[1]['body']['content'] |
|
|
|
|
|
|
|
|
|
|
|
if 'class="search-message' in html_content: |
|
|
|
|
|
raise ExtractorError( |
|
|
|
|
|
'[youtube] No video results', expected=True) |
|
|
|
|
|
|
|
|
|
|
|
new_videos = list(self._process_page(html_content)) |
|
|
|
|
|
videos += new_videos |
|
|
|
|
|
if not new_videos or len(videos) > limit: |
|
|
|
|
|
|
|
|
if self._SEARCH_PARAMS: |
|
|
|
|
|
data['params'] = self._SEARCH_PARAMS |
|
|
|
|
|
total = 0 |
|
|
|
|
|
for page_num in itertools.count(1): |
|
|
|
|
|
search = self._download_json( |
|
|
|
|
|
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', |
|
|
|
|
|
video_id='query "%s"' % query, |
|
|
|
|
|
note='Downloading page %s' % page_num, |
|
|
|
|
|
errnote='Unable to download API page', fatal=False, |
|
|
|
|
|
data=json.dumps(data).encode('utf8'), |
|
|
|
|
|
headers={'content-type': 'application/json'}) |
|
|
|
|
|
if not search: |
|
|
break |
|
|
break |
|
|
next_link = self._html_search_regex( |
|
|
|
|
|
r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next', |
|
|
|
|
|
html_content, 'next link', default=None) |
|
|
|
|
|
if next_link is None: |
|
|
|
|
|
|
|
|
slr_contents = try_get( |
|
|
|
|
|
search, |
|
|
|
|
|
(lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], |
|
|
|
|
|
lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), |
|
|
|
|
|
list) |
|
|
|
|
|
if not slr_contents: |
|
|
break |
|
|
break |
|
|
result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link) |
|
|
|
|
|
|
|
|
isr_contents = try_get( |
|
|
|
|
|
slr_contents, |
|
|
|
|
|
lambda x: x[0]['itemSectionRenderer']['contents'], |
|
|
|
|
|
list) |
|
|
|
|
|
if not isr_contents: |
|
|
|
|
|
break |
|
|
|
|
|
for content in isr_contents: |
|
|
|
|
|
if not isinstance(content, dict): |
|
|
|
|
|
continue |
|
|
|
|
|
video = content.get('videoRenderer') |
|
|
|
|
|
if not isinstance(video, dict): |
|
|
|
|
|
continue |
|
|
|
|
|
video_id = video.get('videoId') |
|
|
|
|
|
if not video_id: |
|
|
|
|
|
continue |
|
|
|
|
|
title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) |
|
|
|
|
|
description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) |
|
|
|
|
|
duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) |
|
|
|
|
|
view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' |
|
|
|
|
|
view_count = int_or_none(self._search_regex( |
|
|
|
|
|
r'^(\d+)', re.sub(r'\s', '', view_count_text), |
|
|
|
|
|
'view count', default=None)) |
|
|
|
|
|
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) |
|
|
|
|
|
total += 1 |
|
|
|
|
|
yield { |
|
|
|
|
|
'_type': 'url_transparent', |
|
|
|
|
|
'ie_key': YoutubeIE.ie_key(), |
|
|
|
|
|
'id': video_id, |
|
|
|
|
|
'url': video_id, |
|
|
|
|
|
'title': title, |
|
|
|
|
|
'description': description, |
|
|
|
|
|
'duration': duration, |
|
|
|
|
|
'view_count': view_count, |
|
|
|
|
|
'uploader': uploader, |
|
|
|
|
|
} |
|
|
|
|
|
if total == n: |
|
|
|
|
|
return |
|
|
|
|
|
token = try_get( |
|
|
|
|
|
slr_contents, |
|
|
|
|
|
lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], |
|
|
|
|
|
compat_str) |
|
|
|
|
|
if not token: |
|
|
|
|
|
break |
|
|
|
|
|
data['continuation'] = token |
|
|
|
|
|
|
|
|
if len(videos) > n: |
|
|
|
|
|
videos = videos[:n] |
|
|
|
|
|
return self.playlist_result(videos, query) |
|
|
|
|
|
|
|
|
def _get_n_results(self, query, n): |
|
|
|
|
|
"""Get a specified number of results for a query""" |
|
|
|
|
|
return self.playlist_result(self._entries(query, n), query) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that orders results by upload date."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # URL-encoded innertube 'params' blob ('CAI=') — presumably selects
    # sort-by-upload-date; sent in the search request body by _entries.
    _SEARCH_PARAMS = 'CAI%3D'
|
|
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor): |
|
|
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor): |
|
|