mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-22 08:46:43 +00:00
[youtube] Fix ytsearch* when cookies are provided
Closes #11924. The API with the `page` parameter is no longer used in browsers, and YouTube always returns {'reload': 'now'} when cookies are provided. See http://youtube.github.io/spfjs/documentation/start/ for how SPF works. Basically, appending an `spf` parameter to a static link yields the corresponding dynamic link.
This commit is contained in:
parent
c54c01f82d
commit
a22b2fd19b
|
@ -1,6 +1,7 @@
|
||||||
version <unreleased>
|
version <unreleased>
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
* [youtube] Fix ytsearch when cookies are provided (#11924)
|
||||||
+ [bilibili] Support new Bangumi URLs (#11845)
|
+ [bilibili] Support new Bangumi URLs (#11845)
|
||||||
|
|
||||||
version 2017.02.01
|
version 2017.02.01
|
||||||
|
|
|
@ -2348,18 +2348,18 @@ def _get_n_results(self, query, n):
|
||||||
videos = []
|
videos = []
|
||||||
limit = n
|
limit = n
|
||||||
|
|
||||||
|
url_query = {
|
||||||
|
'search_query': query.encode('utf-8'),
|
||||||
|
}
|
||||||
|
url_query.update(self._EXTRA_QUERY_ARGS)
|
||||||
|
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
|
||||||
|
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
url_query = {
|
|
||||||
'search_query': query.encode('utf-8'),
|
|
||||||
'page': pagenum,
|
|
||||||
'spf': 'navigate',
|
|
||||||
}
|
|
||||||
url_query.update(self._EXTRA_QUERY_ARGS)
|
|
||||||
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
result_url, video_id='query "%s"' % query,
|
result_url, video_id='query "%s"' % query,
|
||||||
note='Downloading page %s' % pagenum,
|
note='Downloading page %s' % pagenum,
|
||||||
errnote='Unable to download API page')
|
errnote='Unable to download API page',
|
||||||
|
query={'spf': 'navigate'})
|
||||||
html_content = data[1]['body']['content']
|
html_content = data[1]['body']['content']
|
||||||
|
|
||||||
if 'class="search-message' in html_content:
|
if 'class="search-message' in html_content:
|
||||||
|
@ -2371,6 +2371,12 @@ def _get_n_results(self, query, n):
|
||||||
videos += new_videos
|
videos += new_videos
|
||||||
if not new_videos or len(videos) > limit:
|
if not new_videos or len(videos) > limit:
|
||||||
break
|
break
|
||||||
|
next_link = self._html_search_regex(
|
||||||
|
r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
|
||||||
|
html_content, 'next link', default=None)
|
||||||
|
if next_link is None:
|
||||||
|
break
|
||||||
|
result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
|
||||||
|
|
||||||
if len(videos) > n:
|
if len(videos) > n:
|
||||||
videos = videos[:n]
|
videos = videos[:n]
|
||||||
|
|
Loading…
Reference in a new issue