Do not verify thumbnail URLs by default

Partially reverts cca80fe611 and 0ba692acc8

Unless `--check-formats` is specified, skipping this verification can cause yt-dlp to return incorrect thumbnail URLs.
See https://github.com/yt-dlp/yt-dlp/issues/340#issuecomment-877909966, #402

However, the overhead of verifying thumbnail URLs in general use is not worth it.

Closes #694, #725
This commit is contained in:
pukkandan 2021-10-18 15:23:42 +05:30
parent b11d210156
commit e820fbaa6f
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
3 changed files with 5 additions and 20 deletions

View file

@ -2095,25 +2095,14 @@ def _sanitize_thumbnails(self, info_dict):
t.get('url'))) t.get('url')))
def thumbnail_tester(): def thumbnail_tester():
if self.params.get('check_formats'):
test_all = True
to_screen = lambda msg: self.to_screen(f'[info] {msg}')
else:
test_all = False
to_screen = self.write_debug
def test_thumbnail(t): def test_thumbnail(t):
if not test_all and not t.get('_test_url'): self.to_screen(f'[info] Testing thumbnail {t["id"]}')
return True
to_screen('Testing thumbnail %s' % t['id'])
try: try:
self.urlopen(HEADRequest(t['url'])) self.urlopen(HEADRequest(t['url']))
except network_exceptions as err: except network_exceptions as err:
to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % ( self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
t['id'], t['url'], error_to_compat_str(err)))
return False return False
return True return True
return test_thumbnail return test_thumbnail
for i, t in enumerate(thumbnails): for i, t in enumerate(thumbnails):
@ -2123,7 +2112,7 @@ def test_thumbnail(t):
t['resolution'] = '%dx%d' % (t['width'], t['height']) t['resolution'] = '%dx%d' % (t['width'], t['height'])
t['url'] = sanitize_url(t['url']) t['url'] = sanitize_url(t['url'])
if self.params.get('check_formats') is not False: if self.params.get('check_formats'):
info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
else: else:
info_dict['thumbnails'] = thumbnails info_dict['thumbnails'] = thumbnails

View file

@ -233,7 +233,6 @@ class InfoExtractor(object):
* "resolution" (optional, string "{width}x{height}", * "resolution" (optional, string "{width}x{height}",
deprecated) deprecated)
* "filesize" (optional, int) * "filesize" (optional, int)
* "_test_url" (optional, bool) - If true, test the URL
thumbnail: Full URL to a video thumbnail image. thumbnail: Full URL to a video thumbnail image.
description: Full video description. description: Full video description.
uploader: Full name of the video uploader. uploader: Full name of the video uploader.

View file

@ -2699,21 +2699,18 @@ def feed_entry(name):
# The best resolution thumbnails sometimes does not appear in the webpage # The best resolution thumbnails sometimes does not appear in the webpage
# See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029> # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3'] thumbnail_names = [
# TODO: Test them also? - For some videos, even these don't exist 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
guaranteed_thumbnail_names = [
'hqdefault', 'hq1', 'hq2', 'hq3', '0', 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
'mqdefault', 'mq1', 'mq2', 'mq3', 'mqdefault', 'mq1', 'mq2', 'mq3',
'default', '1', '2', '3' 'default', '1', '2', '3'
] ]
thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
n_thumbnail_names = len(thumbnail_names) n_thumbnail_names = len(thumbnail_names)
thumbnails.extend({ thumbnails.extend({
'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
video_id=video_id, name=name, ext=ext, video_id=video_id, name=name, ext=ext,
webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''), webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
'_test_url': name in hq_thumbnail_names,
} for name in thumbnail_names for ext in ('webp', 'jpg')) } for name in thumbnail_names for ext in ('webp', 'jpg'))
for thumb in thumbnails: for thumb in thumbnails:
i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names) i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)