From 6e84b21559f586ee4d6affb61688d5c6a0c21221 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 7 Aug 2021 21:16:55 +0530 Subject: [PATCH] Fix bugs related to `sanitize_info` Related: https://github.com/yt-dlp/yt-dlp/commit/8012d892bd38af731357a61e071e0a0d01bc41b4#r54555230 --- test/test_YoutubeDL.py | 17 +++++++++++++++-- yt_dlp/YoutubeDL.py | 26 ++++++++++++++++++-------- yt_dlp/utils.py | 2 +- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 9a0b286e2..1e0865102 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -18,7 +18,7 @@ from yt_dlp.compat import compat_os_name, compat_setenv, compat_str, compat_urll from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor.common import InfoExtractor from yt_dlp.postprocessor.common import PostProcessor -from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func +from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func, LazyList TEST_URL = 'http://localhost/sample.mp4' @@ -678,10 +678,17 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(out, expected[0]) self.assertEqual(fname, expected[1]) + # Side-effects + original_infodict = dict(self.outtmpl_info) + test('foo.bar', 'foo.bar') + original_infodict['epoch'] = self.outtmpl_info.get('epoch') + self.assertTrue(isinstance(original_infodict['epoch'], int)) + test('%(epoch)d', int_or_none) + self.assertEqual(original_infodict, self.outtmpl_info) + # Auto-generated fields test('%(id)s.%(ext)s', '1234.mp4') test('%(duration_string)s', ('27:46:40', '27-46-40')) - test('%(epoch)d', int_or_none) test('%(resolution)s', '1080p') test('%(playlist_index)s', '001') test('%(autonumber)s', '00001') @@ -774,6 +781,12 @@ class TestYoutubeDL(unittest.TestCase): test('%(formats.0.id.-1+id)f', '1235.000000') test('%(formats.0.id.-1+formats.1.id.-1)d', '3') + # Laziness + def gen(): + yield from range(5) + raise self.assertTrue(False, 'LazyList should not be evaluated till here') + test('%(key.4)s', '4', info={'key': LazyList(gen())}) + # Empty filename test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4') # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 978f43054..503808727 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -887,14 +887,16 @@ class YoutubeDL(object): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """ - info_dict = dict(info_dict) + info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set na = self.params.get('outtmpl_na_placeholder', 'NA') + info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList + for key in ('__original_infodict', '__postprocessors'): + info_dict.pop(key, None) info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None else None) - info_dict['epoch'] = int(time.time()) info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads if info_dict.get('resolution') is None: info_dict['resolution'] = self.format_resolution(info_dict, default=None) @@ -964,6 +966,11 @@ class YoutubeDL(object): return value + def _dumpjson_default(obj): + if isinstance(obj, (set, LazyList)): + return list(obj) + raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable') + def create_key(outer_mobj): if not outer_mobj.group('has_key'): return f'%{outer_mobj.group(0)}' @@ -988,7 +995,7 @@ class YoutubeDL(object): if fmt[-1] == 'l': value, fmt = ', '.join(variadic(value)), str_fmt elif fmt[-1] == 'j': - value, fmt = json.dumps(value), str_fmt + value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt elif fmt[-1] == 'q': value, fmt = compat_shlex_quote(str(value)), str_fmt elif fmt[-1] == 'c': @@ -2386,7 +2393,7 @@ class YoutubeDL(object): if self.params.get('forcejson', False): self.post_extract(info_dict) - self.to_stdout(json.dumps(self.sanitize_info(info_dict), default=repr)) + self.to_stdout(json.dumps(self.sanitize_info(info_dict))) def dl(self, name, info, subtitle=False, test=False): @@ -2861,7 +2868,7 @@ class YoutubeDL(object): else: if self.params.get('dump_single_json', False): self.post_extract(res) - self.to_stdout(json.dumps(self.filter_requested_info(res), default=repr)) + self.to_stdout(json.dumps(self.sanitize_info(res))) return self._download_retcode @@ -2885,15 +2892,18 @@ class YoutubeDL(object): @staticmethod def sanitize_info(info_dict, remove_private_keys=False): ''' Sanitize the infodict for converting to json ''' - remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict + info_dict.setdefault('epoch', int(time.time())) + remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict keep_keys = ['_type'], # Always keep this to facilitate load-info-json if remove_private_keys: - remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url') + remove_keys |= { + 'requested_formats', 'requested_subtitles', 'requested_entries', + 'filepath', 'entries', 'original_url', 'playlist_autonumber', + } empty_values = (None, {}, [], set(), tuple()) reject = lambda k, v: k not in keep_keys and ( k.startswith('_') or k in remove_keys or v in empty_values) else: - info_dict['epoch'] = int(time.time()) reject = lambda k, v: k in remove_keys filter_fn = lambda obj: ( list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set)) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a6e613139..fd13febd6 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1836,7 +1836,7 @@ def write_json_file(obj, fn): try: with tf: - json.dump(obj, tf, default=repr) + json.dump(obj, tf) if sys.platform == 'win32': # Need to remove existing file on Windows, else os.rename raises # WindowsError or FileExistsError.