Ignore format-specific fields in initial pass of --match-filter

Closes #3074
This commit is contained in:
pukkandan 2022-03-25 14:06:46 +05:30
parent 3cea3edd1a
commit 6db9c4d57d
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
4 changed files with 25 additions and 15 deletions

View file

@ -196,15 +196,7 @@ def expect_dict(self, got_dict, expected_dict):
def sanitize_got_info_dict(got_dict):
IGNORED_FIELDS = (
# Format keys
'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution',
'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize',
'filesize_approx', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'preference',
'language', 'language_preference', 'quality', 'source_preference', 'http_headers',
'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
# RTMP formats
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
*YoutubeDL._format_fields,
# Lists
'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',

View file

@ -931,7 +931,7 @@ def get_videos(filter_=None):
res = get_videos()
self.assertEqual(res, ['1', '2'])
def f(v):
def f(v, incomplete):
if v['id'] == '1':
return None
else:

View file

@ -513,6 +513,16 @@ class YoutubeDL(object):
'track_number', 'disc_number', 'release_year',
))
_format_fields = {
# NB: Keep in sync with the docstring of extractor/common.py
'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
'preference', 'language', 'language_preference', 'quality', 'source_preference',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
}
_format_selection_exts = {
'audio': {'m4a', 'mp3', 'ogg', 'aac'},
'video': {'mp4', 'flv', 'webm', '3gp'},
@ -2541,7 +2551,7 @@ def is_wellformed(f):
info_dict, _ = self.pre_process(info_dict)
if self._match_entry(info_dict) is not None:
if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
return info_dict
self.post_extract(info_dict)

View file

@ -3545,6 +3545,11 @@ def _match_one(filter_part, dct, incomplete):
'=': operator.eq,
}
if isinstance(incomplete, bool):
is_incomplete = lambda _: incomplete
else:
is_incomplete = lambda k: k in incomplete
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+)
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
@ -3583,7 +3588,7 @@ def _match_one(filter_part, dct, incomplete):
if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
raise ValueError('Operator %s only supports string values!' % m['op'])
if actual_value is None:
return incomplete or m['none_inclusive']
return is_incomplete(m['key']) or m['none_inclusive']
return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
UNARY_OPERATORS = {
@ -3598,7 +3603,7 @@ def _match_one(filter_part, dct, incomplete):
if m:
op = UNARY_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
if incomplete and actual_value is None:
if is_incomplete(m.group('key')) and actual_value is None:
return True
return op(actual_value)
@ -3606,8 +3611,11 @@ def _match_one(filter_part, dct, incomplete):
def match_str(filter_str, dct, incomplete=False):
""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
When incomplete, all conditions passes on missing fields
""" Filter a dictionary with a simple string syntax.
@returns Whether the filter passes
@param incomplete Set of keys that is expected to be missing from dct.
Can be True/False to indicate all/none of the keys may be missing.
All conditions on incomplete keys pass if the key is missing
"""
return all(
_match_one(filter_part.replace(r'\&', '&'), dct, incomplete)