Let --match-filter reject entries early

This makes the following options redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`
This commit is contained in:
pukkandan 2021-08-15 13:42:23 +05:30
parent 3ad56b4236
commit 8f18aca871
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
5 changed files with 40 additions and 24 deletions

View file

@ -1439,6 +1439,10 @@ #### Redundant options
-e, --get-title --print title -e, --get-title --print title
-g, --get-url --print urls -g, --get-url --print urls
-j, --dump-json --print "%()j" -j, --dump-json --print "%()j"
--match-title REGEX --match-filter "title ~= (?i)REGEX"
--reject-title REGEX --match-filter "title !~= (?i)REGEX"
--min-views COUNT --match-filter "view_count >=? COUNT"
--max-views COUNT --match-filter "view_count <=? COUNT"
#### Not recommended #### Not recommended

View file

@ -1285,9 +1285,15 @@ def test_match_str(self):
self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'})) self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
# Example from docs # Example from docs
self.assertTrue( self.assertTrue(match_str(
r'!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'', r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
{'description': 'Raining Cats & Dogs'}) {'description': 'Raining Cats & Dogs'}))
# Incomplete
self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
self.assertTrue(match_str('x', {'id': 'foo'}, True))
self.assertTrue(match_str('!x', {'id': 'foo'}, True))
self.assertFalse(match_str('x', {'id': 'foo'}, False))
def test_parse_dfxp_time_expr(self): def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), None) self.assertEqual(parse_dfxp_time_expr(None), None)

View file

@ -1117,10 +1117,13 @@ def check_filter():
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
return 'Skipping "%s" because it is age restricted' % video_title return 'Skipping "%s" because it is age restricted' % video_title
if not incomplete:
match_filter = self.params.get('match_filter') match_filter = self.params.get('match_filter')
if match_filter is not None: if match_filter is not None:
ret = match_filter(info_dict) try:
ret = match_filter(info_dict, incomplete=incomplete)
except TypeError:
# For backward compatibility
ret = None if incomplete else match_filter(info_dict)
if ret is not None: if ret is not None:
return ret return ret
return None return None
@ -2873,13 +2876,13 @@ def download(self, url_list):
except UnavailableVideoError: except UnavailableVideoError:
self.report_error('unable to download video') self.report_error('unable to download video')
except MaxDownloadsReached: except MaxDownloadsReached:
self.to_screen('[info] Maximum number of downloaded files reached') self.to_screen('[info] Maximum number of downloads reached')
raise raise
except ExistingVideoReached: except ExistingVideoReached:
self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing') self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
raise raise
except RejectedVideoReached: except RejectedVideoReached:
self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject') self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
raise raise
else: else:
if self.params.get('dump_single_json', False): if self.params.get('dump_single_json', False):

View file

@ -356,11 +356,11 @@ def _dict_from_options_callback(
selection.add_option( selection.add_option(
'--match-title', '--match-title',
dest='matchtitle', metavar='REGEX', dest='matchtitle', metavar='REGEX',
help='Download only matching titles (regex or caseless sub-string)') help=optparse.SUPPRESS_HELP)
selection.add_option( selection.add_option(
'--reject-title', '--reject-title',
dest='rejecttitle', metavar='REGEX', dest='rejecttitle', metavar='REGEX',
help='Skip download for matching titles (regex or caseless sub-string)') help=optparse.SUPPRESS_HELP)
selection.add_option( selection.add_option(
'--max-downloads', '--max-downloads',
dest='max_downloads', metavar='NUMBER', type=int, default=None, dest='max_downloads', metavar='NUMBER', type=int, default=None,
@ -395,11 +395,11 @@ def _dict_from_options_callback(
selection.add_option( selection.add_option(
'--min-views', '--min-views',
metavar='COUNT', dest='min_views', default=None, type=int, metavar='COUNT', dest='min_views', default=None, type=int,
help='Do not download any videos with less than COUNT views') help=optparse.SUPPRESS_HELP)
selection.add_option( selection.add_option(
'--max-views', '--max-views',
metavar='COUNT', dest='max_views', default=None, type=int, metavar='COUNT', dest='max_views', default=None, type=int,
help='Do not download any videos with more than COUNT views') help=optparse.SUPPRESS_HELP)
selection.add_option( selection.add_option(
'--match-filter', '--match-filter',
metavar='FILTER', dest='match_filter', default=None, metavar='FILTER', dest='match_filter', default=None,

View file

@ -4657,7 +4657,7 @@ def filter_using_list(row, filterArray):
return '\n'.join(format_str % tuple(row) for row in table) return '\n'.join(format_str % tuple(row) for row in table)
def _match_one(filter_part, dct): def _match_one(filter_part, dct, incomplete):
# TODO: Generalize code with YoutubeDL._build_format_filter # TODO: Generalize code with YoutubeDL._build_format_filter
STRING_OPERATORS = { STRING_OPERATORS = {
'*=': operator.contains, '*=': operator.contains,
@ -4718,7 +4718,7 @@ def _match_one(filter_part, dct):
'Invalid integer value %r in filter part %r' % ( 'Invalid integer value %r in filter part %r' % (
m.group('intval'), filter_part)) m.group('intval'), filter_part))
if actual_value is None: if actual_value is None:
return m.group('none_inclusive') return incomplete or m.group('none_inclusive')
return op(actual_value, comparison_value) return op(actual_value, comparison_value)
UNARY_OPERATORS = { UNARY_OPERATORS = {
@ -4733,22 +4733,25 @@ def _match_one(filter_part, dct):
if m: if m:
op = UNARY_OPERATORS[m.group('op')] op = UNARY_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key')) actual_value = dct.get(m.group('key'))
if incomplete and actual_value is None:
return True
return op(actual_value) return op(actual_value)
raise ValueError('Invalid filter part %r' % filter_part) raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct): def match_str(filter_str, dct, incomplete=False):
""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """ """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
When incomplete, all conditions pass on missing fields
"""
return all( return all(
_match_one(filter_part.replace(r'\&', '&'), dct) _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
for filter_part in re.split(r'(?<!\\)&', filter_str)) for filter_part in re.split(r'(?<!\\)&', filter_str))
def match_filter_func(filter_str): def match_filter_func(filter_str):
def _match_func(info_dict): def _match_func(info_dict, *args, **kwargs):
if match_str(filter_str, info_dict): if match_str(filter_str, info_dict, *args, **kwargs):
return None return None
else: else:
video_title = info_dict.get('title', info_dict.get('id', 'video')) video_title = info_dict.get('title', info_dict.get('id', 'video'))