mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 12:01:28 +00:00
Let `--match-filter` reject entries early
This makes the following options redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`
This commit is contained in:
parent
3ad56b4236
commit
8f18aca871
|
@ -1439,6 +1439,10 @@ #### Redundant options
|
||||||
-e, --get-title --print title
|
-e, --get-title --print title
|
||||||
-g, --get-url --print urls
|
-g, --get-url --print urls
|
||||||
-j, --dump-json --print "%()j"
|
-j, --dump-json --print "%()j"
|
||||||
|
--match-title REGEX --match-filter "title ~= (?i)REGEX"
|
||||||
|
--reject-title REGEX --match-filter "title !~= (?i)REGEX"
|
||||||
|
--min-views COUNT --match-filter "view_count >=? COUNT"
|
||||||
|
--max-views COUNT --match-filter "view_count <=? COUNT"
|
||||||
|
|
||||||
|
|
||||||
#### Not recommended
|
#### Not recommended
|
||||||
|
|
|
@ -1285,9 +1285,15 @@ def test_match_str(self):
|
||||||
self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
|
self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
|
||||||
|
|
||||||
# Example from docs
|
# Example from docs
|
||||||
self.assertTrue(
|
self.assertTrue(match_str(
|
||||||
r'!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'',
|
r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
|
||||||
{'description': 'Raining Cats & Dogs'})
|
{'description': 'Raining Cats & Dogs'}))
|
||||||
|
|
||||||
|
# Incomplete
|
||||||
|
self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
|
||||||
|
self.assertTrue(match_str('x', {'id': 'foo'}, True))
|
||||||
|
self.assertTrue(match_str('!x', {'id': 'foo'}, True))
|
||||||
|
self.assertFalse(match_str('x', {'id': 'foo'}, False))
|
||||||
|
|
||||||
def test_parse_dfxp_time_expr(self):
|
def test_parse_dfxp_time_expr(self):
|
||||||
self.assertEqual(parse_dfxp_time_expr(None), None)
|
self.assertEqual(parse_dfxp_time_expr(None), None)
|
||||||
|
|
|
@ -1117,10 +1117,13 @@ def check_filter():
|
||||||
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
||||||
return 'Skipping "%s" because it is age restricted' % video_title
|
return 'Skipping "%s" because it is age restricted' % video_title
|
||||||
|
|
||||||
if not incomplete:
|
|
||||||
match_filter = self.params.get('match_filter')
|
match_filter = self.params.get('match_filter')
|
||||||
if match_filter is not None:
|
if match_filter is not None:
|
||||||
ret = match_filter(info_dict)
|
try:
|
||||||
|
ret = match_filter(info_dict, incomplete=incomplete)
|
||||||
|
except TypeError:
|
||||||
|
# For backward compatibility
|
||||||
|
ret = None if incomplete else match_filter(info_dict)
|
||||||
if ret is not None:
|
if ret is not None:
|
||||||
return ret
|
return ret
|
||||||
return None
|
return None
|
||||||
|
@ -2873,13 +2876,13 @@ def download(self, url_list):
|
||||||
except UnavailableVideoError:
|
except UnavailableVideoError:
|
||||||
self.report_error('unable to download video')
|
self.report_error('unable to download video')
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
self.to_screen('[info] Maximum number of downloaded files reached')
|
self.to_screen('[info] Maximum number of downloads reached')
|
||||||
raise
|
raise
|
||||||
except ExistingVideoReached:
|
except ExistingVideoReached:
|
||||||
self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
|
self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
|
||||||
raise
|
raise
|
||||||
except RejectedVideoReached:
|
except RejectedVideoReached:
|
||||||
self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
|
self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
if self.params.get('dump_single_json', False):
|
if self.params.get('dump_single_json', False):
|
||||||
|
|
|
@ -356,11 +356,11 @@ def _dict_from_options_callback(
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--match-title',
|
'--match-title',
|
||||||
dest='matchtitle', metavar='REGEX',
|
dest='matchtitle', metavar='REGEX',
|
||||||
help='Download only matching titles (regex or caseless sub-string)')
|
help=optparse.SUPPRESS_HELP)
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--reject-title',
|
'--reject-title',
|
||||||
dest='rejecttitle', metavar='REGEX',
|
dest='rejecttitle', metavar='REGEX',
|
||||||
help='Skip download for matching titles (regex or caseless sub-string)')
|
help=optparse.SUPPRESS_HELP)
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--max-downloads',
|
'--max-downloads',
|
||||||
dest='max_downloads', metavar='NUMBER', type=int, default=None,
|
dest='max_downloads', metavar='NUMBER', type=int, default=None,
|
||||||
|
@ -395,11 +395,11 @@ def _dict_from_options_callback(
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--min-views',
|
'--min-views',
|
||||||
metavar='COUNT', dest='min_views', default=None, type=int,
|
metavar='COUNT', dest='min_views', default=None, type=int,
|
||||||
help='Do not download any videos with less than COUNT views')
|
help=optparse.SUPPRESS_HELP)
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--max-views',
|
'--max-views',
|
||||||
metavar='COUNT', dest='max_views', default=None, type=int,
|
metavar='COUNT', dest='max_views', default=None, type=int,
|
||||||
help='Do not download any videos with more than COUNT views')
|
help=optparse.SUPPRESS_HELP)
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--match-filter',
|
'--match-filter',
|
||||||
metavar='FILTER', dest='match_filter', default=None,
|
metavar='FILTER', dest='match_filter', default=None,
|
||||||
|
|
|
@ -4657,7 +4657,7 @@ def filter_using_list(row, filterArray):
|
||||||
return '\n'.join(format_str % tuple(row) for row in table)
|
return '\n'.join(format_str % tuple(row) for row in table)
|
||||||
|
|
||||||
|
|
||||||
def _match_one(filter_part, dct):
|
def _match_one(filter_part, dct, incomplete):
|
||||||
# TODO: Generalize code with YoutubeDL._build_format_filter
|
# TODO: Generalize code with YoutubeDL._build_format_filter
|
||||||
STRING_OPERATORS = {
|
STRING_OPERATORS = {
|
||||||
'*=': operator.contains,
|
'*=': operator.contains,
|
||||||
|
@ -4718,7 +4718,7 @@ def _match_one(filter_part, dct):
|
||||||
'Invalid integer value %r in filter part %r' % (
|
'Invalid integer value %r in filter part %r' % (
|
||||||
m.group('intval'), filter_part))
|
m.group('intval'), filter_part))
|
||||||
if actual_value is None:
|
if actual_value is None:
|
||||||
return m.group('none_inclusive')
|
return incomplete or m.group('none_inclusive')
|
||||||
return op(actual_value, comparison_value)
|
return op(actual_value, comparison_value)
|
||||||
|
|
||||||
UNARY_OPERATORS = {
|
UNARY_OPERATORS = {
|
||||||
|
@ -4733,22 +4733,25 @@ def _match_one(filter_part, dct):
|
||||||
if m:
|
if m:
|
||||||
op = UNARY_OPERATORS[m.group('op')]
|
op = UNARY_OPERATORS[m.group('op')]
|
||||||
actual_value = dct.get(m.group('key'))
|
actual_value = dct.get(m.group('key'))
|
||||||
|
if incomplete and actual_value is None:
|
||||||
|
return True
|
||||||
return op(actual_value)
|
return op(actual_value)
|
||||||
|
|
||||||
raise ValueError('Invalid filter part %r' % filter_part)
|
raise ValueError('Invalid filter part %r' % filter_part)
|
||||||
|
|
||||||
|
|
||||||
def match_str(filter_str, dct):
|
def match_str(filter_str, dct, incomplete=False):
|
||||||
""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
|
""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
|
||||||
|
When incomplete, all conditions pass on missing fields
|
||||||
|
"""
|
||||||
return all(
|
return all(
|
||||||
_match_one(filter_part.replace(r'\&', '&'), dct)
|
_match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
|
||||||
for filter_part in re.split(r'(?<!\\)&', filter_str))
|
for filter_part in re.split(r'(?<!\\)&', filter_str))
|
||||||
|
|
||||||
|
|
||||||
def match_filter_func(filter_str):
|
def match_filter_func(filter_str):
|
||||||
def _match_func(info_dict):
|
def _match_func(info_dict, *args, **kwargs):
|
||||||
if match_str(filter_str, info_dict):
|
if match_str(filter_str, info_dict, *args, **kwargs):
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
video_title = info_dict.get('title', info_dict.get('id', 'video'))
|
video_title = info_dict.get('title', info_dict.get('id', 'video'))
|
||||||
|
|
Loading…
Reference in a new issue