mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 12:01:28 +00:00
Add all format filtering operators also to --match-filter
PR: https://github.com/ytdl-org/youtube-dl/pull/27361 Authored by: max-te
This commit is contained in:
parent
678da2f21b
commit
77b87f0519
34
README.md
34
README.md
|
@ -338,25 +338,21 @@ ## Video Selection:
|
||||||
COUNT views
|
COUNT views
|
||||||
--max-views COUNT Do not download any videos with more than
|
--max-views COUNT Do not download any videos with more than
|
||||||
COUNT views
|
COUNT views
|
||||||
--match-filter FILTER Generic video filter. Specify any key (see
|
--match-filter FILTER Generic video filter. Any field (see
|
||||||
"OUTPUT TEMPLATE" for a list of available
|
"OUTPUT TEMPLATE") can be compared with a
|
||||||
keys) to match if the key is present, !key
|
number or a quoted string using the
|
||||||
to check if the key is not present,
|
operators defined in "Filtering formats".
|
||||||
key>NUMBER (like "view_count > 12", also
|
You can also simply specify a field to
|
||||||
works with >=, <, <=, !=, =) to compare
|
match if the field is present and "!field"
|
||||||
against a number, key = 'LITERAL' (like
|
to check if the field is not present.
|
||||||
"uploader = 'Mike Smith'", also works with
|
Multiple filters can be checked using "&".
|
||||||
!=) to match against a string literal and &
|
For example, to only match videos that are
|
||||||
to require multiple matches. Values which
|
not live, have a like count more than 100, a
|
||||||
are not known are excluded unless you put a
|
dislike count less than 50 (or the dislike
|
||||||
question mark (?) after the operator. For
|
field is not available), and also have a
|
||||||
example, to only match videos that have
|
description that contains "python", use
|
||||||
been liked more than 100 times and disliked
|
--match-filter "!is_live & like_count>100 &
|
||||||
less than 50 times (or the dislike
|
dislike_count<?50 & description*='python'"
|
||||||
functionality is not available at the given
|
|
||||||
service), but who also have a description,
|
|
||||||
use --match-filter "like_count > 100 &
|
|
||||||
dislike_count <? 50 & description"
|
|
||||||
--no-match-filter Do not use generic video filter (default)
|
--no-match-filter Do not use generic video filter (default)
|
||||||
--no-playlist Download only the video, if the URL refers
|
--no-playlist Download only the video, if the URL refers
|
||||||
to a video and a playlist
|
to a video and a playlist
|
||||||
|
|
|
@ -1207,7 +1207,6 @@ def test_render_table(self):
|
||||||
'9999 51')
|
'9999 51')
|
||||||
|
|
||||||
def test_match_str(self):
|
def test_match_str(self):
|
||||||
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
|
|
||||||
self.assertFalse(match_str('xy', {'x': 1200}))
|
self.assertFalse(match_str('xy', {'x': 1200}))
|
||||||
self.assertTrue(match_str('!xy', {'x': 1200}))
|
self.assertTrue(match_str('!xy', {'x': 1200}))
|
||||||
self.assertTrue(match_str('x', {'x': 1200}))
|
self.assertTrue(match_str('x', {'x': 1200}))
|
||||||
|
@ -1224,6 +1223,17 @@ def test_match_str(self):
|
||||||
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
|
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
|
||||||
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
|
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
|
||||||
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
|
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
|
||||||
|
self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
|
||||||
|
self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
|
||||||
|
self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
|
||||||
|
self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
|
||||||
|
self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
|
||||||
|
self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
|
||||||
|
self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
|
||||||
|
self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
|
||||||
|
self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
|
||||||
|
self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
|
||||||
|
self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
|
||||||
self.assertFalse(match_str(
|
self.assertFalse(match_str(
|
||||||
'like_count > 100 & dislike_count <? 50 & description',
|
'like_count > 100 & dislike_count <? 50 & description',
|
||||||
{'like_count': 90, 'description': 'foo'}))
|
{'like_count': 90, 'description': 'foo'}))
|
||||||
|
|
|
@ -375,22 +375,16 @@ def _dict_from_options_callback(
|
||||||
'--match-filter',
|
'--match-filter',
|
||||||
metavar='FILTER', dest='match_filter', default=None,
|
metavar='FILTER', dest='match_filter', default=None,
|
||||||
help=(
|
help=(
|
||||||
'Generic video filter. '
|
'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
|
||||||
'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to '
|
'number or a string using the operators defined in "Filtering formats". '
|
||||||
'match if the key is present, '
|
'You can also simply specify a field to match if the field is present '
|
||||||
'!key to check if the key is not present, '
|
'and "!field" to check if the field is not present. '
|
||||||
'key>NUMBER (like "view_count > 12", also works with '
|
'Multiple filters can be checked using "&". '
|
||||||
'>=, <, <=, !=, =) to compare against a number, '
|
'For example, to only match videos that are not live, '
|
||||||
'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
|
'has a like count more than 100, a dislike count less than 50 '
|
||||||
'to match against a string literal '
|
'(or the dislike field is not available), and also has a description '
|
||||||
'and & to require multiple matches. '
|
'that contains "python", use --match-filter "!is_live & '
|
||||||
'Values which are not known are excluded unless you '
|
'like_count>100 & dislike_count<?50 & description*=\'python\'"'))
|
||||||
'put a question mark (?) after the operator. '
|
|
||||||
'For example, to only match videos that have been liked more than '
|
|
||||||
'100 times and disliked less than 50 times (or the dislike '
|
|
||||||
'functionality is not available at the given service), but who '
|
|
||||||
'also have a description, use --match-filter '
|
|
||||||
'"like_count > 100 & dislike_count <? 50 & description"'))
|
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--no-match-filter',
|
'--no-match-filter',
|
||||||
metavar='FILTER', dest='match_filter', action='store_const', const=None,
|
metavar='FILTER', dest='match_filter', action='store_const', const=None,
|
||||||
|
|
|
@ -4663,17 +4663,20 @@ def filter_using_list(row, filterArray):
|
||||||
|
|
||||||
|
|
||||||
def _match_one(filter_part, dct):
|
def _match_one(filter_part, dct):
|
||||||
|
# TODO: Generalize code with YoutubeDL._build_format_filter
|
||||||
COMPARISON_OPERATORS = {
|
COMPARISON_OPERATORS = {
|
||||||
'<': operator.lt,
|
'<': operator.lt,
|
||||||
'<=': operator.le,
|
'<=': operator.le,
|
||||||
'>': operator.gt,
|
'>': operator.gt,
|
||||||
'>=': operator.ge,
|
'>=': operator.ge,
|
||||||
'=': operator.eq,
|
'=': operator.eq,
|
||||||
'!=': operator.ne,
|
'*=': operator.contains,
|
||||||
|
'^=': lambda attr, value: attr.startswith(value),
|
||||||
|
'$=': lambda attr, value: attr.endswith(value),
|
||||||
}
|
}
|
||||||
operator_rex = re.compile(r'''(?x)\s*
|
operator_rex = re.compile(r'''(?x)\s*
|
||||||
(?P<key>[a-z_]+)
|
(?P<key>[a-z_]+)
|
||||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||||
(?:
|
(?:
|
||||||
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
|
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
|
||||||
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
|
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
|
||||||
|
@ -4683,7 +4686,11 @@ def _match_one(filter_part, dct):
|
||||||
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
|
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
|
||||||
m = operator_rex.search(filter_part)
|
m = operator_rex.search(filter_part)
|
||||||
if m:
|
if m:
|
||||||
op = COMPARISON_OPERATORS[m.group('op')]
|
unnegated_op = COMPARISON_OPERATORS[m.group('op')]
|
||||||
|
if m.group('negation'):
|
||||||
|
op = lambda attr, value: not unnegated_op(attr, value)
|
||||||
|
else:
|
||||||
|
op = unnegated_op
|
||||||
actual_value = dct.get(m.group('key'))
|
actual_value = dct.get(m.group('key'))
|
||||||
if (m.group('quotedstrval') is not None
|
if (m.group('quotedstrval') is not None
|
||||||
or m.group('strval') is not None
|
or m.group('strval') is not None
|
||||||
|
@ -4693,14 +4700,14 @@ def _match_one(filter_part, dct):
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/11082).
|
# https://github.com/ytdl-org/youtube-dl/issues/11082).
|
||||||
or actual_value is not None and m.group('intval') is not None
|
or actual_value is not None and m.group('intval') is not None
|
||||||
and isinstance(actual_value, compat_str)):
|
and isinstance(actual_value, compat_str)):
|
||||||
if m.group('op') not in ('=', '!='):
|
|
||||||
raise ValueError(
|
|
||||||
'Operator %s does not support string values!' % m.group('op'))
|
|
||||||
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
|
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
|
||||||
quote = m.group('quote')
|
quote = m.group('quote')
|
||||||
if quote is not None:
|
if quote is not None:
|
||||||
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
|
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
|
||||||
else:
|
else:
|
||||||
|
if m.group('op') in ('*=', '^=', '$='):
|
||||||
|
raise ValueError(
|
||||||
|
'Operator %s only supports string values!' % m.group('op'))
|
||||||
try:
|
try:
|
||||||
comparison_value = int(m.group('intval'))
|
comparison_value = int(m.group('intval'))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
|
Loading…
Reference in a new issue