Reject entire playlists faster with --match-filter
Rejection based on `playlist_id` etc. can now be checked before any entries are extracted.

Related: #4383
This commit is contained in:
parent 7d0f6f0c45
commit 3bec830a59
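For illustration only (not part of the commit; the playlist ID and URL are made up): with this change, a match filter that only needs playlist-level fields such as `playlist_id` is evaluated against the playlist's own info dict first, so a rejected playlist is skipped before any of its entries are extracted. A minimal sketch using the embedding API, which --match-filter maps onto:

import yt_dlp

def skip_known_playlist(info, *, incomplete=False):
    # Returning a string rejects the item; returning None accepts it.
    # After this change the callable is also invoked with the playlist's own
    # info dict (incomplete=True) before any entries are fetched.
    if info.get('playlist_id') == 'PLxxxxxxxx':  # hypothetical playlist ID
        return 'unwanted playlist, skipping'
    return None

with yt_dlp.YoutubeDL({'match_filter': skip_known_playlist}) as ydl:
    ydl.download(['https://www.youtube.com/playlist?list=PLxxxxxxxx'])  # hypothetical URL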
@@ -1309,7 +1309,7 @@ def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
     def _match_entry(self, info_dict, incomplete=False, silent=False):
         """ Returns None if the file should be downloaded """

-        video_title = info_dict.get('title', info_dict.get('id', 'video'))
+        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

         def check_filter():
             if 'title' in info_dict:
@@ -1677,23 +1677,37 @@ def _ensure_dir_exists(self, path):
         return make_dir(path, self.report_error)

     @staticmethod
-    def _playlist_infodict(ie_result, **kwargs):
-        return {
-            **ie_result,
+    def _playlist_infodict(ie_result, strict=False, **kwargs):
+        info = {
+            'playlist_count': ie_result.get('playlist_count'),
             'playlist': ie_result.get('title') or ie_result.get('id'),
             'playlist_id': ie_result.get('id'),
             'playlist_title': ie_result.get('title'),
             'playlist_uploader': ie_result.get('uploader'),
             'playlist_uploader_id': ie_result.get('uploader_id'),
-            'playlist_index': 0,
             **kwargs,
         }
+        if strict:
+            return info
+        return {
+            **info,
+            'playlist_index': 0,
+            '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
+            'extractor': ie_result['extractor'],
+            'webpage_url': ie_result['webpage_url'],
+            'webpage_url_basename': url_basename(ie_result['webpage_url']),
+            'webpage_url_domain': get_domain(ie_result['webpage_url']),
+            'extractor_key': ie_result['extractor_key'],
+        }

     def __process_playlist(self, ie_result, download):
         """Process each entry in the playlist"""
         assert ie_result['_type'] in ('playlist', 'multi_video')

-        title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
+        common_info = self._playlist_infodict(ie_result, strict=True)
+        title = common_info.get('title') or '<Untitled>'
+        if self._match_entry(common_info, incomplete=True) is not None:
+            return
         self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

         all_entries = PlaylistEntries(self, ie_result)
@@ -1711,12 +1725,14 @@ def __process_playlist(self, ie_result, download):
         # Better to do this after potentially exhausting entries
         ie_result['playlist_count'] = all_entries.get_full_count()

+        common_info = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
+        ie_copy = collections.ChainMap(ie_result, common_info)
+
         _infojson_written = False
         write_playlist_files = self.params.get('allow_playlist_files', True)
         if write_playlist_files and self.params.get('list_thumbnails'):
             self.list_thumbnails(ie_result)
         if write_playlist_files and not self.params.get('simulate'):
-            ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
             _infojson_written = self._write_info_json(
                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
             if _infojson_written is None:
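Background on the `collections.ChainMap` used above (a generic stdlib illustration, not code from the commit): a ChainMap layers mappings without copying them and looks keys up left to right, so `ChainMap(ie_result, common_info)` exposes everything in `ie_result` plus the playlist_* fields, and later writes to `ie_result` stay visible through the view:

import collections

entry = {'id': 'abc', 'title': 'An entry'}
extra = {'playlist_id': 'PL123', 'title': 'fallback title'}

view = collections.ChainMap(entry, extra)
print(view['title'])        # 'An entry' -- leftmost mapping wins
print(view['playlist_id'])  # 'PL123'    -- falls through to the second mapping
entry['duration'] = 42
print(view['duration'])     # 42         -- no copy is made; updates show through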
@@ -1725,7 +1741,7 @@ def __process_playlist(self, ie_result, download):
                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
                 return
             # TODO: This should be passed to ThumbnailsConvertor if necessary
-            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
+            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

         if lazy:
             if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
@@ -1749,35 +1765,26 @@ def __process_playlist(self, ie_result, download):
         for i, (playlist_index, entry) in enumerate(entries):
             if lazy:
                 resolved_entries.append((playlist_index, entry))
-
-            # TODO: Add auto-generated fields
-            if not entry or self._match_entry(entry, incomplete=True) is not None:
+            if not entry:
                 continue

-            self.to_screen('[download] Downloading video %s of %s' % (
-                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
-
             entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
             if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
                 playlist_index = ie_result['requested_entries'][i]

-            entry_result = self.__process_iterable_entry(entry, download, {
-                'n_entries': int_or_none(n_entries),
-                '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
-                'playlist_count': ie_result.get('playlist_count'),
+            extra = {
+                **common_info,
                 'playlist_index': playlist_index,
                 'playlist_autonumber': i + 1,
-                'playlist': title,
-                'playlist_id': ie_result.get('id'),
-                'playlist_title': ie_result.get('title'),
-                'playlist_uploader': ie_result.get('uploader'),
-                'playlist_uploader_id': ie_result.get('uploader_id'),
-                'extractor': ie_result['extractor'],
-                'webpage_url': ie_result['webpage_url'],
-                'webpage_url_basename': url_basename(ie_result['webpage_url']),
-                'webpage_url_domain': get_domain(ie_result['webpage_url']),
-                'extractor_key': ie_result['extractor_key'],
-            })
+            }
+
+            if self._match_entry(collections.ChainMap(entry, extra), incomplete=True) is not None:
+                continue
+
+            self.to_screen('[download] Downloading video %s of %s' % (
+                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+            entry_result = self.__process_iterable_entry(entry, download, extra)
             if not entry_result:
                 failures += 1
                 if failures >= max_failures:
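As a sketch of the per-entry effect (hypothetical URL; --playlist-items would be the proper tool for this particular job): since `extra` above carries `playlist_autonumber` and the check runs on `ChainMap(entry, extra)` with `incomplete=True`, a string filter built with `match_filter_func` can reject entries before they are fully extracted:

import yt_dlp
from yt_dlp.utils import match_filter_func

# Same syntax as --match-filter on the command line; entries past the third
# are rejected by the pre-extraction check in the loop above.
ydl_opts = {'match_filter': match_filter_func('playlist_autonumber <= 3')}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/playlist?list=PLxxxxxxxx'])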
@@ -1149,9 +1149,9 @@ def run(self, info):
         if len(in_files) < len(entries):
             raise PostProcessingError('Aborting concatenation because some downloads failed')

-        ie_copy = self._downloader._playlist_infodict(info)
         exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
-        ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv'
+        ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'},
+                                       info, self._downloader._playlist_infodict(info))
         out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')

         files_to_delete = self.concat_files(in_files, out_file)
@@ -3666,7 +3666,7 @@ def _match_func(info_dict, incomplete=False):
         if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
             return NO_DEFAULT if interactive and not incomplete else None
         else:
-            video_title = info_dict.get('title') or info_dict.get('id') or 'video'
+            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
             filter_str = ') | ('.join(map(str.strip, filters))
             return f'{video_title} does not pass filter ({filter_str}), skipping ..'
     return _match_func
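For context on the `incomplete` flag used in these checks (a generic illustration, not from the commit): `match_str` lets a comparison pass when the field is still missing and `incomplete` is truthy, which is what makes it safe to apply user filters to a playlist dict that has no entry-level fields yet:

from yt_dlp.utils import match_str

playlist_info = {'playlist_id': 'PL123', 'playlist_title': 'Example'}  # made-up values

print(match_str('playlist_id = PL123', playlist_info, incomplete=True))  # True  -> passes
print(match_str('playlist_id = PLxyz', playlist_info, incomplete=True))  # False -> rejected up front
print(match_str('duration > 60', playlist_info, incomplete=True))        # True  -> unknown yet, so not rejected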