Allow running some `postprocessors` before the actual download

This commit is contained in:
pukkandan 2021-04-11 03:48:07 +05:30
parent f4f751af40
commit 56d868dbb7
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
4 changed files with 54 additions and 60 deletions

View File

@ -291,10 +291,9 @@ class YoutubeDL(object):
postprocessors: A list of dictionaries, each with an entry postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See * key: The name of the postprocessor. See
yt_dlp/postprocessor/__init__.py for a list. yt_dlp/postprocessor/__init__.py for a list.
* _after_move: Optional. If True, run this post_processor * when: When to run the postprocessor. Can be one of
after 'MoveFilesAfterDownload' pre_process|before_dl|post_process|after_move.
as well as any further keyword arguments for the Assumed to be 'post_process' if not given
postprocessor.
post_hooks: A list of functions that get called as the final step post_hooks: A list of functions that get called as the final step
for each video file, after all postprocessors have been for each video file, after all postprocessors have been
called. The filename will be passed as the only argument. called. The filename will be passed as the only argument.
@ -423,7 +422,7 @@ class YoutubeDL(object):
params = None params = None
_ies = [] _ies = []
_pps = {'beforedl': [], 'aftermove': [], 'normal': []} _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
__prepare_filename_warned = False __prepare_filename_warned = False
_first_webpage_request = True _first_webpage_request = True
_download_retcode = None _download_retcode = None
@ -438,7 +437,7 @@ class YoutubeDL(object):
params = {} params = {}
self._ies = [] self._ies = []
self._ies_instances = {} self._ies_instances = {}
self._pps = {'beforedl': [], 'aftermove': [], 'normal': []} self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
self.__prepare_filename_warned = False self.__prepare_filename_warned = False
self._first_webpage_request = True self._first_webpage_request = True
self._post_hooks = [] self._post_hooks = []
@ -551,7 +550,7 @@ class YoutubeDL(object):
when = pp_def['when'] when = pp_def['when']
del pp_def['when'] del pp_def['when']
else: else:
when = 'normal' when = 'post_process'
pp = pp_class(self, **compat_kwargs(pp_def)) pp = pp_class(self, **compat_kwargs(pp_def))
self.add_post_processor(pp, when=when) self.add_post_processor(pp, when=when)
@ -605,7 +604,7 @@ class YoutubeDL(object):
for ie in gen_extractor_classes(): for ie in gen_extractor_classes():
self.add_info_extractor(ie) self.add_info_extractor(ie)
def add_post_processor(self, pp, when='normal'): def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain.""" """Add a PostProcessor object to the end of the chain."""
self._pps[when].append(pp) self._pps[when].append(pp)
pp.set_downloader(self) pp.set_downloader(self)
@ -2114,13 +2113,12 @@ class YoutubeDL(object):
self.post_extract(info_dict) self.post_extract(info_dict)
self._num_downloads += 1 self._num_downloads += 1
info_dict = self.pre_process(info_dict) info_dict, _ = self.pre_process(info_dict)
# info_dict['_filename'] needs to be set for backward compatibility # info_dict['_filename'] needs to be set for backward compatibility
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
temp_filename = self.prepare_filename(info_dict, 'temp') temp_filename = self.prepare_filename(info_dict, 'temp')
files_to_move = {} files_to_move = {}
skip_dl = self.params.get('skip_download', False)
# Forced printings # Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=False) self.__forced_printings(info_dict, full_filename, incomplete=False)
@ -2197,11 +2195,9 @@ class YoutubeDL(object):
# ie = self.get_info_extractor(info_dict['extractor_key']) # ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items(): for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext'] sub_format = sub_info['ext']
sub_fn = self.prepare_filename(info_dict, 'subtitle') sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
sub_filename = subtitles_filename( sub_filename_final = subtitles_filename(
temp_filename if not skip_dl else sub_fn, self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
sub_lang, sub_format, info_dict.get('ext'))
sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)): if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
sub_info['filepath'] = sub_filename sub_info['filepath'] = sub_filename
@ -2229,28 +2225,6 @@ class YoutubeDL(object):
(sub_lang, error_to_compat_str(err))) (sub_lang, error_to_compat_str(err)))
continue continue
if skip_dl:
if self.params.get('convertsubtitles', False):
# subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
filename_real_ext = os.path.splitext(full_filename)[1][1:]
filename_wo_ext = (
os.path.splitext(full_filename)[0]
if filename_real_ext == info_dict['ext']
else full_filename)
afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
# if subconv.available:
# info_dict['__postprocessors'].append(subconv)
if os.path.exists(encodeFilename(afilename)):
self.to_screen(
'[download] %s has already been downloaded and '
'converted' % afilename)
else:
try:
self.post_process(full_filename, info_dict, files_to_move)
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
return
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
infofn = self.prepare_filename(info_dict, 'infojson') infofn = self.prepare_filename(info_dict, 'infojson')
if not self._ensure_dir_exists(encodeFilename(infofn)): if not self._ensure_dir_exists(encodeFilename(infofn)):
@ -2266,11 +2240,10 @@ class YoutubeDL(object):
return return
info_dict['__infojson_filename'] = infofn info_dict['__infojson_filename'] = infofn
thumbfn = self.prepare_filename(info_dict, 'thumbnail') for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
thumb_fn_temp = temp_filename if not skip_dl else thumbfn thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp): thumb_filename = replace_extension(
thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext')) self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
files_to_move[thumb_filename_temp] = thumb_filename files_to_move[thumb_filename_temp] = thumb_filename
# Write internet shortcut files # Write internet shortcut files
@ -2322,9 +2295,20 @@ class YoutubeDL(object):
if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True): if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
return return
# Download try:
info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
except PostProcessingError as err:
self.report_error('Preprocessing: %s' % str(err))
return
must_record_download_archive = False must_record_download_archive = False
if not skip_dl: if self.params.get('skip_download', False):
info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
info_dict['__files_to_move'] = files_to_move
info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
else:
# Download
try: try:
def existing_file(*filepaths): def existing_file(*filepaths):
@ -2633,11 +2617,12 @@ class YoutubeDL(object):
actual_post_extract(info_dict or {}) actual_post_extract(info_dict or {})
def pre_process(self, ie_info): def pre_process(self, ie_info, key='pre_process', files_to_move=None):
info = dict(ie_info) info = dict(ie_info)
for pp in self._pps['beforedl']: info['__files_to_move'] = files_to_move or {}
for pp in self._pps[key]:
info = self.run_pp(pp, info) info = self.run_pp(pp, info)
return info return info, info.pop('__files_to_move', None)
def post_process(self, filename, ie_info, files_to_move=None): def post_process(self, filename, ie_info, files_to_move=None):
"""Run all the postprocessors on the given file.""" """Run all the postprocessors on the given file."""
@ -2645,11 +2630,11 @@ class YoutubeDL(object):
info['filepath'] = filename info['filepath'] = filename
info['__files_to_move'] = files_to_move or {} info['__files_to_move'] = files_to_move or {}
for pp in ie_info.get('__postprocessors', []) + self._pps['normal']: for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
info = self.run_pp(pp, info) info = self.run_pp(pp, info)
info = self.run_pp(MoveFilesAfterDownloadPP(self), info) info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
del info['__files_to_move'] del info['__files_to_move']
for pp in self._pps['aftermove']: for pp in self._pps['after_move']:
info = self.run_pp(pp, info) info = self.run_pp(pp, info)
return info return info

View File

@ -228,7 +228,7 @@ def _real_main(argv=None):
if not re.match(remux_regex, opts.remuxvideo): if not re.match(remux_regex, opts.remuxvideo):
parser.error('invalid video remux format specified') parser.error('invalid video remux format specified')
if opts.convertsubtitles is not None: if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']: if opts.convertsubtitles not in ('srt', 'vtt', 'ass', 'lrc'):
parser.error('invalid subtitle format specified') parser.error('invalid subtitle format specified')
if opts.date is not None: if opts.date is not None:
@ -322,7 +322,15 @@ def _real_main(argv=None):
postprocessors.append({ postprocessors.append({
'key': 'MetadataFromField', 'key': 'MetadataFromField',
'formats': opts.metafromfield, 'formats': opts.metafromfield,
'when': 'beforedl' # Run this immediately after extraction is complete
'when': 'pre_process'
})
if opts.convertsubtitles:
postprocessors.append({
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
# Run this before the actual video download
'when': 'before_dl'
}) })
if opts.extractaudio: if opts.extractaudio:
postprocessors.append({ postprocessors.append({
@ -351,15 +359,11 @@ def _real_main(argv=None):
# so metadata can be added here. # so metadata can be added here.
if opts.addmetadata: if opts.addmetadata:
postprocessors.append({'key': 'FFmpegMetadata'}) postprocessors.append({'key': 'FFmpegMetadata'})
if opts.convertsubtitles:
postprocessors.append({
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
})
if opts.embedsubtitles: if opts.embedsubtitles:
already_have_subtitle = opts.writesubtitles already_have_subtitle = opts.writesubtitles
postprocessors.append({ postprocessors.append({
'key': 'FFmpegEmbedSubtitle', 'key': 'FFmpegEmbedSubtitle',
# already_have_subtitle = True prevents the file from being deleted after embedding
'already_have_subtitle': already_have_subtitle 'already_have_subtitle': already_have_subtitle
}) })
if not already_have_subtitle: if not already_have_subtitle:
@ -385,6 +389,7 @@ def _real_main(argv=None):
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
postprocessors.append({ postprocessors.append({
'key': 'EmbedThumbnail', 'key': 'EmbedThumbnail',
# already_have_thumbnail = True prevents the file from being deleted after embedding
'already_have_thumbnail': already_have_thumbnail 'already_have_thumbnail': already_have_thumbnail
}) })
if not already_have_thumbnail: if not already_have_thumbnail:
@ -399,7 +404,8 @@ def _real_main(argv=None):
postprocessors.append({ postprocessors.append({
'key': 'ExecAfterDownload', 'key': 'ExecAfterDownload',
'exec_cmd': opts.exec_cmd, 'exec_cmd': opts.exec_cmd,
'when': 'aftermove' # Run this only after the files have been moved to their final locations
'when': 'after_move'
}) })
def report_args_compat(arg, name): def report_args_compat(arg, name):
@ -425,7 +431,6 @@ def _real_main(argv=None):
else match_filter_func(opts.match_filter)) else match_filter_func(opts.match_filter))
ydl_opts = { ydl_opts = {
'convertsubtitles': opts.convertsubtitles,
'usenetrc': opts.usenetrc, 'usenetrc': opts.usenetrc,
'username': opts.username, 'username': opts.username,
'password': opts.password, 'password': opts.password,

View File

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import os import os
import subprocess import subprocess
import struct import struct

View File

@ -13,6 +13,10 @@ from ..utils import (
class MoveFilesAfterDownloadPP(PostProcessor): class MoveFilesAfterDownloadPP(PostProcessor):
def __init__(self, downloader=None, downloaded=True):
PostProcessor.__init__(self, downloader)
self._downloaded = downloaded
@classmethod @classmethod
def pp_key(cls): def pp_key(cls):
return 'MoveFiles' return 'MoveFiles'
@ -21,7 +25,8 @@ class MoveFilesAfterDownloadPP(PostProcessor):
dl_path, dl_name = os.path.split(encodeFilename(info['filepath'])) dl_path, dl_name = os.path.split(encodeFilename(info['filepath']))
finaldir = info.get('__finaldir', dl_path) finaldir = info.get('__finaldir', dl_path)
finalpath = os.path.join(finaldir, dl_name) finalpath = os.path.join(finaldir, dl_name)
info['__files_to_move'][info['filepath']] = decodeFilename(finalpath) if self._downloaded:
info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)
make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old)))) make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old))))
for oldfile, newfile in info['__files_to_move'].items(): for oldfile, newfile in info['__files_to_move'].items():