From 81a23040ebf330a87a7eb842aa76884d5fc6e504 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 22 May 2021 13:54:12 +0530 Subject: [PATCH] [cleanup] Refactor ffmpeg convertors --- yt_dlp/__init__.py | 30 ++++++++++++--------- yt_dlp/options.py | 29 +++++++++++++------- yt_dlp/postprocessor/ffmpeg.py | 49 ++++++++++++++++++---------------- yt_dlp/utils.py | 2 -- 4 files changed, 64 insertions(+), 46 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index d014d1e01..5b2230ef1 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -31,7 +31,6 @@ preferredencoding, read_batch_urls, RejectedVideoReached, - REMUX_EXTENSIONS, render_table, SameFileError, setproctitle, @@ -45,6 +44,13 @@ from .extractor import gen_extractors, list_extractors from .extractor.common import InfoExtractor from .extractor.adobepass import MSO_INFO +from .postprocessor.ffmpeg import ( + FFmpegExtractAudioPP, + FFmpegSubtitlesConvertorPP, + FFmpegThumbnailsConvertorPP, + FFmpegVideoConvertorPP, + FFmpegVideoRemuxerPP, +) from .postprocessor.metadatafromfield import MetadataFromFieldPP from .YoutubeDL import YoutubeDL @@ -209,25 +215,25 @@ def parse_retries(retries, name=''): if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: raise ValueError('Playlist end must be greater than playlist start') if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: + if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') if not opts.audioquality.isdigit(): parser.error('invalid audio quality specified') if opts.recodevideo is not None: - if opts.recodevideo not in REMUX_EXTENSIONS: - parser.error('invalid video recode format specified') + opts.recodevideo = opts.recodevideo.replace(' ', '') + if not re.match(FFmpegVideoConvertorPP.FORMAT_RE, opts.recodevideo): + parser.error('invalid video remux format specified') if opts.remuxvideo is not None: opts.remuxvideo = opts.remuxvideo.replace(' ', '') - remux_regex = r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(REMUX_EXTENSIONS)) - if not re.match(remux_regex, opts.remuxvideo): + if not re.match(FFmpegVideoRemuxerPP.FORMAT_RE, opts.remuxvideo): parser.error('invalid video remux format specified') if opts.convertsubtitles is not None: - if opts.convertsubtitles not in ('srt', 'vtt', 'ass', 'lrc'): + if opts.convertsubtitles not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS: parser.error('invalid subtitle format specified') if opts.convertthumbnails is not None: - if opts.convertthumbnails not in ('jpg', 'png'): + if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS: parser.error('invalid thumbnail format specified') if opts.date is not None: @@ -480,10 +486,10 @@ def report_args_compat(arg, name): opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat'] final_ext = ( - opts.recodevideo - or (opts.remuxvideo in REMUX_EXTENSIONS) and opts.remuxvideo - or (opts.extractaudio and opts.audioformat != 'best') and opts.audioformat - or None) + opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS + else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS + else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best') + else None) match_filter = ( None if opts.match_filter is None diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 812bee572..c72a7d5d0 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -5,7 +5,6 @@ import re import sys -from .downloader.external import list_external_downloaders from .compat import ( compat_expanduser, compat_get_terminal_size, @@ -18,11 +17,18 @@ get_executable_path, OUTTMPL_TYPES, preferredencoding, - REMUX_EXTENSIONS, write_string, ) from .version import __version__ +from .downloader.external import list_external_downloaders +from .postprocessor.ffmpeg import ( + FFmpegExtractAudioPP, + FFmpegSubtitlesConvertorPP, + FFmpegThumbnailsConvertorPP, + FFmpegVideoRemuxerPP, +) + def _hide_login_info(opts): PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']) @@ -1123,7 +1129,9 @@ def _dict_from_options_callback( help='Convert video files to audio-only files (requires ffmpeg and ffprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x') + help=( + 'Specify audio format to convert the audio to when -x is used. Currently supported formats are: ' + 'best (default) or one of %s' % '|'.join(FFmpegExtractAudioPP.SUPPORTED_EXTS))) postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', @@ -1134,15 +1142,14 @@ def _dict_from_options_callback( help=( 'Remux the video into another container if necessary (currently supported: %s). ' 'If target container does not support the video/audio codec, remuxing will fail. ' - 'You can specify multiple rules; eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 ' - 'and anything else to mkv.' % '|'.join(REMUX_EXTENSIONS))) + 'You can specify multiple rules; Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 ' + 'and anything else to mkv.' % '|'.join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS))) postproc.add_option( '--recode-video', metavar='FORMAT', dest='recodevideo', default=None, help=( 'Re-encode the video into another format if re-encoding is necessary. ' - 'You can specify multiple rules similar to --remux-video. ' - 'The supported formats are also the same as --remux-video')) + 'The syntax and supported formats are the same as --remux-video')) postproc.add_option( '--postprocessor-args', '--ppa', metavar='NAME:ARGS', dest='postprocessor_args', default={}, type='str', @@ -1250,11 +1257,15 @@ def _dict_from_options_callback( postproc.add_option( '--convert-subs', '--convert-sub', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, - help='Convert the subtitles to another format (currently supported: srt|ass|vtt|lrc) (Alias: --convert-subtitles)') + help=( + 'Convert the subtitles to another format (currently supported: %s) ' + '(Alias: --convert-subtitles)' % '|'.join(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))) postproc.add_option( '--convert-thumbnails', metavar='FORMAT', dest='convertthumbnails', default=None, - help='Convert the thumbnails to another format (currently supported: jpg, png)') + help=( + 'Convert the thumbnails to another format ' + '(currently supported: %s) ' % '|'.join(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS))) postproc.add_option( '--split-chapters', '--split-tracks', dest='split_chapters', action='store_true', default=False, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 810c9cb86..ea728be37 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -290,13 +290,12 @@ def _ffmpeg_filename_argument(self, fn): class FFmpegExtractAudioPP(FFmpegPostProcessor): - COMMON_AUDIO_EXTENSIONS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') + COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') + SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav') def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) - if preferredcodec is None: - preferredcodec = 'best' - self._preferredcodec = preferredcodec + self._preferredcodec = preferredcodec or 'best' self._preferredquality = preferredquality self._nopostoverwrites = nopostoverwrites @@ -315,7 +314,7 @@ def run(self, information): path = information['filepath'] orig_ext = information['ext'] - if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTENSIONS: + if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS: self.to_screen('Skipping audio extraction since the file is already in a common audio format') return [], information @@ -400,6 +399,8 @@ def run(self, information): class FFmpegVideoConvertorPP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus') + FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS))) _action = 'converting' def __init__(self, downloader=None, preferedformat=None): @@ -419,14 +420,14 @@ def _options(target_ext): return [] def run(self, information): - path = information['filepath'] - target_ext = self._target_ext(information['ext'].lower()) + path, source_ext = information['filepath'], information['ext'].lower() + target_ext = self._target_ext(source_ext) _skip_msg = ( 'could not find a mapping for %s' if not target_ext else 'already is in target format %s' if source_ext == target_ext else None) if _skip_msg: - self.to_screen('Not %s media file %s; %s' % (self._action, path, _skip_msg % source_ext)) + self.to_screen('Not %s media file "%s"; %s' % (self._action, path, _skip_msg % source_ext)) return [], information prefix, sep, oldext = path.rpartition('.') @@ -708,6 +709,8 @@ def run(self, info): class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc') + def __init__(self, downloader=None, format=None): super(FFmpegSubtitlesConvertorPP, self).__init__(downloader) self.format = format @@ -816,6 +819,8 @@ def run(self, info): class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('jpg', 'png') + def __init__(self, downloader=None, format=None): super(FFmpegThumbnailsConvertorPP, self).__init__(downloader) self.format = format @@ -841,31 +846,29 @@ def fixup_webp(self, info, idx=-1): info['__files_to_move'][webp_filename] = replace_extension( info['__files_to_move'].pop(thumbnail_filename), 'webp') - def convert_thumbnail(self, thumbnail_filename, ext): - if ext == 'jpg': - format_name = 'JPEG' - opts = ['-bsf:v', 'mjpeg2jpeg'] - elif ext == 'png': - format_name = 'PNG' - opts = [] - else: - raise FFmpegPostProcessorError('Only conversion to either jpg or png is currently supported') + @staticmethod + def _options(target_ext): + if target_ext == 'jpg': + return ['-bsf:v', 'mjpeg2jpeg'] + return [] + + def convert_thumbnail(self, thumbnail_filename, target_ext): # NB: % is supposed to be escaped with %% but this does not work # for input files so working around with standard substitution escaped_thumbnail_filename = thumbnail_filename.replace('%', '#') os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename)) - escaped_thumbnail_conv_filename = replace_extension(escaped_thumbnail_filename, ext) - self.to_screen('Converting thumbnail "%s" to %s' % (escaped_thumbnail_filename, format_name)) - self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_conv_filename, opts) - thumbnail_conv_filename = replace_extension(thumbnail_filename, ext) + escaped_thumbnail_conv_filename = replace_extension(escaped_thumbnail_filename, target_ext) + + self.to_screen('Converting thumbnail "%s" to %s' % (escaped_thumbnail_filename, target_ext)) + self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_conv_filename, self._options(target_ext)) + # Rename back to unescaped + thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext) os.rename(encodeFilename(escaped_thumbnail_filename), encodeFilename(thumbnail_filename)) os.rename(encodeFilename(escaped_thumbnail_conv_filename), encodeFilename(thumbnail_conv_filename)) return thumbnail_conv_filename def run(self, info): - if self.format not in ('jpg', 'png'): - raise FFmpegPostProcessorError('Only conversion to either jpg or png is currently supported') files_to_delete = [] has_thumbnail = False diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index b7d074ad9..9c9e27694 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1716,8 +1716,6 @@ def random_user_agent(): 'wav', 'f4f', 'f4m', 'm3u8', 'smil') -REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus') - # needed for sanitizing filenames in restricted mode ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],