From ba3a7232f0d8d744ce8a44773bf94abacd16fad0 Mon Sep 17 00:00:00 2001 From: Riteo Date: Tue, 21 May 2024 14:58:39 +0200 Subject: [PATCH] [pp/FFmpegEmbedSubtitle] Embed JSON subtitles as Matroska attachments JSON subtitles have no consistent structure, so they can't be embedded as regular subtitle streams. Instead, embed them as file attachments when exporting to Matroska (mkv/mka). This allows single-file downloads with everything embedded, e.g. for archival purposes. --- yt_dlp/postprocessor/ffmpeg.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index e36fe8039..4b6dc0acc 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -619,13 +619,19 @@ def run(self, info): webm_vtt_warn = False mp4_ass_warn = False + json_names, json_filenames = [], [] + for lang, sub_info in subtitles.items(): if not os.path.exists(sub_info.get('filepath', '')): self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing') continue sub_ext = sub_info['ext'] if sub_ext == 'json': - self.report_warning('JSON subtitles cannot be embedded') + if info['ext'] in ('mkv', 'mka'): + json_names.append(lang) + json_filenames.append(sub_info['filepath']) + else: + self.report_warning('JSON subtitles can only be embedded in mkv/mka files.') elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': sub_langs.append(lang) sub_names.append(sub_info.get('name')) @@ -644,11 +650,15 @@ def run(self, info): input_files = [filename, *sub_filenames] opts = [ + # Attached JSON subtitles don't have a codec ID, so we have to + # instruct FFmpeg not to discard them. 
+ '-copy_unknown', *self.stream_copy_opts(ext=info['ext']), # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', ] + for i, (lang, name) in enumerate(zip(sub_langs, sub_names)): opts.extend(['-map', f'{i + 1}:0']) lang_code = ISO639Utils.short2long(lang) or lang @@ -657,12 +667,21 @@ def run(self, info): opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}', f'-metadata:s:s:{i}', f'title={name}']) + for (json_filename, json_name) in zip(json_filenames, json_names): + escaped_json_filename = self._ffmpeg_filename_argument(json_filename) + opts.extend([ + '-map', f'-0:m:filename:{json_name}.json?', + '-attach', escaped_json_filename, + f'-metadata:s:m:filename:{escaped_json_filename}', 'mimetype=application/json', + f'-metadata:s:m:filename:{escaped_json_filename}', f'filename={json_name}.json', + ]) + temp_filename = prepend_extension(filename, 'temp') self.to_screen(f'Embedding subtitles in "{filename}"') self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) os.replace(temp_filename, filename) - files_to_delete = [] if self._already_have_subtitle else sub_filenames + files_to_delete = [] if self._already_have_subtitle else sub_filenames + json_filenames return files_to_delete, info