diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1655f99e34..4d7e79fbff 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3479,6 +3479,7 @@ def process_language(container, base_url, lang_code, sub_name, query): subtitles, automatic_captions = {}, {} for lang_code, caption_track in captions.items(): base_url = caption_track.get('baseUrl') + orig_lang = parse_qs(base_url).get('lang', [None])[-1] if not base_url: continue lang_name = self._get_text(caption_track, 'name', max_runs=1) @@ -3492,6 +3493,7 @@ def process_language(container, base_url, lang_code, sub_name, query): for trans_code, trans_name in translation_languages.items(): if not trans_code: continue + orig_trans_code = trans_code if caption_track.get('kind') != 'asr': if 'translated_subs' in self._configuration_arg('skip'): continue @@ -3499,14 +3501,12 @@ def process_language(container, base_url, lang_code, sub_name, query): trans_name += format_field(lang_name, template=' from %s') # Add an "-orig" label to the original language so that it can be distinguished. # The subs are returned without "-orig" as well for compatibility - if lang_code == f'a-{trans_code}': + if lang_code == f'a-{orig_trans_code}': process_language( automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {}) # Setting tlang=lang returns damaged subtitles. - # Not using lang_code == f'a-{trans_code}' here for future-proofing - orig_lang = parse_qs(base_url).get('lang', [None])[-1] process_language(automatic_captions, base_url, trans_code, trans_name, - {} if orig_lang == trans_code else {'tlang': trans_code}) + {} if orig_lang == orig_trans_code else {'tlang': trans_code}) info['automatic_captions'] = automatic_captions info['subtitles'] = subtitles