[ie/tiktok] Fix and deprioritize JSON subtitles (#10516)

Fixes regression caused by 5ce582448e

Closes #10514
Authored by: bashonly
This commit is contained in:
bashonly 2024-07-23 16:49:31 -05:00 committed by GitHub
parent 713b4cd18f
commit 2f97779f33
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -23,7 +23,6 @@
mimetype2ext, mimetype2ext,
parse_qs, parse_qs,
qualities, qualities,
remove_start,
srt_subtitles_timecode, srt_subtitles_timecode,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
@ -254,7 +253,16 @@ def _extract_web_data_and_status(self, url, video_id, fatal=True):
def _get_subtitles(self, aweme_detail, aweme_id, user_name): def _get_subtitles(self, aweme_detail, aweme_id, user_name):
# TODO: Extract text positioning info # TODO: Extract text positioning info
EXT_MAP = { # From lowest to highest preference
'creator_caption': 'json',
'srt': 'srt',
'webvtt': 'vtt',
}
preference = qualities(tuple(EXT_MAP.values()))
subtitles = {} subtitles = {}
# aweme/detail endpoint subs # aweme/detail endpoint subs
captions_info = traverse_obj( captions_info = traverse_obj(
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict) aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
@ -278,8 +286,8 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_name):
if not caption.get('url'): if not caption.get('url'):
continue continue
subtitles.setdefault(caption.get('lang') or 'en', []).append({ subtitles.setdefault(caption.get('lang') or 'en', []).append({
'ext': remove_start(caption.get('caption_format'), 'web'),
'url': caption['url'], 'url': caption['url'],
'ext': EXT_MAP.get(caption.get('Format')),
}) })
# webpage subs # webpage subs
if not subtitles: if not subtitles:
@ -288,9 +296,14 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_name):
self._create_url(user_name, aweme_id), aweme_id, fatal=False) self._create_url(user_name, aweme_id), aweme_id, fatal=False)
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])): for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({ subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
'ext': remove_start(caption.get('Format'), 'web'),
'url': caption['Url'], 'url': caption['Url'],
'ext': EXT_MAP.get(caption.get('Format')),
}) })
# Deprioritize creator_caption json since it can't be embedded or used by media players
for lang, subs_list in subtitles.items():
subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))
return subtitles return subtitles
def _parse_url_key(self, url_key): def _parse_url_key(self, url_key):