[ie/BiliIntl] Fix and improve subtitles extraction (#7077)

Closes #7075, Closes #6664
Authored by: HobbyistDev, itachi-19, dirkf, seproDev

Co-authored-by: itachi-19 <16500619+itachi-19@users.noreply.github.com>
Co-authored-by: dirkf <fieldhouse@gmx.net>
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
This commit is contained in:
HobbyistDev 2024-01-19 08:27:25 +09:00 committed by GitHub
parent 5498729c59
commit cf6413e840
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 29 additions and 13 deletions

View File

@@ -18,6 +18,7 @@ from ..utils import (
OnDemandPagedList, OnDemandPagedList,
bool_or_none, bool_or_none,
clean_html, clean_html,
determine_ext,
filter_dict, filter_dict,
float_or_none, float_or_none,
format_field, format_field,
@@ -1658,19 +1659,34 @@ class BiliIntlBaseIE(InfoExtractor):
'aid': aid, 'aid': aid,
})) or {} })) or {}
subtitles = {} subtitles = {}
for sub in sub_json.get('subtitles') or []: fetched_urls = set()
sub_url = sub.get('url') for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
if not sub_url: for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
continue if url in fetched_urls:
sub_data = self._download_json( continue
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False, fetched_urls.add(url)
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '') sub_ext = determine_ext(url)
if not sub_data: sub_lang = sub.get('lang_key') or 'en'
continue
subtitles.setdefault(sub.get('lang_key', 'en'), []).append({ if sub_ext == 'ass':
'ext': 'srt', subtitles.setdefault(sub_lang, []).append({
'data': self.json2srt(sub_data) 'ext': 'ass',
}) 'url': url,
})
elif sub_ext == 'json':
sub_data = self._download_json(
url, ep_id or aid, fatal=False,
note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
errnote='Unable to download subtitles')
if sub_data:
subtitles.setdefault(sub_lang, []).append({
'ext': 'srt',
'data': self.json2srt(sub_data),
})
else:
self.report_warning('Unexpected subtitle extension', ep_id or aid)
return subtitles return subtitles
def _get_formats(self, *, ep_id=None, aid=None): def _get_formats(self, *, ep_id=None, aid=None):