[nytimes] Extract subtitles from HLS manifests

This commit is contained in:
Felix S 2021-04-15 10:14:46 +02:00
parent 015c10aeec
commit 47f4203dd3

View file

@@ -46,6 +46,7 @@ def get_file_size(file_size):
urls = [] urls = []
formats = [] formats = []
subtitles = {}
for video in video_data.get('renditions', []): for video in video_data.get('renditions', []):
video_url = video.get('url') video_url = video.get('url')
format_id = video.get('type') format_id = video.get('type')
@@ -54,9 +55,11 @@ def get_file_size(file_size):
urls.append(video_url) urls.append(video_url)
ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url) ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', 'm3u8_native', video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id or 'hls', fatal=False)) m3u8_id=format_id or 'hls', fatal=False)
formats.extend(m3u8_fmts)
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
elif ext == 'mpd': elif ext == 'mpd':
continue continue
# formats.extend(self._extract_mpd_formats( # formats.extend(self._extract_mpd_formats(
@@ -96,6 +99,7 @@ def get_file_size(file_size):
'uploader': video_data.get('byline'), 'uploader': video_data.get('byline'),
'duration': float_or_none(video_data.get('duration'), 1000), 'duration': float_or_none(video_data.get('duration'), 1000),
'formats': formats, 'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
} }