From e0e624ca7f72b5191a3e3b1d96cc6a7db3676143 Mon Sep 17 00:00:00 2001 From: Felix S Date: Mon, 19 Apr 2021 18:57:25 +0200 Subject: [PATCH] [canvas] Extract subtitles from streaming manifests --- yt_dlp/extractor/canvas.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index eefbab241b..1b7c1d2ff7 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -83,24 +83,31 @@ def _real_extract(self, url): description = data.get('description') formats = [] + subtitles = {} for target in data['targetUrls']: format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type')) if not format_url or not format_type: continue format_type = format_type.upper() if format_type in self._HLS_ENTRY_PROTOCOLS_MAP: - formats.extend(self._extract_m3u8_formats( + fmts, subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type], - m3u8_id=format_type, fatal=False)) + m3u8_id=format_type, fatal=False) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) elif format_type == 'HDS': formats.extend(self._extract_f4m_formats( format_url, video_id, f4m_id=format_type, fatal=False)) elif format_type == 'MPEG_DASH': - formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id=format_type, fatal=False)) + fmts, subs = self._extract_mpd_formats_and_subtitles( + format_url, video_id, mpd_id=format_type, fatal=False) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) elif format_type == 'HSS': - formats.extend(self._extract_ism_formats( - format_url, video_id, ism_id='mss', fatal=False)) + fmts, subs = self._extract_ism_formats_and_subtitles( + format_url, video_id, ism_id='mss', fatal=False) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) else: formats.append({ 'format_id': format_type, @@ -108,7 +115,6 @@ def _real_extract(self, url): }) self._sort_formats(formats) - subtitles = {} subtitle_urls = data.get('subtitleUrls') if isinstance(subtitle_urls, list): for subtitle in subtitle_urls: