From 418bbfd722ba01bb106daf80ab204984a1fc26e5 Mon Sep 17 00:00:00 2001
From: ftk
Date: Tue, 12 Jul 2022 20:27:50 +0000
Subject: [PATCH] [extractor/twitch] Support storyboards for VODs (#4342)

Authored by: ftk
---
Review note: in _download_info the "video is None" check now runs before
video['moments'] / video['storyboard'] are assigned. Previously a missing
video raised TypeError on the subscript assignment, so the intended
ExtractorError('Video ... does not exist') could never be reached.

 yt_dlp/extractor/twitch.py | 99 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 96 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index 834350d12a..a0cb0be026 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -12,6 +12,7 @@
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
+    base_url,
     clean_html,
     dict_get,
     ExtractorError,
@@ -52,6 +53,7 @@ class TwitchBaseIE(InfoExtractor):
         'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
         'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
         'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
+        'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
     }
 
     def _perform_login(self, username, password):
@@ -202,6 +204,8 @@ class TwitchVodIE(TwitchBaseIE):
             'uploader_id': 'riotgames',
             'view_count': int,
             'start_time': 310,
+            'chapters': [],
+            'live_status': 'was_live',
         },
         'params': {
             # m3u8 download
@@ -270,9 +274,52 @@ class TwitchVodIE(TwitchBaseIE):
                     'title': 'Art'
                 }
             ],
+            'live_status': 'was_live',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'view_count': int,
         },
         'params': {
             'skip_download': True
+        },
+    }, {
+        'note': 'Storyboards',
+        'url': 'https://www.twitch.tv/videos/635475444',
+        'info_dict': {
+            'id': 'v635475444',
+            'format_id': 'sb0',
+            'ext': 'mhtml',
+            'title': 'Riot Games',
+            'duration': 11643,
+            'uploader': 'Riot Games',
+            'uploader_id': 'riotgames',
+            'timestamp': 1590770569,
+            'upload_date': '20200529',
+            'chapters': [
+                {
+                    'start_time': 0,
+                    'end_time': 573,
+                    'title': 'League of Legends'
+                },
+                {
+                    'start_time': 573,
+                    'end_time': 3922,
+                    'title': 'Legends of Runeterra'
+                },
+                {
+                    'start_time': 3922,
+                    'end_time': 11643,
+                    'title': 'Art'
+                }
+            ],
+            'live_status': 'was_live',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'view_count': int,
+            'columns': int,
+            'rows': int,
+        },
+        'params': {
+            'format': 'mhtml',
+            'skip_download': True
         }
     }]
 
@@ -290,16 +337,25 @@ def _download_info(self, item_id):
                     'includePrivate': False,
                     'videoID': item_id,
                 },
+            }, {
+                'operationName': 'VideoPlayer_VODSeekbarPreviewVideo',
+                'variables': {
+                    'includePrivate': False,
+                    'videoID': item_id,
+                },
             }],
             'Downloading stream metadata GraphQL')
 
         video = traverse_obj(data, (0, 'data', 'video'))
-        video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node'))
 
         if video is None:
             raise ExtractorError(
                 'Video %s does not exist' % item_id, expected=True)
-        return self._extract_info_gql(video, item_id)
+
+        # Attach the extra query results only once the video is known to exist
+        video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node'))
+        video['storyboard'] = traverse_obj(data, (2, 'data', 'video', 'seekPreviewsURL'), expected_type=url_or_none)
+        return video
 
     def _extract_info(self, info):
         status = info.get('status')
@@ -383,10 +439,45 @@ def _extract_info_gql(self, info, item_id):
             'was_live': True,
         }
 
+    def _extract_storyboard(self, item_id, storyboard_json_url, duration):
+        """Yield mhtml storyboard formats listed in the seek-previews JSON, widest first"""
+        if not duration or not storyboard_json_url:
+            return
+        spec = self._download_json(storyboard_json_url, item_id, 'Downloading storyboard metadata JSON', fatal=False) or []
+        # sort from highest quality to lowest
+        # This makes sb0 the highest-quality format, sb1 - lower, etc which is consistent with youtube sb ordering
+        spec.sort(key=lambda x: int_or_none(x.get('width')) or 0, reverse=True)
+        base = base_url(storyboard_json_url)
+        for i, s in enumerate(spec):
+            count = int_or_none(s.get('count'))
+            images = s.get('images')
+            if not (images and count):
+                continue
+            fragment_duration = duration / len(images)
+            yield {
+                'format_id': f'sb{i}',
+                'format_note': 'storyboard',
+                'ext': 'mhtml',
+                'protocol': 'mhtml',
+                'acodec': 'none',
+                'vcodec': 'none',
+                'url': urljoin(base, images[0]),
+                'width': int_or_none(s.get('width')),
+                'height': int_or_none(s.get('height')),
+                'fps': count / duration,
+                'rows': int_or_none(s.get('rows')),
+                'columns': int_or_none(s.get('cols')),
+                'fragments': [{
+                    'url': urljoin(base, path),
+                    'duration': fragment_duration,
+                } for path in images],
+            }
+
     def _real_extract(self, url):
         vod_id = self._match_id(url)
 
-        info = self._download_info(vod_id)
+        video = self._download_info(vod_id)
+        info = self._extract_info_gql(video, vod_id)
         access_token = self._download_access_token(vod_id, 'video', 'id')
 
         formats = self._extract_m3u8_formats(
@@ -403,6 +494,8 @@ def _real_extract(self, url):
                 })),
             vod_id, 'mp4', entry_protocol='m3u8_native')
 
+        formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))
+
         self._prefer_source(formats)
         info['formats'] = formats
 