From 7adae468018a60a2f3ccb4cd1c851a7002217c19 Mon Sep 17 00:00:00 2001
From: mpeter50 <83356418+mpeter50@users.noreply.github.com>
Date: Wed, 28 Dec 2022 18:50:42 +0100
Subject: [PATCH] fix twitch vod chat download

chat download now uses the GraphQL API, instead of the old one that doesn't work anymore
---
Review notes (everything between the "---" line and the diff is ignored by `git am`):
  * The line structure of this patch was restored; blank context lines and
    indentation were reconstructed from the hunk line counts.
  * Pagination now resumes from the cursor of the LAST comment edge of a page
    instead of the first one (assumes Relay-style "after cursor" semantics).
  * When `hasNextPage` is absent but a cursor is present, the loop now really
    continues, matching its "Continuing chat history download." warning.
  * Hunk ranges and the diffstat account for the one additional added line.

 yt_dlp/extractor/twitch.py | 88 +++++++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 40 deletions(-)

diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index e66199200..d6f16f953 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -3,6 +3,7 @@
 import json
 import random
 import re
+import time
 
 from .common import InfoExtractor
 from ..compat import (
@@ -55,6 +56,7 @@ class TwitchBaseIE(InfoExtractor):
         'VideoMetadata': '49b5b8f268cdeb259d75b58dcb0c1a748e3b575003448a2333dc5cdafd49adad',
         'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
         'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
+        'VideoCommentsByOffsetOrCursor': 'b70a3591ff0f4e0313d126c6a1502d79a1c02baebb288227c582044aa76adf6a',
     }
 
     @property
@@ -526,67 +528,73 @@ def _extract_storyboard(self, item_id, storyboard_json_url, duration):
                 } for path in images],
             }
 
-    def _download_chat(self, vod_id):
-        live_chat = list()
-
-        request_url = f'https://api.twitch.tv/v5/videos/{vod_id}/comments'
-        query_params = {
-            'client_id': self._CLIENT_ID
-        }
-
-        self.to_screen('Downloading chat fragment JSONs')
-
-        # TODO: question: is it OK to use this config value for this purpose?
-        max_retries = self.get_param('extractor_retries')
+    def _extract_chat(self, vod_id):
+        chat_history = []
+        has_more_pages = True
+        retry_sleep = 5
+        max_retries = 3
         retries = 0
         pagenum = 1
 
-        while True:
-            response_json = self._download_json(
-                request_url,
-                vod_id,
-                fatal=False,
-                note='Downloading chat fragment JSON page %d' % pagenum,
-                errnote='Live chat fragment download failed.',
-                query=query_params)
+        gql_ops = [
+            {
+                'operationName': 'VideoCommentsByOffsetOrCursor',
+                'variables': {
+                    'videoID': vod_id,
+                    # 'cursor':
+                }
+            }
+        ]
 
-            if response_json is False:
-                self.report_warning(f'Unable to fetch next chat history fragment. {retries}. try of {max_retries}')
+        self.to_screen('Downloading chat fragment pages')
+
+        while has_more_pages:
+            response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False)
+
+            if response is False:
+                self.report_warning(f'Unable to fetch next chat history fragment. {retries + 1}. try of {max_retries}')
                 if retries < max_retries:
                     retries += 1
+                    time.sleep(retry_sleep)
                     continue
                 else:
                     self.report_warning('Chat history download failed: retry limit reached')
-                    # TODO: when this happens, should I forget a partial chat history, or is it better to keep it too?
+                    # TODO: when this happens, should I forget a partial chat history, or is it better to keep it?
                     # I think if I keep it, it might be better to persist a warning that it is incomplete
-                    # live_chat.clear()
+                    # chat_history.clear()
                     break
 
-            live_chat.extend(response_json.get('comments') or [])
-            next_fragment_cursor = str_or_none(response_json.get('_next'))
+            comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments'))
+            chat_history.extend(traverse_obj(comments_obj, ('edges', slice, 'node')))
 
-            if next_fragment_cursor is None:
-                break
+            has_more_pages = traverse_obj(comments_obj, ('pageInfo', 'hasNextPage'))
 
-            query_params['cursor'] = next_fragment_cursor
-            pagenum += 1
+            # Assuming Relay-style cursors (a request returns the comments AFTER the given cursor),
+            # the next page has to be requested with the cursor of the last comment of this page.
+            cursor = traverse_obj(comments_obj, ('edges', -1, 'cursor'))
+            if has_more_pages is None:
+                if cursor is not None:
+                    self.report_warning("Next page indication is missing, but found cursor. Continuing chat history download.")
+                    has_more_pages = True
+                else:  # In this case maintenance might be needed. Purpose is to prevent silent errors.
+                    self.report_warning("Next page indication is missing, and cursor not found.")
+            elif has_more_pages and cursor is None:
+                self.report_warning("Cannot continue downloading chat history: cursor is missing. There are additional chat pages to download.")
+                break
 
-        chat_history_length = len(live_chat)
+            if has_more_pages:
+                pagenum += 1
+                gql_ops[0]['variables']['cursor'] = cursor
+
+        chat_history_length = len(chat_history)
         self.to_screen('Extracted %d chat messages' % chat_history_length)
 
         if chat_history_length == 0:
             return None
 
-        return self._extract_chat(live_chat, request_url)
-
-    def _extract_chat(self, chat_history, request_url):
         return {
             'live_chat': [  # subtitle tag
-                {  # JSON subformat as URL
-                    'url': request_url,
-                    'ext': 'json'
-                },
-                {  # JSON subformat as data
+                {
                     'data': json.dumps(chat_history),
                     'ext': 'json'
                 }
@@ -626,7 +634,7 @@ def _real_extract(self, url):
 
         if ('live_chat' in self.get_param('subtitleslangs', [])) \
                 and info.get('timestamp') is not None:
-            info['subtitles'] = self._download_chat(vod_id)
+            info['subtitles'] = self._extract_chat(vod_id)
 
         return info
 