twitch chat: review changes part 1

This commit is contained in:
mpeter50 2023-07-14 01:03:27 +02:00
parent 1781d316c9
commit 9d94a95e0e
1 changed file with 28 additions and 43 deletions

View File

@ -3,7 +3,6 @@ import itertools
import json import json
import random import random
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -25,6 +24,7 @@ from ..utils import (
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
qualities, qualities,
RetryManager,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
@ -528,44 +528,34 @@ class TwitchVodIE(TwitchBaseIE):
} for path in images], } for path in images],
} }
def _extract_chat(self, vod_id): def _get_subtitles(self, vod_id):
chat_history = [] chat_history = []
has_more_pages = True has_more_pages = True
retry_sleep = 5
max_retries = 3
retries = 0
pagenum = 1 pagenum = 1
gql_ops = [ gql_ops = [{
{ 'operationName': 'VideoCommentsByOffsetOrCursor',
'operationName': 'VideoCommentsByOffsetOrCursor', 'variables': { 'videoID': vod_id }
'variables': { # 'variables.cursor': <filled in in subsequent requests>
'videoID': vod_id, }]
# 'cursor': <filled in in subsequent requests>
}
}
]
self.to_screen('Downloading chat fragment pages')
while has_more_pages: while has_more_pages:
response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False) response = None
if response is False: for retry in self.RetryManager():
self.report_warning(f'Unable to fetch next chat history fragment. {retries + 1}. try of {max_retries}') response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False)
# response = False
# TODO: delete the direct False, uncomment _download_gql
if response is False:
retry.error = ExtractorError("f'Unable to fetch next chat history fragment.'", video_id=vod_id, ie=self)
if retries < max_retries:
retries += 1
time.sleep(retry_sleep)
continue
else:
self.report_warning('Chat history download failed: retry limit reached')
# TODO: when this happens, should I forget a partial chat history, or is it better to keep it? # TODO: when this happens, should I forget a partial chat history, or is it better to keep it?
# I think if I keep it, it might be better to persist a warning that it is incomplete # I think if I keep it, it might be better to persist a warning that it is incomplete
# chat_history.clear()
break # time.sleep(5)
response_errors = traverse_obj(response, (..., 'errors')) response_errors = traverse_obj(response, (..., 'errors'))
if response_errors is not None and len(response_errors) > 0: if response_errors:
self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}") self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}")
comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments')) comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments'))
@ -590,19 +580,15 @@ class TwitchVodIE(TwitchBaseIE):
else: # In this case maintenance might be needed. Purpose is to prevent silent errors. else: # In this case maintenance might be needed. Purpose is to prevent silent errors.
self.report_warning("Next page indication is missing, and cursor not found.") self.report_warning("Next page indication is missing, and cursor not found.")
chat_history_length = len(chat_history) if not chat_history:
self.to_screen('Extracted %d chat messages' % chat_history_length) return
if chat_history_length == 0:
return None
return { self.to_screen('Extracted %d chat messages' % len(chat_history))
'live_chat': [ # subtitle tag
{ return { 'rechat': [{
'data': json.dumps(chat_history), 'data': json.dumps(chat_history),
'ext': 'twitch-gql-20221228.json' 'ext': 'twitch-gql-20221228.json'
} }]}
]
}
def _real_extract(self, url): def _real_extract(self, url):
vod_id = self._match_id(url) vod_id = self._match_id(url)
@ -635,9 +621,8 @@ class TwitchVodIE(TwitchBaseIE):
if 't' in query: if 't' in query:
info['start_time'] = parse_duration(query['t'][0]) info['start_time'] = parse_duration(query['t'][0])
if ('live_chat' in self.get_param('subtitleslangs', [])) \ if info.get('timestamp'):
and info.get('timestamp') is not None: info['subtitles'] = self.extract_subtitles(vod_id)
info['subtitles'] = self._extract_chat(vod_id)
return info return info