mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-07-03 01:13:36 +00:00
twitch chat: review changes part 1
This commit is contained in:
parent
1781d316c9
commit
9d94a95e0e
|
@ -3,7 +3,6 @@
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
@ -25,6 +24,7 @@
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
|
RetryManager,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -528,44 +528,34 @@ def _extract_storyboard(self, item_id, storyboard_json_url, duration):
|
||||||
} for path in images],
|
} for path in images],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_chat(self, vod_id):
|
def _get_subtitles(self, vod_id):
|
||||||
chat_history = []
|
chat_history = []
|
||||||
has_more_pages = True
|
has_more_pages = True
|
||||||
retry_sleep = 5
|
|
||||||
max_retries = 3
|
|
||||||
retries = 0
|
|
||||||
pagenum = 1
|
pagenum = 1
|
||||||
gql_ops = [
|
gql_ops = [{
|
||||||
{
|
|
||||||
'operationName': 'VideoCommentsByOffsetOrCursor',
|
'operationName': 'VideoCommentsByOffsetOrCursor',
|
||||||
'variables': {
|
'variables': { 'videoID': vod_id }
|
||||||
'videoID': vod_id,
|
# 'variables.cursor': <filled in in subsequent requests>
|
||||||
# 'cursor': <filled in in subsequent requests>
|
}]
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
self.to_screen('Downloading chat fragment pages')
|
|
||||||
|
|
||||||
while has_more_pages:
|
while has_more_pages:
|
||||||
|
response = None
|
||||||
|
|
||||||
|
for retry in self.RetryManager():
|
||||||
response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False)
|
response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False)
|
||||||
|
# response = False
|
||||||
|
# TODO: delete the direct False, uncomment _download_gql
|
||||||
|
|
||||||
if response is False:
|
if response is False:
|
||||||
self.report_warning(f'Unable to fetch next chat history fragment. {retries + 1}. try of {max_retries}')
|
retry.error = ExtractorError("f'Unable to fetch next chat history fragment.'", video_id=vod_id, ie=self)
|
||||||
|
|
||||||
if retries < max_retries:
|
|
||||||
retries += 1
|
|
||||||
time.sleep(retry_sleep)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
self.report_warning('Chat history download failed: retry limit reached')
|
|
||||||
# TODO: when this happens, should I forget a partial chat history, or is it better to keep it?
|
# TODO: when this happens, should I forget a partial chat history, or is it better to keep it?
|
||||||
# I think if I keep it, it might be better to persist a warning that it is incomplete
|
# I think if I keep it, it might be better to persist a warning that it is incomplete
|
||||||
# chat_history.clear()
|
|
||||||
break
|
# time.sleep(5)
|
||||||
|
|
||||||
response_errors = traverse_obj(response, (..., 'errors'))
|
response_errors = traverse_obj(response, (..., 'errors'))
|
||||||
if response_errors is not None and len(response_errors) > 0:
|
if response_errors:
|
||||||
self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}")
|
self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}")
|
||||||
|
|
||||||
comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments'))
|
comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments'))
|
||||||
|
@ -590,19 +580,15 @@ def _extract_chat(self, vod_id):
|
||||||
else: # In this case maintenance might be needed. Purpose is to prevent silent errors.
|
else: # In this case maintenance might be needed. Purpose is to prevent silent errors.
|
||||||
self.report_warning("Next page indication is missing, and cursor not found.")
|
self.report_warning("Next page indication is missing, and cursor not found.")
|
||||||
|
|
||||||
chat_history_length = len(chat_history)
|
if not chat_history:
|
||||||
self.to_screen('Extracted %d chat messages' % chat_history_length)
|
return
|
||||||
if chat_history_length == 0:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return {
|
self.to_screen('Extracted %d chat messages' % len(chat_history))
|
||||||
'live_chat': [ # subtitle tag
|
|
||||||
{
|
return { 'rechat': [{
|
||||||
'data': json.dumps(chat_history),
|
'data': json.dumps(chat_history),
|
||||||
'ext': 'twitch-gql-20221228.json'
|
'ext': 'twitch-gql-20221228.json'
|
||||||
}
|
}]}
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
vod_id = self._match_id(url)
|
vod_id = self._match_id(url)
|
||||||
|
@ -635,9 +621,8 @@ def _real_extract(self, url):
|
||||||
if 't' in query:
|
if 't' in query:
|
||||||
info['start_time'] = parse_duration(query['t'][0])
|
info['start_time'] = parse_duration(query['t'][0])
|
||||||
|
|
||||||
if ('live_chat' in self.get_param('subtitleslangs', [])) \
|
if info.get('timestamp'):
|
||||||
and info.get('timestamp') is not None:
|
info['subtitles'] = self.extract_subtitles(vod_id)
|
||||||
info['subtitles'] = self._extract_chat(vod_id)
|
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue