mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 20:33:19 +00:00
Fix Twitch VOD chat download
Chat download now uses the GraphQL API instead of the old v5 REST API, which no longer works.
This commit is contained in:
parent
2025816973
commit
7adae46801
|
@ -3,6 +3,7 @@
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
@ -55,6 +56,7 @@ class TwitchBaseIE(InfoExtractor):
|
||||||
'VideoMetadata': '49b5b8f268cdeb259d75b58dcb0c1a748e3b575003448a2333dc5cdafd49adad',
|
'VideoMetadata': '49b5b8f268cdeb259d75b58dcb0c1a748e3b575003448a2333dc5cdafd49adad',
|
||||||
'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
|
'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
|
||||||
'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
|
'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
|
||||||
|
'VideoCommentsByOffsetOrCursor': 'b70a3591ff0f4e0313d126c6a1502d79a1c02baebb288227c582044aa76adf6a',
|
||||||
}
|
}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -526,67 +528,72 @@ def _extract_storyboard(self, item_id, storyboard_json_url, duration):
|
||||||
} for path in images],
|
} for path in images],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _download_chat(self, vod_id):
|
def _extract_chat(self, vod_id):
|
||||||
live_chat = list()
|
chat_history = []
|
||||||
|
has_more_pages = True
|
||||||
request_url = f'https://api.twitch.tv/v5/videos/{vod_id}/comments'
|
retry_sleep = 5
|
||||||
query_params = {
|
max_retries = 3
|
||||||
'client_id': self._CLIENT_ID
|
|
||||||
}
|
|
||||||
|
|
||||||
self.to_screen('Downloading chat fragment JSONs')
|
|
||||||
|
|
||||||
# TODO: question: is it OK to use this config value for this purpose?
|
|
||||||
max_retries = self.get_param('extractor_retries')
|
|
||||||
retries = 0
|
retries = 0
|
||||||
pagenum = 1
|
pagenum = 1
|
||||||
while True:
|
gql_ops = [
|
||||||
response_json = self._download_json(
|
{
|
||||||
request_url,
|
'operationName': 'VideoCommentsByOffsetOrCursor',
|
||||||
vod_id,
|
'variables': {
|
||||||
fatal=False,
|
'videoID': vod_id,
|
||||||
note='Downloading chat fragment JSON page %d' % pagenum,
|
# 'cursor': <filled in in subsequent requests>
|
||||||
errnote='Live chat fragment download failed.',
|
}
|
||||||
query=query_params)
|
}
|
||||||
|
]
|
||||||
|
|
||||||
if response_json is False:
|
self.to_screen('Downloading chat fragment pages')
|
||||||
self.report_warning(f'Unable to fetch next chat history fragment. {retries}. try of {max_retries}')
|
|
||||||
|
while has_more_pages:
|
||||||
|
response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False)
|
||||||
|
|
||||||
|
if response is False:
|
||||||
|
self.report_warning(f'Unable to fetch next chat history fragment. {retries + 1}. try of {max_retries}')
|
||||||
|
|
||||||
if retries < max_retries:
|
if retries < max_retries:
|
||||||
retries += 1
|
retries += 1
|
||||||
|
time.sleep(retry_sleep)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
self.report_warning('Chat history download failed: retry limit reached')
|
self.report_warning('Chat history download failed: retry limit reached')
|
||||||
# TODO: when this happens, should I forget a partial chat history, or is it better to keep it too?
|
# TODO: when this happens, should I forget a partial chat history, or is it better to keep it?
|
||||||
# I think if I keep it, it might be better to persist a warning that it is incomplete
|
# I think if I keep it, it might be better to persist a warning that it is incomplete
|
||||||
# live_chat.clear()
|
# chat_history.clear()
|
||||||
break
|
break
|
||||||
|
|
||||||
live_chat.extend(response_json.get('comments') or [])
|
comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments'))
|
||||||
next_fragment_cursor = str_or_none(response_json.get('_next'))
|
chat_history.extend(traverse_obj(comments_obj, ('edges', slice, 'node')))
|
||||||
|
|
||||||
if next_fragment_cursor is None:
|
has_more_pages = traverse_obj(comments_obj, ('pageInfo', 'hasNextPage'))
|
||||||
break
|
|
||||||
|
|
||||||
query_params['cursor'] = next_fragment_cursor
|
if has_more_pages:
|
||||||
pagenum += 1
|
cursor = traverse_obj(comments_obj, ('edges', 0, 'cursor'))
|
||||||
|
if cursor is None:
|
||||||
|
self.report_warning("Cannot continue downloading chat history: cursor is missing. There are additional chat pages to download.")
|
||||||
|
break
|
||||||
|
|
||||||
chat_history_length = len(live_chat)
|
pagenum += 1
|
||||||
|
gql_ops[0]['variables']['cursor'] = cursor
|
||||||
|
|
||||||
|
if has_more_pages is None:
|
||||||
|
cursor = traverse_obj(comments_obj, ('edges', 0, 'cursor'))
|
||||||
|
|
||||||
|
if cursor is not None:
|
||||||
|
self.report_warning("Next page indication is missing, but found cursor. Continuing chat history download.")
|
||||||
|
else: # In this case maintenance might be needed. Purpose is to prevent silent errors.
|
||||||
|
self.report_warning("Next page indication is missing, and cursor not found.")
|
||||||
|
|
||||||
|
chat_history_length = len(chat_history)
|
||||||
self.to_screen('Extracted %d chat messages' % chat_history_length)
|
self.to_screen('Extracted %d chat messages' % chat_history_length)
|
||||||
if chat_history_length == 0:
|
if chat_history_length == 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return self._extract_chat(live_chat, request_url)
|
|
||||||
|
|
||||||
def _extract_chat(self, chat_history, request_url):
|
|
||||||
return {
|
return {
|
||||||
'live_chat': [ # subtitle tag
|
'live_chat': [ # subtitle tag
|
||||||
{ # JSON subformat as URL
|
{
|
||||||
'url': request_url,
|
|
||||||
'ext': 'json'
|
|
||||||
},
|
|
||||||
{ # JSON subformat as data
|
|
||||||
'data': json.dumps(chat_history),
|
'data': json.dumps(chat_history),
|
||||||
'ext': 'json'
|
'ext': 'json'
|
||||||
}
|
}
|
||||||
|
@ -626,7 +633,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
if ('live_chat' in self.get_param('subtitleslangs', [])) \
|
if ('live_chat' in self.get_param('subtitleslangs', [])) \
|
||||||
and info.get('timestamp') is not None:
|
and info.get('timestamp') is not None:
|
||||||
info['subtitles'] = self._download_chat(vod_id)
|
info['subtitles'] = self._extract_chat(vod_id)
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue