mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 12:01:28 +00:00
[youtube_live_chat] Support ongoing live chat (#422)
Authored by: siikamiika
This commit is contained in:
parent
8a77e5e6bc
commit
c60ee3a218
|
@ -25,7 +25,7 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
|
||||||
from .mhtml import MhtmlFD
|
from .mhtml import MhtmlFD
|
||||||
from .niconico import NiconicoDmcFD
|
from .niconico import NiconicoDmcFD
|
||||||
from .websocket import WebSocketFragmentFD
|
from .websocket import WebSocketFragmentFD
|
||||||
from .youtube_live_chat import YoutubeLiveChatReplayFD
|
from .youtube_live_chat import YoutubeLiveChatFD
|
||||||
from .external import (
|
from .external import (
|
||||||
get_external_downloader,
|
get_external_downloader,
|
||||||
FFmpegFD,
|
FFmpegFD,
|
||||||
|
@ -44,7 +44,8 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
|
||||||
'mhtml': MhtmlFD,
|
'mhtml': MhtmlFD,
|
||||||
'niconico_dmc': NiconicoDmcFD,
|
'niconico_dmc': NiconicoDmcFD,
|
||||||
'websocket_frag': WebSocketFragmentFD,
|
'websocket_frag': WebSocketFragmentFD,
|
||||||
'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
|
'youtube_live_chat': YoutubeLiveChatFD,
|
||||||
|
'youtube_live_chat_replay': YoutubeLiveChatFD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,23 @@
|
||||||
from __future__ import division, unicode_literals
|
from __future__ import division, unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
from ..compat import compat_urllib_error
|
from ..compat import compat_urllib_error
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
try_get,
|
try_get,
|
||||||
|
dict_get,
|
||||||
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
)
|
)
|
||||||
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
|
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
|
||||||
|
|
||||||
|
|
||||||
class YoutubeLiveChatReplayFD(FragmentFD):
|
class YoutubeLiveChatFD(FragmentFD):
|
||||||
""" Downloads YouTube live chat replays fragment by fragment """
|
""" Downloads YouTube live chats fragment by fragment """
|
||||||
|
|
||||||
FD_NAME = 'youtube_live_chat_replay'
|
FD_NAME = 'youtube_live_chat'
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
video_id = info_dict['video_id']
|
video_id = info_dict['video_id']
|
||||||
|
@ -31,6 +34,8 @@ def real_download(self, filename, info_dict):
|
||||||
|
|
||||||
ie = YT_BaseIE(self.ydl)
|
ie = YT_BaseIE(self.ydl)
|
||||||
|
|
||||||
|
start_time = int(time.time() * 1000)
|
||||||
|
|
||||||
def dl_fragment(url, data=None, headers=None):
|
def dl_fragment(url, data=None, headers=None):
|
||||||
http_headers = info_dict.get('http_headers', {})
|
http_headers = info_dict.get('http_headers', {})
|
||||||
if headers:
|
if headers:
|
||||||
|
@ -38,36 +43,70 @@ def dl_fragment(url, data=None, headers=None):
|
||||||
http_headers.update(headers)
|
http_headers.update(headers)
|
||||||
return self._download_fragment(ctx, url, info_dict, http_headers, data)
|
return self._download_fragment(ctx, url, info_dict, http_headers, data)
|
||||||
|
|
||||||
def download_and_parse_fragment(url, frag_index, request_data):
|
def parse_actions_replay(live_chat_continuation):
    """Write one replay continuation's actions and locate the next one.

    Every entry of ``live_chat_continuation['actions']`` is serialized as
    a UTF-8 JSON line; the collected lines are appended to the download
    as a single fragment.

    Returns a ``(continuation_id, offset)`` tuple. ``offset`` is the
    ``videoOffsetTimeMsec`` of the last ``replayChatItemAction`` seen, or
    None if there was none -- in that case no continuation is looked up
    and ``continuation_id`` is None as well.
    """
    next_continuation = last_offset = None
    payload = bytearray()
    for chat_action in live_chat_continuation.get('actions', []):
        if 'replayChatItemAction' in chat_action:
            last_offset = int(
                chat_action['replayChatItemAction']['videoOffsetTimeMsec'])
        line = json.dumps(chat_action, ensure_ascii=False).encode('utf-8')
        payload += line + b'\n'
    # Without any offset there is nothing to resume from, so the caller
    # receives a None continuation.
    if last_offset is not None:
        next_continuation = try_get(
            live_chat_continuation,
            lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
    self._append_fragment(ctx, payload)
    return next_continuation, last_offset
|
||||||
|
|
||||||
|
live_offset = 0
|
||||||
|
|
||||||
|
def parse_actions_live(live_chat_continuation):
|
||||||
|
nonlocal live_offset
|
||||||
|
continuation_id = None
|
||||||
|
processed_fragment = bytearray()
|
||||||
|
for action in live_chat_continuation.get('actions', []):
|
||||||
|
timestamp = self.parse_live_timestamp(action)
|
||||||
|
if timestamp is not None:
|
||||||
|
live_offset = timestamp - start_time
|
||||||
|
# compatibility with replay format
|
||||||
|
pseudo_action = {
|
||||||
|
'replayChatItemAction': {'actions': [action]},
|
||||||
|
'videoOffsetTimeMsec': str(live_offset),
|
||||||
|
'isLive': True,
|
||||||
|
}
|
||||||
|
processed_fragment.extend(
|
||||||
|
json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
|
||||||
|
continuation_data_getters = [
|
||||||
|
lambda x: x['continuations'][0]['invalidationContinuationData'],
|
||||||
|
lambda x: x['continuations'][0]['timedContinuationData'],
|
||||||
|
]
|
||||||
|
continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
|
||||||
|
if continuation_data:
|
||||||
|
continuation_id = continuation_data.get('continuation')
|
||||||
|
timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
|
||||||
|
if timeout_ms is not None:
|
||||||
|
time.sleep(timeout_ms / 1000)
|
||||||
|
self._append_fragment(ctx, processed_fragment)
|
||||||
|
return continuation_id, live_offset
|
||||||
|
|
||||||
|
if info_dict['protocol'] == 'youtube_live_chat_replay':
|
||||||
|
parse_actions = parse_actions_replay
|
||||||
|
elif info_dict['protocol'] == 'youtube_live_chat':
|
||||||
|
parse_actions = parse_actions_live
|
||||||
|
|
||||||
|
def download_and_parse_fragment(url, frag_index, request_data, headers):
|
||||||
count = 0
|
count = 0
|
||||||
while count <= fragment_retries:
|
while count <= fragment_retries:
|
||||||
try:
|
try:
|
||||||
success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
|
success, raw_fragment = dl_fragment(url, request_data, headers)
|
||||||
if not success:
|
if not success:
|
||||||
return False, None, None
|
return False, None, None
|
||||||
try:
|
data = json.loads(raw_fragment)
|
||||||
data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
|
|
||||||
except RegexNotFoundError:
|
|
||||||
data = None
|
|
||||||
if not data:
|
|
||||||
data = json.loads(raw_fragment)
|
|
||||||
live_chat_continuation = try_get(
|
live_chat_continuation = try_get(
|
||||||
data,
|
data,
|
||||||
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
|
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
|
||||||
offset = continuation_id = None
|
continuation_id, offset = parse_actions(live_chat_continuation)
|
||||||
processed_fragment = bytearray()
|
|
||||||
for action in live_chat_continuation.get('actions', []):
|
|
||||||
if 'replayChatItemAction' in action:
|
|
||||||
replay_chat_item_action = action['replayChatItemAction']
|
|
||||||
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
|
|
||||||
processed_fragment.extend(
|
|
||||||
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
|
|
||||||
if offset is not None:
|
|
||||||
continuation_id = try_get(
|
|
||||||
live_chat_continuation,
|
|
||||||
lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
|
|
||||||
self._append_fragment(ctx, processed_fragment)
|
|
||||||
|
|
||||||
return True, continuation_id, offset
|
return True, continuation_id, offset
|
||||||
except compat_urllib_error.HTTPError as err:
|
except compat_urllib_error.HTTPError as err:
|
||||||
count += 1
|
count += 1
|
||||||
|
@ -100,7 +139,11 @@ def download_and_parse_fragment(url, frag_index, request_data):
|
||||||
innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
|
innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
|
||||||
if not api_key or not innertube_context:
|
if not api_key or not innertube_context:
|
||||||
return False
|
return False
|
||||||
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
|
visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
|
||||||
|
if info_dict['protocol'] == 'youtube_live_chat_replay':
|
||||||
|
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
|
||||||
|
elif info_dict['protocol'] == 'youtube_live_chat':
|
||||||
|
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
|
||||||
|
|
||||||
frag_index = offset = 0
|
frag_index = offset = 0
|
||||||
while continuation_id is not None:
|
while continuation_id is not None:
|
||||||
|
@ -111,8 +154,11 @@ def download_and_parse_fragment(url, frag_index, request_data):
|
||||||
}
|
}
|
||||||
if frag_index > 1:
|
if frag_index > 1:
|
||||||
request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
|
request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
|
||||||
|
headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
|
||||||
|
headers.update({'content-type': 'application/json'})
|
||||||
|
fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
|
||||||
success, continuation_id, offset = download_and_parse_fragment(
|
success, continuation_id, offset = download_and_parse_fragment(
|
||||||
url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
|
url, frag_index, fragment_request_data, headers)
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
if test:
|
if test:
|
||||||
|
@ -120,3 +166,39 @@ def download_and_parse_fragment(url, frag_index, request_data):
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@staticmethod
def parse_live_timestamp(action):
    """Extract the timestamp of a live chat action, or None.

    Walks action -> item/banner -> renderer, unwrapping ticker and banner
    renderers to the message renderer they contain, then returns the
    renderer's ``timestampUsec`` passed through ``int_or_none`` with
    1000 as its second argument (presumably scaling microseconds to
    milliseconds -- confirm against ``int_or_none``).

    Returns None as soon as any level of the walk is not a dict.
    """
    message_renderer_keys = [
        'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
        'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
    ]
    lookup_chain = (
        # action kind
        ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'],
        # container holding the renderer
        ['item', 'bannerRenderer'],
        # text, ticker and banner renderers, in lookup priority order
        message_renderer_keys + [
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            'liveChatBannerRenderer',
        ],
    )
    node = action
    for candidate_keys in lookup_chain:
        node = dict_get(node, candidate_keys)
        if not isinstance(node, dict):
            return None

    # Ticker and banner renderers wrap the actual message renderer.
    wrapped = try_get(node, [
        lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
        lambda x: x['contents'],
    ], dict)
    if wrapped:
        node = dict_get(wrapped, message_renderer_keys)
        if not isinstance(node, dict):
            return None
    return int_or_none(node.get('timestampUsec'), 1000)
|
||||||
|
|
|
@ -2339,18 +2339,17 @@ def process_language(container, base_url, lang_code, sub_name, query):
|
||||||
initial_data = self._call_api(
|
initial_data = self._call_api(
|
||||||
'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
|
'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
|
||||||
|
|
||||||
if not is_live:
|
try:
|
||||||
try:
|
# This will error if there is no livechat
|
||||||
# This will error if there is no livechat
|
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
|
||||||
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
|
info['subtitles']['live_chat'] = [{
|
||||||
info['subtitles']['live_chat'] = [{
|
'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
|
||||||
'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
|
'video_id': video_id,
|
||||||
'video_id': video_id,
|
'ext': 'json',
|
||||||
'ext': 'json',
|
'protocol': 'youtube_live_chat' if is_live else 'youtube_live_chat_replay',
|
||||||
'protocol': 'youtube_live_chat_replay',
|
}]
|
||||||
}]
|
except (KeyError, IndexError, TypeError):
|
||||||
except (KeyError, IndexError, TypeError):
|
pass
|
||||||
pass
|
|
||||||
|
|
||||||
if initial_data:
|
if initial_data:
|
||||||
chapters = self._extract_chapters_from_json(
|
chapters = self._extract_chapters_from_json(
|
||||||
|
|
Loading…
Reference in a new issue