From c60ee3a218c0ec6a49674c5c786b893e0fd62e0a Mon Sep 17 00:00:00 2001
From: siikamiika
Date: Wed, 23 Jun 2021 03:12:39 +0300
Subject: [PATCH] [youtube_live_chat] Support ongoing live chat (#422)

Authored by: siikamiika
---
Reviewer notes (commentary only; not part of the commit message or the diff):

* downloader/__init__.py: YoutubeLiveChatReplayFD is renamed to
  YoutubeLiveChatFD and registered for both the 'youtube_live_chat' and
  'youtube_live_chat_replay' protocols.
* downloader/youtube_live_chat.py: action parsing moves out of
  download_and_parse_fragment() into parse_actions_replay() (the previous
  logic, unchanged) and the new parse_actions_live(); the parser is picked
  from info_dict['protocol'].
* parse_actions_live() wraps every live action in a replay-shaped record and
  writes 'videoOffsetTimeMsec' and 'isLive' next to 'replayChatItemAction',
  not inside it. The offset is the action timestamp minus start_time; an
  action with no parseable timestamp reuses the previous live_offset.
* parse_actions_live() sleeps for 'timeoutMs' before calling
  _append_fragment(), so a batch is written only after the wait.
* Responses are now parsed with json.loads() only; the
  _extract_yt_initial_data() fallback is removed, while the
  RegexNotFoundError import stays (it is a context line in the first hunk).
* Each request now sends ie._generate_api_headers(ytcfg, visitor_data=...)
  plus 'content-type: application/json'.
* extractor/youtube.py: the live_chat subtitle entry is no longer limited to
  non-live videos; its protocol is chosen from is_live.
* NOTE(review): both protocol switches are if/elif with no else, so
  parse_actions and url stay unbound for any other protocol value. The
  registry above only maps the two handled values to this class; confirm
  nothing else routes here.
* NOTE(review): int_or_none(renderer.get('timestampUsec'), 1000) presumably
  scales microseconds down to the milliseconds used by start_time; verify
  against utils.int_or_none.

 yt_dlp/downloader/__init__.py          |   5 +-
 yt_dlp/downloader/youtube_live_chat.py | 136 ++++++++++++++++++++-----
 yt_dlp/extractor/youtube.py            |  23 ++---
 3 files changed, 123 insertions(+), 41 deletions(-)

diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py
index e469b512d..6769cf8e6 100644
--- a/yt_dlp/downloader/__init__.py
+++ b/yt_dlp/downloader/__init__.py
@@ -25,7 +25,7 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
 from .mhtml import MhtmlFD
 from .niconico import NiconicoDmcFD
 from .websocket import WebSocketFragmentFD
-from .youtube_live_chat import YoutubeLiveChatReplayFD
+from .youtube_live_chat import YoutubeLiveChatFD
 from .external import (
     get_external_downloader,
     FFmpegFD,
@@ -44,7 +44,8 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
     'mhtml': MhtmlFD,
     'niconico_dmc': NiconicoDmcFD,
     'websocket_frag': WebSocketFragmentFD,
-    'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
+    'youtube_live_chat': YoutubeLiveChatFD,
+    'youtube_live_chat_replay': YoutubeLiveChatFD,
 }
 
 
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py
index a6c13335e..f30dcb6bf 100644
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -1,20 +1,23 @@
 from __future__ import division, unicode_literals
 
 import json
+import time
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
 from ..utils import (
     try_get,
+    dict_get,
+    int_or_none,
     RegexNotFoundError,
 )
 from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
 
 
-class YoutubeLiveChatReplayFD(FragmentFD):
-    """ Downloads YouTube live chat replays fragment by fragment """
+class YoutubeLiveChatFD(FragmentFD):
+    """ Downloads YouTube live chats fragment by fragment """
 
-    FD_NAME = 'youtube_live_chat_replay'
+    FD_NAME = 'youtube_live_chat'
 
     def real_download(self, filename, info_dict):
         video_id = info_dict['video_id']
@@ -31,6 +34,8 @@ def real_download(self, filename, info_dict):
 
         ie = YT_BaseIE(self.ydl)
 
+        start_time = int(time.time() * 1000)
+
         def dl_fragment(url, data=None, headers=None):
             http_headers = info_dict.get('http_headers', {})
             if headers:
@@ -38,36 +43,70 @@ def dl_fragment(url, data=None, headers=None):
                 http_headers.update(headers)
             return self._download_fragment(ctx, url, info_dict, http_headers, data)
 
-        def download_and_parse_fragment(url, frag_index, request_data):
+        def parse_actions_replay(live_chat_continuation):
+            offset = continuation_id = None
+            processed_fragment = bytearray()
+            for action in live_chat_continuation.get('actions', []):
+                if 'replayChatItemAction' in action:
+                    replay_chat_item_action = action['replayChatItemAction']
+                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+                processed_fragment.extend(
+                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+            if offset is not None:
+                continuation_id = try_get(
+                    live_chat_continuation,
+                    lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
+            self._append_fragment(ctx, processed_fragment)
+            return continuation_id, offset
+
+        live_offset = 0
+
+        def parse_actions_live(live_chat_continuation):
+            nonlocal live_offset
+            continuation_id = None
+            processed_fragment = bytearray()
+            for action in live_chat_continuation.get('actions', []):
+                timestamp = self.parse_live_timestamp(action)
+                if timestamp is not None:
+                    live_offset = timestamp - start_time
+                # compatibility with replay format
+                pseudo_action = {
+                    'replayChatItemAction': {'actions': [action]},
+                    'videoOffsetTimeMsec': str(live_offset),
+                    'isLive': True,
+                }
+                processed_fragment.extend(
+                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+            continuation_data_getters = [
+                lambda x: x['continuations'][0]['invalidationContinuationData'],
+                lambda x: x['continuations'][0]['timedContinuationData'],
+            ]
+            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
+            if continuation_data:
+                continuation_id = continuation_data.get('continuation')
+                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
+                if timeout_ms is not None:
+                    time.sleep(timeout_ms / 1000)
+            self._append_fragment(ctx, processed_fragment)
+            return continuation_id, live_offset
+
+        if info_dict['protocol'] == 'youtube_live_chat_replay':
+            parse_actions = parse_actions_replay
+        elif info_dict['protocol'] == 'youtube_live_chat':
+            parse_actions = parse_actions_live
+
+        def download_and_parse_fragment(url, frag_index, request_data, headers):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
+                    success, raw_fragment = dl_fragment(url, request_data, headers)
                     if not success:
                         return False, None, None
-                    try:
-                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
-                    except RegexNotFoundError:
-                        data = None
-                    if not data:
-                        data = json.loads(raw_fragment)
+                    data = json.loads(raw_fragment)
                     live_chat_continuation = try_get(
                         data,
                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
-                    offset = continuation_id = None
-                    processed_fragment = bytearray()
-                    for action in live_chat_continuation.get('actions', []):
-                        if 'replayChatItemAction' in action:
-                            replay_chat_item_action = action['replayChatItemAction']
-                            offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
-                        processed_fragment.extend(
-                            json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
-                    if offset is not None:
-                        continuation_id = try_get(
-                            live_chat_continuation,
-                            lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
-                    self._append_fragment(ctx, processed_fragment)
-
+                    continuation_id, offset = parse_actions(live_chat_continuation)
                     return True, continuation_id, offset
                 except compat_urllib_error.HTTPError as err:
                     count += 1
@@ -100,7 +139,11 @@ def download_and_parse_fragment(url, frag_index, request_data):
         innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
         if not api_key or not innertube_context:
             return False
-        url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
+        if info_dict['protocol'] == 'youtube_live_chat_replay':
+            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+        elif info_dict['protocol'] == 'youtube_live_chat':
+            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
 
         frag_index = offset = 0
         while continuation_id is not None:
@@ -111,8 +154,11 @@ def download_and_parse_fragment(url, frag_index, request_data):
             }
             if frag_index > 1:
                 request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
+            headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
+            headers.update({'content-type': 'application/json'})
+            fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
             success, continuation_id, offset = download_and_parse_fragment(
-                url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
+                url, frag_index, fragment_request_data, headers)
             if not success:
                 return False
             if test:
@@ -120,3 +166,39 @@ def download_and_parse_fragment(url, frag_index, request_data):
 
         self._finish_frag_download(ctx)
         return True
+
+    @staticmethod
+    def parse_live_timestamp(action):
+        action_content = dict_get(
+            action,
+            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
+        if not isinstance(action_content, dict):
+            return None
+        item = dict_get(action_content, ['item', 'bannerRenderer'])
+        if not isinstance(item, dict):
+            return None
+        renderer = dict_get(item, [
+            # text
+            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+            # ticker
+            'liveChatTickerPaidMessageItemRenderer',
+            'liveChatTickerSponsorItemRenderer',
+            # banner
+            'liveChatBannerRenderer',
+        ])
+        if not isinstance(renderer, dict):
+            return None
+        parent_item_getters = [
+            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
+            lambda x: x['contents'],
+        ]
+        parent_item = try_get(renderer, parent_item_getters, dict)
+        if parent_item:
+            renderer = dict_get(parent_item, [
+                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+            ])
+        if not isinstance(renderer, dict):
+            return None
+        return int_or_none(renderer.get('timestampUsec'), 1000)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 375fc1909..ad2cdb052 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2339,18 +2339,17 @@ def process_language(container, base_url, lang_code, sub_name, query):
             initial_data = self._call_api(
                 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
 
-        if not is_live:
-            try:
-                # This will error if there is no livechat
-                initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
-                info['subtitles']['live_chat'] = [{
-                    'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
-                    'video_id': video_id,
-                    'ext': 'json',
-                    'protocol': 'youtube_live_chat_replay',
-                }]
-            except (KeyError, IndexError, TypeError):
-                pass
+        try:
+            # This will error if there is no livechat
+            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+            info['subtitles']['live_chat'] = [{
+                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
+                'video_id': video_id,
+                'ext': 'json',
+                'protocol': 'youtube_live_chat' if is_live else 'youtube_live_chat_replay',
+            }]
+        except (KeyError, IndexError, TypeError):
+            pass
 
         if initial_data:
             chapters = self._extract_chapters_from_json(