From 81158863efe8e83d9134007933b6ab5a3224903b Mon Sep 17 00:00:00 2001
From: Sakura286
Date: Tue, 10 Sep 2024 23:12:55 +0800
Subject: [PATCH] beautify download log; fix 'timestamp' field

---
 README.md                 |  2 +-
 yt_dlp/extractor/mixch.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 51da6622c9..c87bec6571 100644
--- a/README.md
+++ b/README.md
@@ -1866,7 +1866,7 @@ #### digitalconcerthall
 
 #### mixchmovie
-* `max_comments`: Maximum number of comments to extract - default is `120`
+* `max_comments`: Maximum number of comments to extract - default is `120`. The number of comments actually extracted may be lower than this limit, since gift messages are filtered out
 * `fetch_interval_sec`: Comment json files fetching interval. If `max_comments` is set too large, fetch interval need to be limit
 
 **Note**: These options may be changed/removed in the future without concern for backward compatibility
diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py
index f1b485a2b1..a248e304dc 100644
--- a/yt_dlp/extractor/mixch.py
+++ b/yt_dlp/extractor/mixch.py
@@ -135,6 +135,7 @@ class MixchMovieIE(InfoExtractor):
             'view_count': int,
             'like_count': int,
             'comment_count': int,
+            'timestamp': int,
             'uploader_url': 'https://mixch.tv/u/12299174',
             'live_status': 'not_live',
         },
@@ -163,8 +164,8 @@ def _real_extract(self, url):
                 'view_count': ('ownerInfo', 'view', {int_or_none}),
                 'like_count': ('movie', 'favCount', {int_or_none}),
                 'comment_count': ('movie', 'commentCount', {int_or_none}),
+                'timestamp': ('movie', 'published', {int_or_none}),
             }),
-            'timestamp': ('movie', 'published', {int_or_none}),
             'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}),
             'live_status': 'not_live',
             '__post_extractor': self.extract_comments(video_id),
@@ -181,13 +182,18 @@ def _get_comments(self, video_id):
         base_url = f'https://mixch.tv/api-web/movies/{video_id}/comments'
         has_next = True
         next_cursor = ''
+        fragment = 1
 
         while has_next and (comments_left > 0):
             data = self._download_json(
-                base_url, video_id, note='Downloading comments', errnote='Failed to download comments',
+                base_url, video_id,
+                note=f'Downloading comments, fragment {fragment}', errnote='Failed to download comments',
                 query={'cursor': next_cursor, 'limit': COMMENTS_LIMIT})
+            fragment += 1
             comments_left -= COMMENTS_LIMIT
 
+            # Some of the 'comments' are not real comments but gifts.
+            # Only real comments are extracted here.
             yield from traverse_obj(data, ('comments', lambda _, v: v['comment'], {
                 'author': ('user_name', {str}),
                 'author_id': ('user_id', {int_or_none}),
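
For reference (not part of the patch): a minimal usage sketch showing how the two `mixchmovie` extractor options documented in the README hunk above can be passed through yt-dlp's Python API. The video URL and the option values below are placeholders, not taken from the patch.

# Equivalent to the command line:
#   yt-dlp --extractor-args "mixchmovie:max_comments=60;fetch_interval_sec=1" --write-comments <URL>
import yt_dlp

ydl_opts = {
    # Extractor options are passed as {ie_key_lowercase: {option_name: [values]}}
    'extractor_args': {'mixchmovie': {'max_comments': ['60'], 'fetch_interval_sec': ['1']}},
    'getcomments': True,  # request comment extraction
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://mixch.tv/m/xxxxxxxx', download=False)  # placeholder URL
    print(len(info.get('comments') or []), 'comments extracted')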