beautify download log; fix 'timestamp' field

This commit is contained in:
Sakura286 2024-09-10 23:12:55 +08:00
parent 53fd7584b7
commit 81158863ef
2 changed files with 9 additions and 3 deletions

View file

@ -1866,7 +1866,7 @@ #### digitalconcerthall
#### mixchmovie #### mixchmovie
* `max_comments`: Maximum number of comments to extract - default is `120` * `max_comments`: Maximum number of comments to extract - default is `120`. The final number of comments may be lower than this limit because gifts are filtered out.
* `fetch_interval_sec`: Interval in seconds between fetching comment JSON files. If `max_comments` is set too large, the fetch interval needs to be limited * `fetch_interval_sec`: Interval in seconds between fetching comment JSON files. If `max_comments` is set too large, the fetch interval needs to be limited
**Note**: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility

View file

@ -135,6 +135,7 @@ class MixchMovieIE(InfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
'timestamp': int,
'uploader_url': 'https://mixch.tv/u/12299174', 'uploader_url': 'https://mixch.tv/u/12299174',
'live_status': 'not_live', 'live_status': 'not_live',
}, },
@ -163,8 +164,8 @@ def _real_extract(self, url):
'view_count': ('ownerInfo', 'view', {int_or_none}), 'view_count': ('ownerInfo', 'view', {int_or_none}),
'like_count': ('movie', 'favCount', {int_or_none}), 'like_count': ('movie', 'favCount', {int_or_none}),
'comment_count': ('movie', 'commentCount', {int_or_none}), 'comment_count': ('movie', 'commentCount', {int_or_none}),
}),
'timestamp': ('movie', 'published', {int_or_none}), 'timestamp': ('movie', 'published', {int_or_none}),
}),
'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}), 'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}),
'live_status': 'not_live', 'live_status': 'not_live',
'__post_extractor': self.extract_comments(video_id), '__post_extractor': self.extract_comments(video_id),
@ -181,13 +182,18 @@ def _get_comments(self, video_id):
base_url = f'https://mixch.tv/api-web/movies/{video_id}/comments' base_url = f'https://mixch.tv/api-web/movies/{video_id}/comments'
has_next = True has_next = True
next_cursor = '' next_cursor = ''
fragment = 1
while has_next and (comments_left > 0): while has_next and (comments_left > 0):
data = self._download_json( data = self._download_json(
base_url, video_id, note='Downloading comments', errnote='Failed to download comments', base_url, video_id,
note=f'Downloading comments, fragment {fragment}', errnote='Failed to download comments',
query={'cursor': next_cursor, 'limit': COMMENTS_LIMIT}) query={'cursor': next_cursor, 'limit': COMMENTS_LIMIT})
fragment += 1
comments_left -= COMMENTS_LIMIT comments_left -= COMMENTS_LIMIT
# Some of the 'comments' are not real comments but gifts.
# Only real comments are extracted here.
yield from traverse_obj(data, ('comments', lambda _, v: v['comment'], { yield from traverse_obj(data, ('comments', lambda _, v: v['comment'], {
'author': ('user_name', {str}), 'author': ('user_name', {str}),
'author_id': ('user_id', {int_or_none}), 'author_id': ('user_id', {int_or_none}),