[ie/tiktok:user] Fix extraction loop (#10035)

Closes #10033
Authored by: bashonly
This commit is contained in:
bashonly 2024-05-26 23:22:46 -05:00 committed by GitHub
parent ae2194e1dd
commit c53c2e40fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -940,6 +940,7 @@ def _build_web_query(self, sec_uid, cursor):
def _entries(self, sec_uid, user_name): def _entries(self, sec_uid, user_name):
display_id = user_name or sec_uid display_id = user_name or sec_uid
seen_ids = set()
cursor = int(time.time() * 1E3) cursor = int(time.time() * 1E3)
for page in itertools.count(1): for page in itertools.count(1):
@@ -949,6 +950,9 @@ def _entries(self, sec_uid, user_name):
for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])):
video_id = video['id'] video_id = video['id']
if video_id in seen_ids:
continue
seen_ids.add(video_id)
webpage_url = self._create_url(display_id, video_id) webpage_url = self._create_url(display_id, video_id)
yield self.url_result( yield self.url_result(
webpage_url, TikTokIE, webpage_url, TikTokIE,
@@ -956,8 +960,8 @@ def _entries(self, sec_uid, user_name):
old_cursor = cursor old_cursor = cursor
cursor = traverse_obj( cursor = traverse_obj(
response, ('itemList', -1, 'createTime', {functools.partial(int_or_none, invscale=1E3)})) response, ('itemList', -1, 'createTime', {lambda x: int(x * 1E3)}))
if not cursor: if not cursor or old_cursor == cursor:
# User may not have posted within this ~1 week lookback, so manually adjust cursor # User may not have posted within this ~1 week lookback, so manually adjust cursor
cursor = old_cursor - 7 * 86_400_000 cursor = old_cursor - 7 * 86_400_000
# In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed