[ie/tiktok] Fix API extraction (#9548)

Closes #9506
Authored by: bashonly, Grub4K

Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
This commit is contained in:
bashonly 2024-03-29 18:20:14 -05:00 committed by GitHub
parent 9c42b7eef5
commit cb61e20c26
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 114 additions and 60 deletions

View file

@ -1805,9 +1805,12 @@ #### niconicochannelplus
* `max_comments`: Maximum number of comments to extract - default is `120` * `max_comments`: Maximum number of comments to extract - default is `120`
#### tiktok #### tiktok
* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com` * `api_hostname`: Hostname to use for mobile API calls, e.g. `api22-normal-c-alisg.tiktokv.com`
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1` * `app_name`: Default app name to use with mobile API calls, e.g. `trill`
* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221` * `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
* `aid`: Default app ID to use with API calls, e.g. `1180`
* `app_info`: One or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
#### rokfinchannel #### rokfinchannel
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`

View file

@ -4,6 +4,7 @@
import re import re
import string import string
import time import time
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse from ..compat import compat_urllib_parse_urlparse
@ -30,19 +31,65 @@
class TikTokBaseIE(InfoExtractor): class TikTokBaseIE(InfoExtractor):
_APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')]
_WORKING_APP_VERSION = None
_APP_NAME = 'trill'
_AID = 1180
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
_WEBPAGE_HOST = 'https://www.tiktok.com/' _WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p') QUALITIES = ('360p', '540p', '720p', '1080p')
# Default value for every field of an app info entry.
# NOTE: the key order is significant - `_get_next_app_info` zips these keys with the
# `/`-separated fields of an app info string, so it must stay
# iid/app_name/app_version/manifest_app_version/aid.
# Every default except `iid` can be overridden by the extractor argument of the same name.
_APP_INFO_DEFAULTS = {
# unique "install id"
'iid': None,
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
'app_name': 'musical_ly',
'app_version': '34.1.2',
'manifest_app_version': '2023401020',
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
'aid': '0',
}
# App info strings (here bare install IDs) used, in random order, when no `app_info`
# extractor argument is given
_KNOWN_APP_INFO = [
'7351144126450059040',
'7351149742343391009',
'7351153174894626592',
]
# Remaining app info candidates; None until `_get_next_app_info` builds the pool on first use
_APP_INFO_POOL = None
# The app info entry currently in use (popped from the pool by `_get_next_app_info`)
_APP_INFO = None
# User-Agent string derived from `_APP_INFO` by `_get_next_app_info`
_APP_USER_AGENT = None
@property @property
def _API_HOSTNAME(self): def _API_HOSTNAME(self):
return self._configuration_arg( return self._configuration_arg(
'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0] 'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0]
def _get_next_app_info(self):
    """Activate the next app info candidate and rebuild the API User-Agent.

    On the first call the candidate pool is built lazily: the `app_info`
    extractor argument is used if given, otherwise a shuffled copy of
    `_KNOWN_APP_INFO`. Each candidate is a `/`-separated string whose fields
    map positionally onto the keys of `_APP_INFO_DEFAULTS`; empty or missing
    fields fall back to the defaults (themselves overridable through the
    extractor arguments of the same name, except for `iid`).

    Returns True when a candidate was popped into `_APP_INFO` (and
    `_APP_USER_AGENT` was updated), False when the pool is exhausted.
    """
    if self._APP_INFO_POOL is None:
        field_names = tuple(self._APP_INFO_DEFAULTS)

        # Per-field fallbacks; `iid` has no sensible default and is never looked up here
        fallbacks = {}
        for field in field_names:
            if field == 'iid':
                continue
            fallbacks[field] = self._configuration_arg(
                field, [self._APP_INFO_DEFAULTS[field]], ie_key=TikTokIE)[0]

        candidates = self._configuration_arg('app_info', ie_key=TikTokIE)
        if not candidates:
            # Full-length sample == shuffled copy; the class-level list is left untouched
            candidates = random.sample(self._KNOWN_APP_INFO, len(self._KNOWN_APP_INFO))

        pool = []
        for candidate in candidates:
            entry = dict(fallbacks)
            for field, value in zip(field_names, candidate.split('/')):
                if value:  # empty fields keep the fallback value
                    entry[field] = value
            pool.append(entry)
        self._APP_INFO_POOL = pool

    if not self._APP_INFO_POOL:
        return False

    current = self._APP_INFO = self._APP_INFO_POOL.pop(0)

    app_name = current['app_name']
    version = current['manifest_app_version']
    # musical_ly uses its own package name; trill and aweme share the com.ss.android.ugc prefix
    package = (
        f'com.zhiliaoapp.musically/{version}' if app_name == 'musical_ly'
        else f'com.ss.android.ugc.{app_name}/{version}')
    self._APP_USER_AGENT = f'{package} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)'

    return True
@staticmethod @staticmethod
def _create_url(user_id, video_id): def _create_url(user_id, video_id):
return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}' return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
@ -58,7 +105,7 @@ def _get_universal_data(self, webpage, display_id):
'universal data', display_id, end_pattern=r'</script>', default={}), 'universal data', display_id, end_pattern=r'</script>', default={}),
('__DEFAULT_SCOPE__', {dict})) or {} ('__DEFAULT_SCOPE__', {dict})) or {}
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, def _call_api_impl(self, ep, query, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160))) self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST) webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
@ -67,80 +114,84 @@ def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
return self._download_json( return self._download_json(
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id, 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
fatal=fatal, note=note, errnote=errnote, headers={ fatal=fatal, note=note, errnote=errnote, headers={
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)', 'User-Agent': self._APP_USER_AGENT,
'Accept': 'application/json', 'Accept': 'application/json',
}, query=query) }, query=query)
def _build_api_query(self, query, app_version, manifest_app_version): def _build_api_query(self, query):
return { return {
**query, **query,
'version_name': app_version,
'version_code': manifest_app_version,
'build_number': app_version,
'manifest_version_code': manifest_app_version,
'update_version_code': manifest_app_version,
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
'uuid': ''.join(random.choices(string.digits, k=16)),
'_rticket': int(time.time() * 1000),
'ts': int(time.time()),
'device_brand': 'Google',
'device_type': 'Pixel 7',
'device_platform': 'android', 'device_platform': 'android',
'os': 'android',
'ssmix': 'a',
'_rticket': int(time.time() * 1000),
'cdid': str(uuid.uuid4()),
'channel': 'googleplay',
'aid': self._APP_INFO['aid'],
'app_name': self._APP_INFO['app_name'],
'version_code': ''.join((f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.'))),
'version_name': self._APP_INFO['app_version'],
'manifest_version_code': self._APP_INFO['manifest_app_version'],
'update_version_code': self._APP_INFO['manifest_app_version'],
'ab_version': self._APP_INFO['app_version'],
'resolution': '1080*2400', 'resolution': '1080*2400',
'dpi': 420, 'dpi': 420,
'os_version': '13', 'device_type': 'Pixel 7',
'os_api': '29', 'device_brand': 'Google',
'carrier_region': 'US',
'sys_region': 'US',
'region': 'US',
'app_name': self._APP_NAME,
'app_language': 'en',
'language': 'en', 'language': 'en',
'timezone_name': 'America/New_York', 'os_api': '29',
'timezone_offset': '-14400', 'os_version': '13',
'channel': 'googleplay',
'ac': 'wifi', 'ac': 'wifi',
'mcc_mnc': '310260', 'is_pad': '0',
'is_my_cn': 0, 'current_region': 'US',
'aid': self._AID, 'app_type': 'normal',
'ssmix': 'a', 'sys_region': 'US',
'as': 'a1qwert123', 'last_install_time': int(time.time()) - random.randint(86400, 1123200),
'cp': 'cbfhckdckkde1', 'timezone_name': 'America/New_York',
'residence': 'US',
'app_language': 'en',
'timezone_offset': '-14400',
'host_abi': 'armeabi-v7a',
'locale': 'en',
'ac2': 'wifi5g',
'uoo': '1',
'op_region': 'US',
'build_number': self._APP_INFO['app_version'],
'region': 'US',
'ts': int(time.time()),
'iid': self._APP_INFO['iid'],
'device_id': random.randint(7250000000000000000, 7351147085025500000),
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
} }
def _call_api(self, ep, query, video_id, fatal=True, def _call_api(self, ep, query, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
if not self._WORKING_APP_VERSION: if not self._APP_INFO and not self._get_next_app_info():
app_version = self._configuration_arg('app_version', [''], ie_key=TikTokIE.ie_key())[0] message = 'No working app info is available'
manifest_app_version = self._configuration_arg('manifest_app_version', [''], ie_key=TikTokIE.ie_key())[0] if fatal:
if app_version and manifest_app_version: raise ExtractorError(message, expected=True)
self._WORKING_APP_VERSION = (app_version, manifest_app_version) else:
self.write_debug('Imported app version combo from extractor arguments') self.report_warning(message)
elif app_version or manifest_app_version: return
self.report_warning('Only one of the two required version params are passed as extractor arguments', only_once=True)
if self._WORKING_APP_VERSION: max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
app_version, manifest_app_version = self._WORKING_APP_VERSION for count in itertools.count(1):
real_query = self._build_api_query(query, app_version, manifest_app_version) self.write_debug(str(self._APP_INFO))
return self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote) real_query = self._build_api_query(query)
for count, (app_version, manifest_app_version) in enumerate(self._APP_VERSIONS, start=1):
real_query = self._build_api_query(query, app_version, manifest_app_version)
try: try:
res = self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote) return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
return res
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
if count == len(self._APP_VERSIONS): message = str(e.cause or e.msg)
if not self._get_next_app_info():
if fatal: if fatal:
raise e raise
else: else:
self.report_warning(str(e.cause or e.msg)) self.report_warning(message)
return return
self.report_warning('%s. Retrying... (attempt %s of %s)' % (str(e.cause or e.msg), count, len(self._APP_VERSIONS))) self.report_warning(f'{message}. Retrying... (attempt {count} of {max_tries})')
continue continue
raise e raise
def _extract_aweme_app(self, aweme_id): def _extract_aweme_app(self, aweme_id):
feed_list = self._call_api( feed_list = self._call_api(