[extractor/youtube] Use device-specific user agent (#4770)

Thwart the latest fingerprinting attempt by sending a device-specific User-Agent for each Innertube client (see https://github.com/iv-org/invidious/issues/3230#issuecomment-1226887639).

Authored by: coletdjnz
This commit is contained in:
coletdjnz 2022-08-28 22:59:54 +00:00 committed by GitHub
parent e0992d5558
commit 50ac0e5416
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 29 additions and 15 deletions

View File

@ -110,8 +110,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID', 'clientName': 'ANDROID',
'clientVersion': '17.29.34', 'clientVersion': '17.31.35',
'androidSdkVersion': 30 'androidSdkVersion': 30,
'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
} }
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 3, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
@ -122,8 +123,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID_EMBEDDED_PLAYER', 'clientName': 'ANDROID_EMBEDDED_PLAYER',
'clientVersion': '17.29.34', 'clientVersion': '17.31.35',
'androidSdkVersion': 30 'androidSdkVersion': 30,
'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 55, 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
@ -135,7 +137,8 @@ INNERTUBE_CLIENTS = {
'client': { 'client': {
'clientName': 'ANDROID_MUSIC', 'clientName': 'ANDROID_MUSIC',
'clientVersion': '5.16.51', 'clientVersion': '5.16.51',
'androidSdkVersion': 30 'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
} }
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 21, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
@ -146,8 +149,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID_CREATOR', 'clientName': 'ANDROID_CREATOR',
'clientVersion': '22.28.100', 'clientVersion': '22.30.100',
'androidSdkVersion': 30 'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 14, 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
@ -162,6 +166,7 @@ INNERTUBE_CLIENTS = {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '17.30.1', 'clientVersion': '17.30.1',
'deviceModel': 'iPhone14,3', 'deviceModel': 'iPhone14,3',
'userAgent': 'com.google.ios.youtube/17.30.1 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
} }
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -173,6 +178,7 @@ INNERTUBE_CLIENTS = {
'clientName': 'IOS_MESSAGES_EXTENSION', 'clientName': 'IOS_MESSAGES_EXTENSION',
'clientVersion': '17.30.1', 'clientVersion': '17.30.1',
'deviceModel': 'iPhone14,3', 'deviceModel': 'iPhone14,3',
'userAgent': 'com.google.ios.youtube/17.30.1 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 66, 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
@ -555,7 +561,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'Origin': origin, 'Origin': origin,
'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg) 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
} }
if session_index is None: if session_index is None:
session_index = self._extract_session_index(ytcfg) session_index = self._extract_session_index(ytcfg)
@ -3071,7 +3078,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _is_unplayable(player_response): def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE' return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr): _STORY_PLAYER_PARAMS = '8AEB'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg) session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr) syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
@ -3081,8 +3090,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
yt_query = { yt_query = {
'videoId': video_id, 'videoId': video_id,
'params': '8AEB' # enable stories
} }
if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
yt_query['params'] = self._STORY_PLAYER_PARAMS
yt_query.update(self._generate_player_context(sts)) yt_query.update(self._generate_player_context(sts))
return self._extract_response( return self._extract_response(
item_id=video_id, ep='player', query=yt_query, item_id=video_id, ep='player', query=yt_query,
@ -3115,7 +3126,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return orderedSet(requested_clients) return orderedSet(requested_clients)
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = None initial_pr = None
if webpage: if webpage:
initial_pr = self._search_json( initial_pr = self._search_json(
@ -3165,7 +3176,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try: try:
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr) client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e: except ExtractorError as e:
if last_error: if last_error:
self.report_warning(last_error) self.report_warning(last_error)
@ -3428,14 +3439,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url): def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
webpage = None webpage = None
if 'webpage' not in self._configuration_arg('player_skip'): if 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
if smuggled_data.get('is_story'):
query['pp'] = self._STORY_PLAYER_PARAMS
webpage = self._download_webpage( webpage = self._download_webpage(
webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False) webpage_url, video_id, fatal=False, query=query)
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
player_responses, player_url = self._extract_player_responses( player_responses, player_url = self._extract_player_responses(
self._get_requested_clients(url, smuggled_data), self._get_requested_clients(url, smuggled_data),
video_id, webpage, master_ytcfg) video_id, webpage, master_ytcfg, smuggled_data)
return webpage, master_ytcfg, player_responses, player_url return webpage, master_ytcfg, player_responses, player_url
@ -6008,7 +6022,7 @@ class YoutubeStoriesIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = f'RLTD{self._match_id(url)}' playlist_id = f'RLTD{self._match_id(url)}'
return self.url_result( return self.url_result(
f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
ie=YoutubeTabIE, video_id=playlist_id) ie=YoutubeTabIE, video_id=playlist_id)