[youtube] Add age-gate bypass for unverified accounts (#600)

Adds `_creator` variants for the `web`, `android` and `ios` clients

Authored by: zerodytrash, colethedj, pukkandan
This commit is contained in:
David 2021-08-01 23:13:46 +02:00 committed by GitHub
parent a46a815b05
commit e7e94f2a5c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 54 additions and 24 deletions

View file

@ -1357,7 +1357,7 @@ # EXTRACTOR ARGUMENTS
The following extractors use this feature: The following extractors use this feature:
* **youtube** * **youtube**
* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
* `player_client`: Clients to extract video data from - one or more of `web`, `android`, `ios`, `mweb`, `web_music`, `android_music`, `ios_music`, `web_embedded`, `android_embedded`, `ios_embedded`, `web_agegate`, `android_agegate`, `ios_agegate`, `mweb_agegate` or `all`. By default, `android,web` is used. If the URL is from `music.youtube.com`, `android,web,android_music,web_music` is used. If age-gate is detected, the `_agegate` variants are automatically added. * `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (e.g. `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the `_agegate` and `_creator` variants are added as required for age-gated videos. Similarly, the `_music` variants are added for `music.youtube.com` URLs. You can also use `all` to use all the clients.
* `player_skip`: `configs` - skip any requests for client configs and use defaults * `player_skip`: `configs` - skip any requests for client configs and use defaults
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side). * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
* `max_comments`: maximum amount of comments to download (default all). * `max_comments`: maximum amount of comments to download (default all).

View file

@ -100,6 +100,16 @@ def parse_qs(url):
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 67, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
}, },
'web_creator': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_CREATOR',
'clientVersion': '1.20210621.00.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
},
'android': { 'android': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
@ -131,6 +141,15 @@ def parse_qs(url):
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 21, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
}, },
'android_creator': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_CREATOR',
'clientVersion': '21.24.100',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 14
},
# ios has HLS live streams # ios has HLS live streams
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
'ios': { 'ios': {
@ -164,6 +183,15 @@ def parse_qs(url):
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 26 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
}, },
'ios_creator': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_CREATOR',
'clientVersion': '21.24.100',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 15
},
# mweb has 'ultralow' formats # mweb has 'ultralow' formats
# See: https://github.com/yt-dlp/yt-dlp/pull/557 # See: https://github.com/yt-dlp/yt-dlp/pull/557
'mweb': { 'mweb': {
@ -1036,17 +1064,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} }
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_AGE_GATE_REASONS = (
'Sign in to confirm your age',
'This video may be inappropriate for some users.',
'Sorry, this content is age-restricted.',
'Please confirm your age.')
_AGE_GATE_STATUS_REASONS = (
'AGE_VERIFICATION_REQUIRED',
'AGE_CHECK_REQUIRED'
)
_GEO_BYPASS = False _GEO_BYPASS = False
IE_NAME = 'youtube' IE_NAME = 'youtube'
@ -2402,14 +2419,21 @@ def _generate_player_context(sts=None):
'racyCheckOk': True 'racyCheckOk': True
} }
def _is_agegated(self, player_response): @staticmethod
def _is_agegated(player_response):
if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
return True
reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[]) reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
for reason in reasons: AGE_GATE_REASONS = (
if reason in self._AGE_GATE_REASONS + self._AGE_GATE_STATUS_REASONS: 'confirm your age', 'age-restricted', 'inappropriate', # reason
return True 'age_verification_required', 'age_check_required', # status
if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')) is not None: )
return True return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
return False
@staticmethod
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr): def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
@ -2446,7 +2470,7 @@ def _get_requested_clients(self, url, smuggled_data):
if smuggled_data.get('is_music_url') or self.is_music_url(url): if smuggled_data.get('is_music_url') or self.is_music_url(url):
requested_clients.extend( requested_clients.extend(
f'{client}_music' for client in requested_clients if not client.endswith('_music')) f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
return orderedSet(requested_clients) return orderedSet(requested_clients)
@ -2469,6 +2493,11 @@ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, pl
original_clients = clients original_clients = clients
clients = clients[::-1] clients = clients[::-1]
def append_client(client_name):
if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
clients.append(client_name)
while clients: while clients:
client = clients.pop() client = clients.pop()
player_ytcfg = master_ytcfg if client == 'web' else {} player_ytcfg = master_ytcfg if client == 'web' else {}
@ -2482,10 +2511,11 @@ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, pl
if pr: if pr:
yield pr yield pr
if self._is_agegated(pr): # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
client = f'{client}_agegate' if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
if client in INNERTUBE_CLIENTS and client not in original_clients: append_client(client.replace('_agegate', '_creator'))
clients.append(client) elif self._is_agegated(pr):
append_client(f'{client}_agegate')
# Android player_response does not have microFormats which are needed for # Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats # extraction of some data. So we return the initial_pr with formats