[YouTube] Add new age-gate bypass (#3233)

Closes #3182
Authored by: zerodytrash, pukkandan
This commit is contained in:
David 2022-03-29 03:05:31 -07:00 committed by GitHub
parent 8a7f68d0b1
commit e7870111e8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 19 deletions

View file

@ -1657,7 +1657,7 @@ # EXTRACTOR ARGUMENTS
#### youtube
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and auto-translated subtitles respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients.
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios`, with variants `_music`, `_embedded`, `_embedscreen` and `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (age-gate bypass), which have no variants. By default, `android,web` is used, but `tv_embedded` and the `_creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` URLs. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly)
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)

View file

@ -217,15 +217,35 @@
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2
}
},
# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
# See: https://github.com/zerodytrash/YouTube-Internal-Clients
'tv_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
'clientVersion': '2.0',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 85
},
}
def _split_innertube_client(client_name):
variant, *base = client_name.rsplit('.', 1)
if base:
return variant, base[0], variant
base, *variant = client_name.split('_', 1)
return client_name, base, variant[0] if variant else None
def build_innertube_clients():
THIRD_PARTY = {
'embedUrl': 'https://google.com', # Can be any valid URL
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@ -234,15 +254,15 @@ def build_innertube_clients():
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
base_client, *variant = client.split('_')
_, base_client, variant = _split_innertube_client(client)
ytcfg['priority'] = 10 * priority(base_client)
if not variant:
INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
agegate_ytcfg['priority'] -= 1
elif variant == ['embedded']:
INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
embedscreen['priority'] -= 3
elif variant == 'embedded':
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
ytcfg['priority'] -= 2
else:
@ -2956,13 +2976,14 @@ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
prs = []
def append_client(*client_names):
""" Append the first client name that exists """
""" Append the first client name that exists but not already used """
for client_name in client_names:
if client_name in INNERTUBE_CLIENTS:
if client_name not in all_clients:
actual_client = _split_innertube_client(client_name)[0]
if actual_client in INNERTUBE_CLIENTS:
if actual_client not in all_clients:
clients.append(client_name)
all_clients.add(client_name)
return
all_clients.add(actual_client)
return
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
@ -2977,7 +2998,7 @@ def append_client(*client_names):
tried_iframe_fallback = False
player_url = None
while clients:
client = clients.pop()
client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {}
if 'configs' not in self._configuration_arg('player_skip'):
player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
@ -3005,10 +3026,13 @@ def append_client(*client_names):
prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
append_client(client.replace('_agegate', '_creator'))
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
append_client(f'{base_client}_creator')
elif self._is_agegated(pr):
append_client(f'{client}_embedded', f'{client.replace("_embedded", "")}_agegate')
if variant == 'tv_embedded':
append_client(f'{base_client}_embedded')
elif not variant:
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
if last_error:
if not len(prs):