[ie/youtube] Remove broken OAuth support (#11558)

Closes #11462
Authored by: bashonly
This commit is contained in:
bashonly 2024-11-16 23:40:21 +00:00 committed by GitHub
parent 637d62a3a9
commit 52c0ffe40a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -22,7 +22,7 @@
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper from .openload import PhantomJSwrapper
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..networking.exceptions import HTTPError, TransportError, network_exceptions from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
ExtractorError, ExtractorError,
@ -50,12 +50,12 @@
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
qualities, qualities,
remove_end,
remove_start, remove_start,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
str_to_int, str_to_int,
strftime_or_none, strftime_or_none,
time_seconds,
traverse_obj, traverse_obj,
try_call, try_call,
try_get, try_get,
@ -573,208 +573,18 @@ def _real_initialize(self):
self._check_login_required() self._check_login_required()
def _perform_login(self, username, password): def _perform_login(self, username, password):
auth_type, _, user = (username or '').partition('+') if username.startswith('oauth'):
if auth_type != 'oauth':
raise ExtractorError(self._youtube_login_hint, expected=True)
self._initialize_oauth(user, password)
'''
OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv).
For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see:
- https://developers.google.com/identity/protocols/oauth2/limited-input-device
- https://accounts.google.com/.well-known/openid-configuration
- https://www.rfc-editor.org/rfc/rfc8628
- https://www.rfc-editor.org/rfc/rfc6749
Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2,
which applies some modifications to the response (such as returning errors as 200 OK).
Since the client works with the standard API, we will use that as it is well-documented.
'''
_OAUTH_PROFILE = None
_OAUTH_ACCESS_TOKEN_CACHE = {}
_OAUTH_DISPLAY_ID = 'oauth'
# YouTube TV (TVHTML5) client. You can find these at youtube.com/tv
_OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
_OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT'
_OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content'
# From https://accounts.google.com/.well-known/openid-configuration
# Technically, these should be fetched dynamically and not hard-coded.
# However, as these endpoints rarely change, we can risk saving an extra request for every invocation.
_OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code'
_OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token'
@property
def _oauth_cache_key(self):
return f'oauth_refresh_token_{self._OAUTH_PROFILE}'
def _read_oauth_error_response(self, response):
return traverse_obj(
self._webpage_read_content(response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False),
({json.loads}, 'error', {str}))
def _set_oauth_info(self, token_response):
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({
'access_token': token_response['access_token'],
'token_type': token_response['token_type'],
'expiry': time_seconds(
seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10),
})
refresh_token = traverse_obj(token_response, ('refresh_token', {str}))
if refresh_token:
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
def _initialize_oauth(self, user, refresh_token):
self._OAUTH_PROFILE = user or 'default'
if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE:
self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"')
return
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
if refresh_token:
msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token'
if self.get_param('cachedir') is not False:
msg += ' and caching token to disk; you should supply an empty password next time'
self.to_screen(msg)
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
else:
refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
if refresh_token:
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
try:
token_response = self._refresh_token(refresh_token)
except ExtractorError as e:
error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '')
self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}')
token_response = self._oauth_authorize
else:
token_response = self._oauth_authorize
self._set_oauth_info(token_response)
self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"')
def _refresh_token(self, refresh_token):
try:
token_response = self._download_json(
self._OAUTH_TOKEN_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note='Refreshing access token',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'client_secret': self._OAUTH_CLIENT_SECRET,
'refresh_token': refresh_token,
'grant_type': 'refresh_token',
}).encode(),
headers={'Content-Type': 'application/json'})
except ExtractorError as e:
if isinstance(e.cause, HTTPError):
error = self._read_oauth_error_response(e.cause.response)
if error == 'invalid_grant':
# RFC6749 § 5.2
raise ExtractorError( raise ExtractorError(
'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)', f'Login with OAuth is no longer supported. {self._youtube_login_hint}', expected=True)
expected=True, video_id=self._OAUTH_DISPLAY_ID)
raise ExtractorError(
f'Failed to refresh access token: Authorization server returned error {error}',
video_id=self._OAUTH_DISPLAY_ID)
raise
return token_response
@property self.report_warning(
def _oauth_authorize(self): f'Login with password is not supported for YouTube. {self._youtube_login_hint}')
code_response = self._download_json(
self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note='Initializing authorization flow',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'scope': self._OAUTH_SCOPE,
}).encode(),
headers={'Content-Type': 'application/json'})
verification_url = traverse_obj(code_response, ('verification_url', {str}))
user_code = traverse_obj(code_response, ('user_code', {str}))
if not verification_url or not user_code:
raise ExtractorError(
'Authorization server did not provide verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID)
# note: The whitespace is intentional
self.to_screen(
f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, '
f'go to {verification_url} and enter code {user_code}')
# RFC8628 § 3.5: default poll interval is 5 seconds if not provided
poll_interval = traverse_obj(code_response, ('interval', {int}), default=5)
for retry in self.RetryManager():
while True:
try:
token_response = self._download_json(
self._OAUTH_TOKEN_ENDPOINT,
video_id=self._OAUTH_DISPLAY_ID,
note=False,
errnote='Failed to request access token',
data=json.dumps({
'client_id': self._OAUTH_CLIENT_ID,
'client_secret': self._OAUTH_CLIENT_SECRET,
'device_code': code_response['device_code'],
'grant_type': 'urn:ietf:params:oauth:grant-type:device_code',
}).encode(),
headers={'Content-Type': 'application/json'})
except ExtractorError as e:
if isinstance(e.cause, TransportError):
retry.error = e
break
elif isinstance(e.cause, HTTPError):
error = self._read_oauth_error_response(e.cause.response)
if not error:
retry.error = e
break
if error == 'authorization_pending':
time.sleep(poll_interval)
continue
elif error == 'expired_token':
raise ExtractorError(
'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID)
elif error == 'access_denied':
raise ExtractorError(
'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID)
elif error == 'slow_down':
# RFC8628 § 3.5: add 5 seconds to the poll interval
poll_interval += 5
time.sleep(poll_interval)
continue
else:
raise ExtractorError(
f'Authorization server returned an error when fetching access token: {error}',
video_id=self._OAUTH_DISPLAY_ID)
raise
return token_response
def _update_oauth(self):
token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
if token is None or token['expiry'] > time.time():
return
self._set_oauth_info(self._refresh_token(token['refresh_token']))
@property @property
def _youtube_login_hint(self): def _youtube_login_hint(self):
return ('Use --username=oauth[+PROFILE] --password="" to log in using oauth, ' return (f'{self._login_hint(method="cookies")}. Also see '
f'or else u{self._login_hint(method="cookies")[1:]}. ' 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies '
'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. ' 'for tips on effectively exporting YouTube cookies')
'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies')
def _check_login_required(self): def _check_login_required(self):
if self._LOGIN_REQUIRED and not self.is_authenticated: if self._LOGIN_REQUIRED and not self.is_authenticated:
@ -924,7 +734,7 @@ def _extract_visitor_data(self, *args):
@functools.cached_property @functools.cached_property
def is_authenticated(self): def is_authenticated(self):
return self._OAUTH_PROFILE or bool(self._generate_sapisidhash_header()) return bool(self._generate_sapisidhash_header())
def extract_ytcfg(self, video_id, webpage): def extract_ytcfg(self, video_id, webpage):
if not webpage: if not webpage:
@ -934,16 +744,6 @@ def extract_ytcfg(self, video_id, webpage):
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
default='{}'), video_id, fatal=False) or {} default='{}'), video_id, fatal=False) or {}
def _generate_oauth_headers(self):
self._update_oauth()
oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
if not oauth_token:
return {}
return {
'Authorization': f'{oauth_token["token_type"]} {oauth_token["access_token"]}',
}
def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs): def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs):
headers = {} headers = {}
account_syncid = account_syncid or self._extract_account_syncid(ytcfg) account_syncid = account_syncid or self._extract_account_syncid(ytcfg)
@ -973,14 +773,10 @@ def generate_api_headers(
'Origin': origin, 'Origin': origin,
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
**self._generate_oauth_headers(),
**self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin), **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin),
} }
return filter_dict(headers) return filter_dict(headers)
def _generate_webpage_headers(self):
return self._generate_oauth_headers()
def _download_ytcfg(self, client, video_id): def _download_ytcfg(self, client, video_id):
url = { url = {
'web': 'https://www.youtube.com', 'web': 'https://www.youtube.com',
@ -990,8 +786,7 @@ def _download_ytcfg(self, client, video_id):
if not url: if not url:
return {} return {}
webpage = self._download_webpage( webpage = self._download_webpage(
url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config', url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
headers=self._generate_webpage_headers())
return self.extract_ytcfg(video_id, webpage) or {} return self.extract_ytcfg(video_id, webpage) or {}
@staticmethod @staticmethod
@ -3256,8 +3051,7 @@ def _load_player(self, video_id, player_url, fatal=True):
code = self._download_webpage( code = self._download_webpage(
player_url, video_id, fatal=fatal, player_url, video_id, fatal=fatal,
note='Downloading player ' + player_id, note='Downloading player ' + player_id,
errnote=f'Download of {player_url} failed', errnote=f'Download of {player_url} failed')
headers=self._generate_webpage_headers())
if code: if code:
self._code_cache[player_id] = code self._code_cache[player_id] = code
return self._code_cache.get(player_id) return self._code_cache.get(player_id)
@ -3540,8 +3334,7 @@ def _mark_watched(self, video_id, player_responses):
self._download_webpage( self._download_webpage(
url, video_id, f'Marking {label}watched', url, video_id, f'Marking {label}watched',
'Unable to mark watched', fatal=False, 'Unable to mark watched', fatal=False)
headers=self._generate_webpage_headers())
@classmethod @classmethod
def _extract_from_webpage(cls, url, webpage): def _extract_from_webpage(cls, url, webpage):
@ -4523,7 +4316,7 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
if pp: if pp:
query['pp'] = pp query['pp'] = pp
webpage = self._download_webpage( webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers()) webpage_url, video_id, fatal=False, query=query)
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
@ -4666,6 +4459,9 @@ def feed_entry(name):
self.raise_geo_restricted(subreason, countries, metadata_available=True) self.raise_geo_restricted(subreason, countries, metadata_available=True)
reason += f'. {subreason}' reason += f'. {subreason}'
if reason: if reason:
if 'sign in' in reason.lower():
reason = remove_end(reason, 'This helps protect our community. Learn more')
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
self.raise_no_formats(reason, expected=True) self.raise_no_formats(reason, expected=True)
keywords = get_first(video_details, 'keywords', expected_type=list) or [] keywords = get_first(video_details, 'keywords', expected_type=list) or []
@ -5811,7 +5607,7 @@ def _extract_webpage(self, url, item_id, fatal=True):
webpage, data = None, None webpage, data = None, None
for retry in self.RetryManager(fatal=fatal): for retry in self.RetryManager(fatal=fatal):
try: try:
webpage = self._download_webpage(url, item_id, note='Downloading webpage', headers=self._generate_webpage_headers()) webpage = self._download_webpage(url, item_id, note='Downloading webpage')
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, network_exceptions): if isinstance(e.cause, network_exceptions):