From f849d77ab54788446b995d256e1ee0894c4fb927 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 16 Mar 2024 16:57:21 +1300 Subject: [PATCH 1/4] [test] Workaround websocket server hanging (#9467) Authored by: coletdjnz --- test/test_websockets.py | 53 +++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/test/test_websockets.py b/test/test_websockets.py index 13b3a1e76..b294b0932 100644 --- a/test/test_websockets.py +++ b/test/test_websockets.py @@ -32,8 +32,6 @@ from yt_dlp.networking.exceptions import ( ) from yt_dlp.utils.networking import HTTPHeaderDict -from test.conftest import validate_and_send - TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -66,7 +64,9 @@ def process_request(self, request): def create_websocket_server(**ws_kwargs): import websockets.sync.server - wsd = websockets.sync.server.serve(websocket_handler, '127.0.0.1', 0, process_request=process_request, **ws_kwargs) + wsd = websockets.sync.server.serve( + websocket_handler, '127.0.0.1', 0, + process_request=process_request, open_timeout=2, **ws_kwargs) ws_port = wsd.socket.getsockname()[1] ws_server_thread = threading.Thread(target=wsd.serve_forever) ws_server_thread.daemon = True @@ -100,6 +100,19 @@ def create_mtls_wss_websocket_server(): return create_websocket_server(ssl_context=sslctx) +def ws_validate_and_send(rh, req): + rh.validate(req) + max_tries = 3 + for i in range(max_tries): + try: + return rh.send(req) + except TransportError as e: + if i < (max_tries - 1) and 'connection closed during handshake' in str(e): + # websockets server sometimes hangs on new connections + continue + raise + + @pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers') class TestWebsSocketRequestHandlerConformance: @classmethod @@ -119,7 +132,7 @@ class TestWebsSocketRequestHandlerConformance: @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) def test_basic_websockets(self, handler): with handler() as rh: - ws = validate_and_send(rh, Request(self.ws_base_url)) + ws = ws_validate_and_send(rh, Request(self.ws_base_url)) assert 'upgrade' in ws.headers assert ws.status == 101 ws.send('foo') @@ -131,7 +144,7 @@ class TestWebsSocketRequestHandlerConformance: @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) def test_send_types(self, handler, msg, opcode): with handler() as rh: - ws = validate_and_send(rh, Request(self.ws_base_url)) + ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send(msg) assert int(ws.recv()) == opcode ws.close() @@ -140,10 +153,10 @@ class TestWebsSocketRequestHandlerConformance: def test_verify_cert(self, handler): with handler() as rh: with pytest.raises(CertificateVerifyError): - validate_and_send(rh, Request(self.wss_base_url)) + ws_validate_and_send(rh, Request(self.wss_base_url)) with handler(verify=False) as rh: - ws = validate_and_send(rh, Request(self.wss_base_url)) + ws = ws_validate_and_send(rh, Request(self.wss_base_url)) assert ws.status == 101 ws.close() @@ -151,7 +164,7 @@ class TestWebsSocketRequestHandlerConformance: def test_ssl_error(self, handler): with handler(verify=False) as rh: with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: - validate_and_send(rh, Request(self.bad_wss_host)) + ws_validate_and_send(rh, Request(self.bad_wss_host)) assert not issubclass(exc_info.type, CertificateVerifyError) @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) @@ -163,7 +176,7 @@ class TestWebsSocketRequestHandlerConformance: ]) def test_percent_encode(self, handler, path, expected): with handler() as rh: - ws = validate_and_send(rh, Request(f'{self.ws_base_url}{path}')) + ws = ws_validate_and_send(rh, Request(f'{self.ws_base_url}{path}')) ws.send('path') assert ws.recv() == expected assert ws.status == 101 @@ -174,7 +187,7 @@ class TestWebsSocketRequestHandlerConformance: with handler() as rh: # This isn't a comprehensive test, # but it should be enough to check whether the handler is removing dot segments - ws = validate_and_send(rh, Request(f'{self.ws_base_url}/a/b/./../../test')) + ws = ws_validate_and_send(rh, Request(f'{self.ws_base_url}/a/b/./../../test')) assert ws.status == 101 ws.send('path') assert ws.recv() == '/test' @@ -187,7 +200,7 @@ class TestWebsSocketRequestHandlerConformance: def test_raise_http_error(self, handler, status): with handler() as rh: with pytest.raises(HTTPError) as exc_info: - validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}')) + ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}')) assert exc_info.value.status == status @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) @@ -198,7 +211,7 @@ class TestWebsSocketRequestHandlerConformance: def test_timeout(self, handler, params, extensions): with handler(**params) as rh: with pytest.raises(TransportError): - validate_and_send(rh, Request(self.ws_base_url, extensions=extensions)) + ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions)) @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) def test_cookies(self, handler): @@ -210,18 +223,18 @@ class TestWebsSocketRequestHandlerConformance: comment_url=None, rest={})) with handler(cookiejar=cookiejar) as rh: - ws = validate_and_send(rh, Request(self.ws_base_url)) + ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('headers') assert json.loads(ws.recv())['cookie'] == 'test=ytdlp' ws.close() with handler() as rh: - ws = validate_and_send(rh, Request(self.ws_base_url)) + ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('headers') assert 'cookie' not in json.loads(ws.recv()) ws.close() - ws = validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': cookiejar})) + ws = ws_validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': cookiejar})) ws.send('headers') assert json.loads(ws.recv())['cookie'] == 'test=ytdlp' ws.close() @@ -231,7 +244,7 @@ class TestWebsSocketRequestHandlerConformance: source_address = f'127.0.0.{random.randint(5, 255)}' verify_address_availability(source_address) with handler(source_address=source_address) as rh: - ws = validate_and_send(rh, Request(self.ws_base_url)) + ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('source_address') assert source_address == ws.recv() ws.close() @@ -240,7 +253,7 @@ class TestWebsSocketRequestHandlerConformance: def test_response_url(self, handler): with handler() as rh: url = f'{self.ws_base_url}/something' - ws = validate_and_send(rh, Request(url)) + ws = ws_validate_and_send(rh, Request(url)) assert ws.url == url ws.close() @@ -248,14 +261,14 @@ class TestWebsSocketRequestHandlerConformance: def test_request_headers(self, handler): with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: # Global Headers - ws = validate_and_send(rh, Request(self.ws_base_url)) + ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('headers') headers = HTTPHeaderDict(json.loads(ws.recv())) assert headers['test1'] == 'test' ws.close() # Per request headers, merged with global - ws = validate_and_send(rh, Request( + ws = ws_validate_and_send(rh, Request( self.ws_base_url, headers={'test2': 'changed', 'test3': 'test3'})) ws.send('headers') headers = HTTPHeaderDict(json.loads(ws.recv())) @@ -288,7 +301,7 @@ class TestWebsSocketRequestHandlerConformance: verify=False, client_cert=client_cert ) as rh: - validate_and_send(rh, Request(self.mtls_wss_base_url)).close() + ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close() def create_fake_ws_connection(raised): From 0b81d4d252bd065ccd352722987ea34fe17f9244 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 16 Mar 2024 22:47:56 -0500 Subject: [PATCH 2/4] Add new options `--impersonate` and `--list-impersonate-targets` Authored by: coletdjnz, Grub4K, pukkandan, bashonly Co-authored-by: Simon Sawicki Co-authored-by: pukkandan Co-authored-by: bashonly --- README.md | 4 + test/test_networking.py | 198 +++++++++++++++++++++++++++---- yt_dlp/YoutubeDL.py | 43 ++++++- yt_dlp/__init__.py | 41 +++++++ yt_dlp/networking/impersonate.py | 141 ++++++++++++++++++++++ yt_dlp/options.py | 12 ++ 6 files changed, 415 insertions(+), 24 deletions(-) create mode 100644 yt_dlp/networking/impersonate.py diff --git a/README.md b/README.md index 1e108a29c..d4b89229f 100644 --- a/README.md +++ b/README.md @@ -389,6 +389,10 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds --source-address IP Client-side IP address to bind to + --impersonate CLIENT[:OS] Client to impersonate for requests. E.g. + chrome, chrome-110, chrome:windows-10. Pass + --impersonate="" to impersonate any client. + --list-impersonate-targets List available clients to impersonate. -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 --enable-file-urls Enable file:// URLs. This is disabled by diff --git a/test/test_networking.py b/test/test_networking.py index 628f1f171..b67b521d9 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -27,6 +27,7 @@ import zlib from email.message import Message from http.cookiejar import CookieJar +from test.conftest import validate_and_send from test.helper import FakeYDL, http_server_port, verify_address_availability from yt_dlp.cookies import YoutubeDLCookieJar from yt_dlp.dependencies import brotli, requests, urllib3 @@ -50,11 +51,14 @@ from yt_dlp.networking.exceptions import ( TransportError, UnsupportedRequest, ) +from yt_dlp.networking.impersonate import ( + ImpersonateRequestHandler, + ImpersonateTarget, +) +from yt_dlp.utils import YoutubeDLError from yt_dlp.utils._utils import _YDLLogger as FakeLogger from yt_dlp.utils.networking import HTTPHeaderDict -from test.conftest import validate_and_send - TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -1113,6 +1117,10 @@ class FakeResponse(Response): class FakeRH(RequestHandler): + def __init__(self, *args, **params): + self.params = params + super().__init__(*args, **params) + def _validate(self, request): return @@ -1271,15 +1279,10 @@ class TestYoutubeDLNetworking: ('', {'all': '__noproxy__'}), (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}) # env, set https ]) - def test_proxy(self, proxy, expected): - old_http_proxy = os.environ.get('HTTP_PROXY') - try: - os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081' # ensure that provided proxies override env - with FakeYDL({'proxy': proxy}) as ydl: - assert ydl.proxies == expected - finally: - if old_http_proxy: - os.environ['HTTP_PROXY'] = old_http_proxy + def test_proxy(self, proxy, expected, monkeypatch): + monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081') + with FakeYDL({'proxy': proxy}) as ydl: + assert ydl.proxies == expected def test_compat_request(self): with FakeRHYDL() as ydl: @@ -1331,6 +1334,95 @@ class TestYoutubeDLNetworking: with pytest.raises(SSLError, match='testerror'): ydl.urlopen('ssl://testerror') + def test_unsupported_impersonate_target(self): + class FakeImpersonationRHYDL(FakeYDL): + def __init__(self, *args, **kwargs): + class HTTPRH(RequestHandler): + def _send(self, request: Request): + pass + _SUPPORTED_URL_SCHEMES = ('http',) + _SUPPORTED_PROXY_SCHEMES = None + + super().__init__(*args, **kwargs) + self._request_director = self.build_request_director([HTTPRH]) + + with FakeImpersonationRHYDL() as ydl: + with pytest.raises( + RequestError, + match=r'Impersonate target "test" is not available' + ): + ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)})) + + def test_unsupported_impersonate_extension(self): + class FakeHTTPRHYDL(FakeYDL): + def __init__(self, *args, **kwargs): + class IRH(ImpersonateRequestHandler): + def _send(self, request: Request): + pass + + _SUPPORTED_URL_SCHEMES = ('http',) + _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc',): 'test'} + _SUPPORTED_PROXY_SCHEMES = None + + super().__init__(*args, **kwargs) + self._request_director = self.build_request_director([IRH]) + + with FakeHTTPRHYDL() as ydl: + with pytest.raises( + RequestError, + match=r'Impersonate target "test" is not available' + ): + ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)})) + + def test_raise_impersonate_error(self): + with pytest.raises( + YoutubeDLError, + match=r'Impersonate target "test" is not available' + ): + FakeYDL({'impersonate': ImpersonateTarget('test', None, None, None)}) + + def test_pass_impersonate_param(self, monkeypatch): + + class IRH(ImpersonateRequestHandler): + def _send(self, request: Request): + pass + + _SUPPORTED_URL_SCHEMES = ('http',) + _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'} + + # Bypass the check on initialize + brh = FakeYDL.build_request_director + monkeypatch.setattr(FakeYDL, 'build_request_director', lambda cls, handlers, preferences=None: brh(cls, handlers=[IRH])) + + with FakeYDL({ + 'impersonate': ImpersonateTarget('abc', None, None, None) + }) as ydl: + rh = self.build_handler(ydl, IRH) + assert rh.impersonate == ImpersonateTarget('abc', None, None, None) + + def test_get_impersonate_targets(self): + handlers = [] + for target_client in ('abc', 'xyz', 'asd'): + class TestRH(ImpersonateRequestHandler): + def _send(self, request: Request): + pass + _SUPPORTED_URL_SCHEMES = ('http',) + _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client,): 'test'} + RH_KEY = target_client + RH_NAME = target_client + handlers.append(TestRH) + + with FakeYDL() as ydl: + ydl._request_director = ydl.build_request_director(handlers) + assert set(ydl._get_available_impersonate_targets()) == { + (ImpersonateTarget('xyz'), 'xyz'), + (ImpersonateTarget('abc'), 'abc'), + (ImpersonateTarget('asd'), 'asd') + } + assert ydl._impersonate_target_available(ImpersonateTarget('abc')) + assert ydl._impersonate_target_available(ImpersonateTarget()) + assert not ydl._impersonate_target_available(ImpersonateTarget('zxy')) + @pytest.mark.parametrize('proxy_key,proxy_url,expected', [ ('http', '__noproxy__', None), ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'), @@ -1341,23 +1433,17 @@ class TestYoutubeDLNetworking: ('http', 'socks4://example.com', 'socks4://example.com'), ('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies ]) - def test_clean_proxy(self, proxy_key, proxy_url, expected): + def test_clean_proxy(self, proxy_key, proxy_url, expected, monkeypatch): # proxies should be cleaned in urlopen() with FakeRHYDL() as ydl: req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request assert req.proxies[proxy_key] == expected # and should also be cleaned when building the handler - env_key = f'{proxy_key.upper()}_PROXY' - old_env_proxy = os.environ.get(env_key) - try: - os.environ[env_key] = proxy_url # ensure that provided proxies override env - with FakeYDL() as ydl: - rh = self.build_handler(ydl) - assert rh.proxies[proxy_key] == expected - finally: - if old_env_proxy: - os.environ[env_key] = old_env_proxy + monkeypatch.setenv(f'{proxy_key.upper()}_PROXY', proxy_url) + with FakeYDL() as ydl: + rh = self.build_handler(ydl) + assert rh.proxies[proxy_key] == expected def test_clean_proxy_header(self): with FakeRHYDL() as ydl: @@ -1629,3 +1715,71 @@ class TestResponse: assert res.geturl() == res.url assert res.info() is res.headers assert res.getheader('test') == res.get_header('test') + + +class TestImpersonateTarget: + @pytest.mark.parametrize('target_str,expected', [ + ('abc', ImpersonateTarget('abc', None, None, None)), + ('abc-120_esr', ImpersonateTarget('abc', '120_esr', None, None)), + ('abc-120:xyz', ImpersonateTarget('abc', '120', 'xyz', None)), + ('abc-120:xyz-5.6', ImpersonateTarget('abc', '120', 'xyz', '5.6')), + ('abc:xyz', ImpersonateTarget('abc', None, 'xyz', None)), + ('abc:', ImpersonateTarget('abc', None, None, None)), + ('abc-120:', ImpersonateTarget('abc', '120', None, None)), + (':xyz', ImpersonateTarget(None, None, 'xyz', None)), + (':xyz-6.5', ImpersonateTarget(None, None, 'xyz', '6.5')), + (':', ImpersonateTarget(None, None, None, None)), + ('', ImpersonateTarget(None, None, None, None)), + ]) + def test_target_from_str(self, target_str, expected): + assert ImpersonateTarget.from_str(target_str) == expected + + @pytest.mark.parametrize('target_str', [ + '-120', ':-12.0', '-12:-12', '-:-', + '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:' + ]) + def test_target_from_invalid_str(self, target_str): + with pytest.raises(ValueError): + ImpersonateTarget.from_str(target_str) + + @pytest.mark.parametrize('target,expected', [ + (ImpersonateTarget('abc', None, None, None), 'abc'), + (ImpersonateTarget('abc', '120', None, None), 'abc-120'), + (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'), + (ImpersonateTarget('abc', '120', 'xyz', '5'), 'abc-120:xyz-5'), + (ImpersonateTarget('abc', None, 'xyz', None), 'abc:xyz'), + (ImpersonateTarget('abc', '120', None, None), 'abc-120'), + (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'), + (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'), + (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'), + (ImpersonateTarget('abc', ), 'abc'), + (ImpersonateTarget(None, None, None, None), ''), + ]) + def test_str(self, target, expected): + assert str(target) == expected + + @pytest.mark.parametrize('args', [ + ('abc', None, None, '5'), + ('abc', '120', None, '5'), + (None, '120', None, None), + (None, '120', None, '5'), + (None, None, None, '5'), + (None, '120', 'xyz', '5'), + ]) + def test_invalid_impersonate_target(self, args): + with pytest.raises(ValueError): + ImpersonateTarget(*args) + + @pytest.mark.parametrize('target1,target2,is_in,is_eq', [ + (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', None, None, None), True, True), + (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', '120', None, None), True, False), + (ImpersonateTarget('abc', None, 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', None), True, False), + (ImpersonateTarget('abc', '121', 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', 'test'), False, False), + (ImpersonateTarget('abc'), ImpersonateTarget('abc', '120', 'xyz', 'test'), True, False), + (ImpersonateTarget('abc', '120', 'xyz', 'test'), ImpersonateTarget('abc'), True, False), + (ImpersonateTarget(), ImpersonateTarget('abc', '120', 'xyz'), True, False), + (ImpersonateTarget(), ImpersonateTarget(), True, True), + ]) + def test_impersonate_target_in(self, target1, target2, is_in, is_eq): + assert (target1 in target2) is is_in + assert (target1 == target2) is is_eq diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index c34d97bba..e3d1db376 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -42,6 +42,7 @@ from .networking.exceptions import ( SSLError, network_exceptions, ) +from .networking.impersonate import ImpersonateRequestHandler from .plugins import directories as plugin_directories from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( @@ -99,6 +100,7 @@ from .utils import ( SameFileError, UnavailableVideoError, UserNotLive, + YoutubeDLError, age_restricted, args_to_str, bug_reports_message, @@ -402,6 +404,8 @@ class YoutubeDL: - "detect_or_warn": check whether we can do anything about it, warn otherwise (default) source_address: Client-side IP address to bind to. + impersonate: Client to impersonate for requests. + An ImpersonateTarget (from yt_dlp.networking.impersonate) sleep_interval_requests: Number of seconds to sleep between requests during extraction sleep_interval: Number of seconds to sleep before each download when @@ -713,6 +717,13 @@ class YoutubeDL: for msg in self.params.get('_deprecation_warnings', []): self.deprecated_feature(msg) + if impersonate_target := self.params.get('impersonate'): + if not self._impersonate_target_available(impersonate_target): + raise YoutubeDLError( + f'Impersonate target "{impersonate_target}" is not available. ' + f'Use --list-impersonate-targets to see available targets. ' + f'You may be missing dependencies required to support this target.') + if 'list-formats' in self.params['compat_opts']: self.params['listformats_table'] = False @@ -4077,6 +4088,22 @@ class YoutubeDL: handler = self._request_director.handlers['Urllib'] return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies) + def _get_available_impersonate_targets(self): + # todo(future): make available as public API + return [ + (target, rh.RH_NAME) + for rh in self._request_director.handlers.values() + if isinstance(rh, ImpersonateRequestHandler) + for target in rh.supported_targets + ] + + def _impersonate_target_available(self, target): + # todo(future): make available as public API + return any( + rh.is_supported_target(target) + for rh in self._request_director.handlers.values() + if isinstance(rh, ImpersonateRequestHandler)) + def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): @@ -4108,9 +4135,13 @@ class YoutubeDL: raise RequestError( 'file:// URLs are disabled by default in yt-dlp for security reasons. ' 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue - if 'unsupported proxy type: "https"' in ue.msg.lower(): + if ( + 'unsupported proxy type: "https"' in ue.msg.lower() + and 'requests' not in self._request_director.handlers + and 'curl_cffi' not in self._request_director.handlers + ): raise RequestError( - 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests') + 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi') elif ( re.match(r'unsupported url scheme: "wss?"', ue.msg.lower()) @@ -4120,6 +4151,13 @@ class YoutubeDL: 'This request requires WebSocket support. ' 'Ensure one of the following dependencies are installed: websockets', cause=ue) from ue + + elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()): + raise RequestError( + f'Impersonate target "{req.extensions["impersonate"]}" is not available.' + f' See --list-impersonate-targets for available targets.' + f' This request requires browser impersonation, however you may be missing dependencies' + f' required to support this target.') raise except SSLError as e: if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e): @@ -4152,6 +4190,7 @@ class YoutubeDL: 'timeout': 'socket_timeout', 'legacy_ssl_support': 'legacyserverconnect', 'enable_file_urls': 'enable_file_urls', + 'impersonate': 'impersonate', 'client_cert': { 'client_certificate': 'client_certificate', 'client_certificate_key': 'client_certificate_key', diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index aeea2625e..940594faf 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -19,6 +19,7 @@ from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .downloader.external import get_external_downloader from .extractor import list_extractor_classes from .extractor.adobepass import MSO_INFO +from .networking.impersonate import ImpersonateTarget from .options import parseOpts from .postprocessor import ( FFmpegExtractAudioPP, @@ -48,6 +49,7 @@ from .utils import ( float_or_none, format_field, int_or_none, + join_nonempty, match_filter_func, parse_bytes, parse_duration, @@ -388,6 +390,9 @@ def validate_options(opts): f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') opts.cookiesfrombrowser = (browser_name, profile, keyring, container) + if opts.impersonate is not None: + opts.impersonate = ImpersonateTarget.from_str(opts.impersonate.lower()) + # MetadataParser def metadataparser_actions(f): if isinstance(f, str): @@ -911,6 +916,7 @@ def parse_options(argv=None): 'postprocessors': postprocessors, 'fixup': opts.fixup, 'source_address': opts.source_address, + 'impersonate': opts.impersonate, 'call_home': opts.call_home, 'sleep_interval_requests': opts.sleep_interval_requests, 'sleep_interval': opts.sleep_interval, @@ -980,6 +986,41 @@ def _real_main(argv=None): traceback.print_exc() ydl._download_retcode = 100 + if opts.list_impersonate_targets: + + known_targets = [ + # List of simplified targets we know are supported, + # to help users know what dependencies may be required. + (ImpersonateTarget('chrome'), 'curl_cffi'), + (ImpersonateTarget('edge'), 'curl_cffi'), + (ImpersonateTarget('safari'), 'curl_cffi'), + ] + + available_targets = ydl._get_available_impersonate_targets() + + def make_row(target, handler): + return [ + join_nonempty(target.client.title(), target.version, delim='-') or '-', + join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-', + handler, + ] + + rows = [make_row(target, handler) for target, handler in available_targets] + + for known_target, known_handler in known_targets: + if not any( + known_target in target and handler == known_handler + for target, handler in available_targets + ): + rows.append([ + ydl._format_out(text, ydl.Styles.SUPPRESS) + for text in make_row(known_target, f'{known_handler} (not available)') + ]) + + ydl.to_screen('[info] Available impersonate targets') + ydl.to_stdout(render_table(['Client', 'OS', 'Source'], rows, extra_gap=2, delim='-')) + return + if not actual_use: if pre_process: return ydl._download_retcode diff --git a/yt_dlp/networking/impersonate.py b/yt_dlp/networking/impersonate.py new file mode 100644 index 000000000..ca66180c7 --- /dev/null +++ b/yt_dlp/networking/impersonate.py @@ -0,0 +1,141 @@ +from __future__ import annotations + +import re +from abc import ABC +from dataclasses import dataclass +from typing import Any + +from .common import RequestHandler, register_preference +from .exceptions import UnsupportedRequest +from ..compat.types import NoneType +from ..utils import classproperty, join_nonempty +from ..utils.networking import std_headers + + +@dataclass(order=True, frozen=True) +class ImpersonateTarget: + """ + A target for browser impersonation. + + Parameters: + @param client: the client to impersonate + @param version: the client version to impersonate + @param os: the client OS to impersonate + @param os_version: the client OS version to impersonate + + Note: None is used to indicate to match any. + + """ + client: str | None = None + version: str | None = None + os: str | None = None + os_version: str | None = None + + def __post_init__(self): + if self.version and not self.client: + raise ValueError('client is required if version is set') + if self.os_version and not self.os: + raise ValueError('os is required if os_version is set') + + def __contains__(self, target: ImpersonateTarget): + if not isinstance(target, ImpersonateTarget): + return False + return ( + (self.client is None or target.client is None or self.client == target.client) + and (self.version is None or target.version is None or self.version == target.version) + and (self.os is None or target.os is None or self.os == target.os) + and (self.os_version is None or target.os_version is None or self.os_version == target.os_version) + ) + + def __str__(self): + return f'{join_nonempty(self.client, self.version)}:{join_nonempty(self.os, self.os_version)}'.rstrip(':') + + @classmethod + def from_str(cls, target: str): + mobj = re.fullmatch(r'(?:(?P[^:-]+)(?:-(?P[^:-]+))?)?(?::(?:(?P[^:-]+)(?:-(?P[^:-]+))?)?)?', target) + if not mobj: + raise ValueError(f'Invalid impersonate target "{target}"') + return cls(**mobj.groupdict()) + + +class ImpersonateRequestHandler(RequestHandler, ABC): + """ + Base class for request handlers that support browser impersonation. + + This provides a method for checking the validity of the impersonate extension, + which can be used in _check_extensions. + + Impersonate targets consist of a client, version, os and os_ver. + See the ImpersonateTarget class for more details. + + The following may be defined: + - `_SUPPORTED_IMPERSONATE_TARGET_MAP`: a dict mapping supported targets to custom object. + Any Request with an impersonate target not in this list will raise an UnsupportedRequest. + Set to None to disable this check. + Note: Entries are in order of preference + + Parameters: + @param impersonate: the default impersonate target to use for requests. + Set to None to disable impersonation. + """ + _SUPPORTED_IMPERSONATE_TARGET_MAP: dict[ImpersonateTarget, Any] = {} + + def __init__(self, *, impersonate: ImpersonateTarget = None, **kwargs): + super().__init__(**kwargs) + self.impersonate = impersonate + + def _check_impersonate_target(self, target: ImpersonateTarget): + assert isinstance(target, (ImpersonateTarget, NoneType)) + if target is None or not self.supported_targets: + return + if not self.is_supported_target(target): + raise UnsupportedRequest(f'Unsupported impersonate target: {target}') + + def _check_extensions(self, extensions): + super()._check_extensions(extensions) + if 'impersonate' in extensions: + self._check_impersonate_target(extensions.get('impersonate')) + + def _validate(self, request): + super()._validate(request) + self._check_impersonate_target(self.impersonate) + + def _resolve_target(self, target: ImpersonateTarget | None): + """Resolve a target to a supported target.""" + if target is None: + return + for supported_target in self.supported_targets: + if target in supported_target: + if self.verbose: + self._logger.stdout( + f'{self.RH_NAME}: resolved impersonate target {target} to {supported_target}') + return supported_target + + @classproperty + def supported_targets(self) -> tuple[ImpersonateTarget, ...]: + return tuple(self._SUPPORTED_IMPERSONATE_TARGET_MAP.keys()) + + def is_supported_target(self, target: ImpersonateTarget): + assert isinstance(target, ImpersonateTarget) + return self._resolve_target(target) is not None + + def _get_request_target(self, request): + """Get the requested target for the request""" + return self._resolve_target(request.extensions.get('impersonate') or self.impersonate) + + def _get_impersonate_headers(self, request): + headers = self._merge_headers(request.headers) + if self._get_request_target(request) is not None: + # remove all headers present in std_headers + # todo: change this to not depend on std_headers + for k, v in std_headers.items(): + if headers.get(k) == v: + headers.pop(k) + return headers + + +@register_preference(ImpersonateRequestHandler) +def impersonate_preference(rh, request): + if request.extensions.get('impersonate') or rh.impersonate: + return 1000 + return 0 diff --git a/yt_dlp/options.py b/yt_dlp/options.py index f88472731..dac56dc1f 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -515,6 +515,18 @@ def create_parser(): metavar='IP', dest='source_address', default=None, help='Client-side IP address to bind to', ) + network.add_option( + '--impersonate', + metavar='CLIENT[:OS]', dest='impersonate', default=None, + help=( + 'Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. ' + 'Pass --impersonate="" to impersonate any client.'), + ) + network.add_option( + '--list-impersonate-targets', + dest='list_impersonate_targets', default=False, action='store_true', + help='List available clients to impersonate.', + ) network.add_option( '-4', '--force-ipv4', action='store_const', const='0.0.0.0', dest='source_address', From 52f5be1f1e0dc45bb397ab950f564721976a39bf Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 16 Mar 2024 22:52:38 -0500 Subject: [PATCH 3/4] [rh:curlcffi] Add support for `curl_cffi` Authored by: coletdjnz, Grub4K, pukkandan, bashonly Co-authored-by: Simon Sawicki Co-authored-by: pukkandan Co-authored-by: bashonly --- .github/workflows/build.yml | 22 +- .github/workflows/core.yml | 2 +- README.md | 9 + pyproject.toml | 2 + test/test_networking.py | 432 +++++++++++++++++++++------- test/test_socks.py | 33 +-- yt_dlp/__pyinstaller/hook-yt_dlp.py | 6 +- yt_dlp/dependencies/__init__.py | 4 + yt_dlp/networking/__init__.py | 7 + yt_dlp/networking/_curlcffi.py | 221 ++++++++++++++ yt_dlp/networking/_requests.py | 7 +- yt_dlp/networking/_urllib.py | 6 +- yt_dlp/networking/_websockets.py | 6 +- yt_dlp/networking/common.py | 11 +- 14 files changed, 628 insertions(+), 140 deletions(-) create mode 100644 yt_dlp/networking/_curlcffi.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dcbb8c501..da5f26257 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -247,9 +247,25 @@ jobs: run: | brew install coreutils python3 devscripts/install_deps.py --user -o --include build - python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt + python3 devscripts/install_deps.py --print --include pyinstaller_macos > requirements.txt # We need to ignore wheels otherwise we break universal2 builds python3 -m pip install -U --user --no-binary :all: -r requirements.txt + # We need to fuse our own universal2 wheels for curl_cffi + python3 -m pip install -U --user delocate + mkdir curl_cffi_whls curl_cffi_universal2 + python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt + for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do + python3 -m pip download \ + --only-binary=:all: \ + --platform "${platform}" \ + --pre -d curl_cffi_whls \ + -r requirements.txt + done + python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2 + python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2 + cd curl_cffi_universal2 + for wheel in *cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done + python3 -m pip install -U --user *cffi*.whl - name: Prepare run: | @@ -303,7 +319,7 @@ jobs: run: | brew install coreutils python3 devscripts/install_deps.py --user -o --include build - python3 devscripts/install_deps.py --user --include pyinstaller + python3 devscripts/install_deps.py --user --include pyinstaller_macos --include curl_cffi - name: Prepare run: | @@ -345,7 +361,7 @@ jobs: - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build - python devscripts/install_deps.py --include py2exe + python devscripts/install_deps.py --include py2exe --include curl_cffi python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" - name: Prepare diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index ba8630630..076f785bf 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -53,7 +53,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include dev + run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi - name: Run tests continue-on-error: False run: | diff --git a/README.md b/README.md index d4b89229f..f1b133438 100644 --- a/README.md +++ b/README.md @@ -196,6 +196,15 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**requests**](https://github.com/psf/requests)\* - HTTP library. For HTTPS proxy and persistent connections support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE) +#### Impersonation + +The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. + +* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) + * Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]` + * Only included in `yt-dlp.exe`, `yt-dlp_macos` and `yt-dlp_macos_legacy` builds + + ### Metadata * [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) diff --git a/pyproject.toml b/pyproject.toml index 64504ff98..aebbadbcb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies = [ [project.optional-dependencies] default = [] +curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"] secretstorage = [ "cffi", "secretstorage", @@ -69,6 +70,7 @@ dev = [ "pytest", ] pyinstaller = ["pyinstaller>=6.3"] +pyinstaller_macos = ["pyinstaller==5.13.2"] # needed for curl_cffi builds py2exe = ["py2exe>=0.12"] [project.urls] diff --git a/test/test_networking.py b/test/test_networking.py index b67b521d9..b50f70d08 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -30,7 +30,7 @@ from http.cookiejar import CookieJar from test.conftest import validate_and_send from test.helper import FakeYDL, http_server_port, verify_address_availability from yt_dlp.cookies import YoutubeDLCookieJar -from yt_dlp.dependencies import brotli, requests, urllib3 +from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.networking import ( HEADRequest, PUTRequest, @@ -57,7 +57,7 @@ from yt_dlp.networking.impersonate import ( ) from yt_dlp.utils import YoutubeDLError from yt_dlp.utils._utils import _YDLLogger as FakeLogger -from yt_dlp.utils.networking import HTTPHeaderDict +from yt_dlp.utils.networking import HTTPHeaderDict, std_headers TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -79,6 +79,7 @@ def _build_proxy_handler(name): class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): protocol_version = 'HTTP/1.1' + default_request_version = 'HTTP/1.1' def log_message(self, format, *args): pass @@ -116,6 +117,8 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): def _read_data(self): if 'Content-Length' in self.headers: return self.rfile.read(int(self.headers['Content-Length'])) + else: + return b'' def do_POST(self): data = self._read_data() + str(self.headers).encode() @@ -199,7 +202,8 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): self._headers() elif self.path.startswith('/308-to-headers'): self.send_response(308) - self.send_header('Location', '/headers') + # redirect to "localhost" for testing cookie redirection handling + self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers') self.send_header('Content-Length', '0') self.end_headers() elif self.path == '/trailing_garbage': @@ -314,7 +318,7 @@ class TestRequestHandlerBase: class TestHTTPRequestHandler(TestRequestHandlerBase): - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_verify_cert(self, handler): with handler() as rh: with pytest.raises(CertificateVerifyError): @@ -325,7 +329,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert r.status == 200 r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_ssl_error(self, handler): # HTTPS server with too old TLS version # XXX: is there a better way to test this than to create a new server? @@ -339,11 +343,11 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): https_server_thread.start() with handler(verify=False) as rh: - with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: + with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info: validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) assert not issubclass(exc_info.type, CertificateVerifyError) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_percent_encode(self, handler): with handler() as rh: # Unicode characters should be encoded with uppercase percent-encoding @@ -355,7 +359,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.status == 200 res.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) @pytest.mark.parametrize('path', [ '/a/b/./../../headers', '/redirect_dotsegments', @@ -371,6 +375,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.url == f'http://127.0.0.1:{self.http_port}/headers' res.close() + # Not supported by CurlCFFI (non-standard) @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) def test_unicode_path_redirection(self, handler): with handler() as rh: @@ -378,7 +383,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html' r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_raise_http_error(self, handler): with handler() as rh: for bad_status in (400, 500, 599, 302): @@ -388,7 +393,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): # Should not raise an error validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_response_url(self, handler): with handler() as rh: # Response url should be that of the last url in redirect chain @@ -399,62 +404,50 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200' res2.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) - def test_redirect(self, handler): + # Covers some basic cases we expect some level of consistency between request handlers for + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + @pytest.mark.parametrize('redirect_status,method,expected', [ + # A 303 must either use GET or HEAD for subsequent request + (303, 'POST', ('', 'GET', False)), + (303, 'HEAD', ('', 'HEAD', False)), + + # 301 and 302 turn POST only into a GET + (301, 'POST', ('', 'GET', False)), + (301, 'HEAD', ('', 'HEAD', False)), + (302, 'POST', ('', 'GET', False)), + (302, 'HEAD', ('', 'HEAD', False)), + + # 307 and 308 should not change method + (307, 'POST', ('testdata', 'POST', True)), + (308, 'POST', ('testdata', 'POST', True)), + (307, 'HEAD', ('', 'HEAD', False)), + (308, 'HEAD', ('', 'HEAD', False)), + ]) + def test_redirect(self, handler, redirect_status, method, expected): with handler() as rh: - def do_req(redirect_status, method, assert_no_content=False): - data = b'testdata' if method in ('POST', 'PUT') else None - res = validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data)) + data = b'testdata' if method == 'POST' else None + headers = {} + if data is not None: + headers['Content-Type'] = 'application/test' + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data, + headers=headers)) - headers = b'' - data_sent = b'' - if data is not None: - data_sent += res.read(len(data)) - if data_sent != data: - headers += data_sent - data_sent = b'' + headers = b'' + data_recv = b'' + if data is not None: + data_recv += res.read(len(data)) + if data_recv != data: + headers += data_recv + data_recv = b'' - headers += res.read() + headers += res.read() - if assert_no_content or data is None: - assert b'Content-Type' not in headers - assert b'Content-Length' not in headers - else: - assert b'Content-Type' in headers - assert b'Content-Length' in headers + assert expected[0] == data_recv.decode() + assert expected[1] == res.headers.get('method') + assert expected[2] == ('content-length' in headers.decode().lower()) - return data_sent.decode(), res.headers.get('method', '') - - # A 303 must either use GET or HEAD for subsequent request - assert do_req(303, 'POST', True) == ('', 'GET') - assert do_req(303, 'HEAD') == ('', 'HEAD') - - assert do_req(303, 'PUT', True) == ('', 'GET') - - # 301 and 302 turn POST only into a GET - assert do_req(301, 'POST', True) == ('', 'GET') - assert do_req(301, 'HEAD') == ('', 'HEAD') - assert do_req(302, 'POST', True) == ('', 'GET') - assert do_req(302, 'HEAD') == ('', 'HEAD') - - assert do_req(301, 'PUT') == ('testdata', 'PUT') - assert do_req(302, 'PUT') == ('testdata', 'PUT') - - # 307 and 308 should not change method - for m in ('POST', 'PUT'): - assert do_req(307, m) == ('testdata', m) - assert do_req(308, m) == ('testdata', m) - - assert do_req(307, 'HEAD') == ('', 'HEAD') - assert do_req(308, 'HEAD') == ('', 'HEAD') - - # These should not redirect and instead raise an HTTPError - for code in (300, 304, 305, 306): - with pytest.raises(HTTPError): - do_req(code, 'GET') - - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_request_cookie_header(self, handler): # We should accept a Cookie header being passed as in normal headers and handle it appropriately. with handler() as rh: @@ -463,16 +456,17 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request( f'http://127.0.0.1:{self.http_port}/headers', headers={'Cookie': 'test=test'})).read().decode() - assert 'Cookie: test=test' in res + assert 'cookie: test=test' in res.lower() # Specified Cookie header should be removed on any redirect res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/308-to-headers', - headers={'Cookie': 'test=test'})).read().decode() - assert 'Cookie: test=test' not in res + headers={'Cookie': 'test=test2'})).read().decode() + assert 'cookie: test=test2' not in res.lower() # Specified Cookie header should override global cookiejar for that request + # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( version=0, name='test', value='ytdlp', port=None, port_specified=False, @@ -482,23 +476,23 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): with handler(cookiejar=cookiejar) as rh: data = validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read() - assert b'Cookie: test=ytdlp' not in data - assert b'Cookie: test=test' in data + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read() + assert b'cookie: test=ytdlp' not in data.lower() + assert b'cookie: test=test3' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_redirect_loop(self, handler): with handler() as rh: with pytest.raises(HTTPError, match='redirect loop'): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop')) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_incompleteread(self, handler): with handler(timeout=2) as rh: - with pytest.raises(IncompleteRead): + with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'): validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( @@ -507,47 +501,66 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): with handler(cookiejar=cookiejar) as rh: data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() - assert b'Cookie: test=ytdlp' in data + assert b'cookie: test=ytdlp' in data.lower() # Per request with handler() as rh: data = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read() - assert b'Cookie: test=ytdlp' in data + assert b'cookie: test=ytdlp' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_headers(self, handler): with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: # Global Headers - data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() - assert b'Test1: test' in data + data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower() + assert b'test1: test' in data # Per request headers, merged with global data = validate_and_send(rh, Request( - f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read() - assert b'Test1: test' in data - assert b'Test2: changed' in data - assert b'Test2: test2' not in data - assert b'Test3: test3' in data + f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower() + assert b'test1: test' in data + assert b'test2: changed' in data + assert b'test2: test2' not in data + assert b'test3: test3' in data - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) - def test_timeout(self, handler): + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_read_timeout(self, handler): with handler() as rh: # Default timeout is 20 seconds, so this should go through validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3')) + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) - with handler(timeout=0.5) as rh: + with handler(timeout=0.1) as rh: with pytest.raises(TransportError): validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5')) # Per request timeout, should override handler timeout validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_connect_timeout(self, handler): + # nothing should be listening on this port + connect_timeout_url = 'http://10.255.255.255' + with handler(timeout=0.01) as rh: + now = time.time() + with pytest.raises(TransportError): + validate_and_send( + rh, Request(connect_timeout_url)) + assert 0.01 <= time.time() - now < 20 + + with handler() as rh: + with pytest.raises(TransportError): + # Per request timeout, should override handler timeout + now = time.time() + validate_and_send( + rh, Request(connect_timeout_url, extensions={'timeout': 0.01})) + assert 0.01 <= time.time() - now < 20 + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_source_address(self, handler): source_address = f'127.0.0.{random.randint(5, 255)}' # on some systems these loopback addresses we need for testing may not be available @@ -558,6 +571,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode() assert source_address == data + # Not supported by CurlCFFI @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) def test_gzip_trailing_garbage(self, handler): with handler() as rh: @@ -575,7 +589,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.headers.get('Content-Encoding') == 'br' assert res.read() == b'