From f04b5bedad7b281bee9814686bba1762bae092eb Mon Sep 17 00:00:00 2001 From: bashonly Date: Wed, 16 Aug 2023 18:42:48 -0500 Subject: [PATCH] [ie] Do not smuggle `http_headers` See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x Authored by: coletdjnz --- test/test_networking.py | 4 ++++ yt_dlp/extractor/cybrary.py | 2 +- yt_dlp/extractor/duboku.py | 2 +- yt_dlp/extractor/embedly.py | 2 +- yt_dlp/extractor/generic.py | 11 ++++++----- yt_dlp/extractor/slideslive.py | 2 +- yt_dlp/extractor/storyfire.py | 4 +--- yt_dlp/extractor/vimeo.py | 6 +++--- yt_dlp/utils/networking.py | 1 + 9 files changed, 19 insertions(+), 15 deletions(-) diff --git a/test/test_networking.py b/test/test_networking.py index 689161fb2..4466fc048 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -1293,6 +1293,10 @@ def test_clean_header(self): assert 'Youtubedl-no-compression' not in rh.headers assert rh.headers.get('Accept-Encoding') == 'identity' + with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl: + rh = self.build_handler(ydl) + assert 'Ytdl-socks-proxy' not in rh.headers + def test_build_handler_params(self): with FakeYDL({ 'http_headers': {'test': 'testtest'}, diff --git a/yt_dlp/extractor/cybrary.py b/yt_dlp/extractor/cybrary.py index aeffe93b4..c4c78ee1b 100644 --- a/yt_dlp/extractor/cybrary.py +++ b/yt_dlp/extractor/cybrary.py @@ -105,7 +105,7 @@ def _real_extract(self, url): 'chapter': module.get('title'), 'chapter_id': str_or_none(module.get('id')), 'title': activity.get('title'), - 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'http_headers': {'Referer': 'https://api.cybrary.it'}}) + 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'}) } diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index fb0546cae..fc9564cef 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -138,7 +138,7 @@ def 
_real_extract(self, url): # of the video. return { '_type': 'url_transparent', - 'url': smuggle_url(data_url, {'http_headers': headers}), + 'url': smuggle_url(data_url, {'referer': webpage_url}), 'id': video_id, 'title': title, 'series': series_title, diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py index 458aaa0a0..a424b49df 100644 --- a/yt_dlp/extractor/embedly.py +++ b/yt_dlp/extractor/embedly.py @@ -106,4 +106,4 @@ def _real_extract(self, url): return self.url_result(src, YoutubeTabIE) return self.url_result(smuggle_url( urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))), - {'http_headers': {'Referer': url}})) + {'referer': url})) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index ac7cc673f..1503e5146 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -17,6 +17,7 @@ determine_protocol, dict_get, extract_basic_auth, + filter_dict, format_field, int_or_none, is_html, @@ -2435,10 +2436,10 @@ def _real_extract(self, url): # to accept raw bytes and being able to download only a chunk. # It may probably better to solve this by checking Content-Type for application/octet-stream # after a HEAD request, but not sure if we can rely on this. 
- full_response = self._request_webpage(url, video_id, headers={ + full_response = self._request_webpage(url, video_id, headers=filter_dict({ 'Accept-Encoding': 'identity', - **smuggled_data.get('http_headers', {}) - }) + 'Referer': smuggled_data.get('referer'), + })) new_url = full_response.url url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl() if new_url != extract_basic_auth(url)[0]: @@ -2458,7 +2459,7 @@ def _real_extract(self, url): m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type) if m: self.report_detected('direct video link') - headers = smuggled_data.get('http_headers', {}) + headers = filter_dict({'Referer': smuggled_data.get('referer')}) format_id = str(m.group('format_id')) ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response) subtitles = {} @@ -2710,7 +2711,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}): 'url': smuggle_url(json_ld['url'], { 'force_videoid': video_id, 'to_generic': True, - 'http_headers': {'Referer': url}, + 'referer': url, }), }, json_ld)] diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py index 13f3109d7..df2af3b35 100644 --- a/yt_dlp/extractor/slideslive.py +++ b/yt_dlp/extractor/slideslive.py @@ -530,7 +530,7 @@ def _real_extract(self, url): if service_name == 'vimeo': info['url'] = smuggle_url( f'https://player.vimeo.com/video/{service_id}', - {'http_headers': {'Referer': url}}) + {'referer': url}) video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id')) if not video_slides: diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index 035747c31..566f77782 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -32,9 +32,7 @@ def _parse_video(self, video): 'description': video.get('description'), 'url': smuggle_url( 'https://player.vimeo.com/video/' + vimeo_id, { - 'http_headers': { - 
'Referer': 'https://storyfire.com/', - } + 'referer': 'https://storyfire.com/', }), 'thumbnail': video.get('storyImage'), 'view_count': int_or_none(video.get('views')), diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index e72fa50fa..e5e8144bb 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -37,14 +37,14 @@ class VimeoBaseInfoExtractor(InfoExtractor): @staticmethod def _smuggle_referrer(url, referrer_url): - return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) + return smuggle_url(url, {'referer': referrer_url}) def _unsmuggle_headers(self, url): """@returns (url, smuggled_data, headers)""" url, data = unsmuggle_url(url, {}) headers = self.get_param('http_headers').copy() - if 'http_headers' in data: - headers.update(data['http_headers']) + if 'referer' in data: + headers['Referer'] = data['referer'] return url, data, headers def _perform_login(self, username, password): diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py index ba0493cc2..ed0250011 100644 --- a/yt_dlp/utils/networking.py +++ b/yt_dlp/utils/networking.py @@ -123,6 +123,7 @@ def clean_headers(headers: HTTPHeaderDict): if 'Youtubedl-No-Compression' in headers: # compat del headers['Youtubedl-No-Compression'] headers['Accept-Encoding'] = 'identity' + headers.pop('Ytdl-socks-proxy', None) def remove_dot_segments(path):