From 58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 25 Jun 2023 20:10:00 +0530 Subject: [PATCH 001/218] [extractor/youtube] Add extractor-arg `formats` Closes #7417 --- README.md | 3 +-- yt_dlp/extractor/youtube.py | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 4de4ece969..d89bb204e8 100644 --- a/README.md +++ b/README.md @@ -1805,8 +1805,7 @@ #### youtube * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total -* `include_duplicate_formats`: Extract formats with identical content but different URLs or protocol. This is useful if some of the formats are unavailable or throttled. -* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8) +* `formats`: Change the types of formats to return. `dashy` (convert http to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a0d0a601ae..bdc631ccb8 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3752,7 +3752,12 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' ]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...)) - all_formats = self._configuration_arg('include_duplicate_formats') + format_types = self._configuration_arg('formats') + all_formats = 'duplicate' in format_types + if self._configuration_arg('include_duplicate_formats'): + all_formats = True + self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. 
' + 'Use formats=duplicate extractor argument instead') def build_fragments(f): return LazyList({ @@ -3892,18 +3897,23 @@ def build_fragments(f): if single_stream and dct.get('ext'): dct['container'] = dct['ext'] + '_dash' - if all_formats and dct['filesize']: + if (all_formats or 'dashy' in format_types) and dct['filesize']: yield { **dct, 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'], 'protocol': 'http_dash_segments', 'fragments': build_fragments(dct), } - dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE} - yield dct + if all_formats or 'dashy' not in format_types: + dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE} + yield dct needs_live_processing = self._needs_live_processing(live_status, duration) - skip_bad_formats = not self._configuration_arg('include_incomplete_formats') + skip_bad_formats = 'incomplete' not in format_types + if self._configuration_arg('include_incomplete_formats'): + skip_bad_formats = False + self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. ' + 'Use formats=incomplete extractor argument instead') skip_manifests = set(self._configuration_arg('skip')) if (not self.get_param('youtube_include_hls_manifest', True) @@ -3915,7 +3925,7 @@ def build_fragments(f): skip_manifests.add('dash') if self._configuration_arg('include_live_dash'): self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. ' - 'Use include_incomplete_formats extractor argument instead') + 'Use formats=incomplete extractor argument instead') elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live': skip_manifests.add('dash') From f0a1ff118145b6449982ba401f9a9f656ecd8062 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 25 Jun 2023 13:13:28 -0500 Subject: [PATCH 002/218] [extractor/qdance] Add extractor (#7420) Closes #7385 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/qdance.py | 150 ++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 yt_dlp/extractor/qdance.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 49a3f39d37..06340fcd8d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1531,6 +1531,7 @@ ) from .puls4 import Puls4IE from .pyvideo import PyvideoIE +from .qdance import QDanceIE from .qingting import QingTingIE from .qqmusic import ( QQMusicIE, diff --git a/yt_dlp/extractor/qdance.py b/yt_dlp/extractor/qdance.py new file mode 100644 index 0000000000..d817677f0e --- /dev/null +++ b/yt_dlp/extractor/qdance.py @@ -0,0 +1,150 @@ +import json +import time + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + jwt_decode_hs256, + str_or_none, + traverse_obj, + try_call, + url_or_none, +) + + +class QDanceIE(InfoExtractor): + _NETRC_MACHINE = 'qdance' + _VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P\d+)' + _TESTS = [{ + 'note': 'vod', + 'url': 'https://www.q-dance.com/network/library/146542138', + 'info_dict': { + 'id': '146542138', + 'ext': 'mp4', + 'title': 'Sound Rush [LIVE] | Defqon.1 Weekend Festival 2022 | Friday | RED', + 'display_id': 'sound-rush-live-v3-defqon-1-weekend-festival-2022-friday-red', + 'description': 'Relive Defqon.1 - Primal Energy 2022 with the sounds of Sound Rush LIVE at the RED on Friday! 
🔥', + 'season': 'Defqon.1 Weekend Festival 2022', + 'season_id': '31840632', + 'series': 'Defqon.1', + 'series_id': '31840378', + 'thumbnail': 'https://images.q-dance.network/1674829540-20220624171509-220624171509_delio_dn201093-2.jpg', + 'availability': 'premium_only', + 'duration': 1829, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'livestream', + 'url': 'https://www.q-dance.com/network/live/149170353', + 'info_dict': { + 'id': '149170353', + 'ext': 'mp4', + 'title': r're:^Defqon\.1 2023 - Friday - RED', + 'display_id': 'defqon-1-2023-friday-red', + 'description': 'md5:3c73fbbd4044e578e696adfc64019163', + 'season': 'Defqon.1 Weekend Festival 2023', + 'season_id': '141735599', + 'series': 'Defqon.1', + 'series_id': '31840378', + 'thumbnail': 'https://images.q-dance.network/1686849069-area-thumbs_red.png', + 'availability': 'subscriber_only', + 'live_status': 'is_live', + 'channel_id': 'qdancenetwork.video_149170353', + }, + 'skip': 'Completed livestream', + }] + + _access_token = None + _refresh_token = None + + def _call_login_api(self, data, note='Logging in'): + login = self._download_json( + 'https://members.id-t.com/api/auth/login', None, note, headers={ + 'content-type': 'application/json', + 'brand': 'qdance', + 'origin': 'https://www.q-dance.com', + 'referer': 'https://www.q-dance.com/', + }, data=json.dumps(data, separators=(',', ':')).encode(), + expected_status=lambda x: True) + + tokens = traverse_obj(login, ('data', { + '_id-t-accounts-token': ('accessToken', {str}), + '_id-t-accounts-refresh': ('refreshToken', {str}), + '_id-t-accounts-id-token': ('idToken', {str}), + })) + + if not tokens.get('_id-t-accounts-token'): + error = ': '.join(traverse_obj(login, ('error', ('code', 'message'), {str}))) + if 'validation_error' not in error: + raise ExtractorError(f'Q-Dance API said "{error}"') + msg = 'Invalid username or password' if 'email' in data else 'Refresh token has expired' + raise ExtractorError(msg, expected=True) + + for name, value in tokens.items(): + self._set_cookie('.q-dance.com', name, value) + + def _perform_login(self, username, password): + self._call_login_api({'email': username, 'password': password}) + + def _real_initialize(self): + cookies = self._get_cookies('https://www.q-dance.com/') + self._refresh_token = try_call(lambda: cookies['_id-t-accounts-refresh'].value) + self._access_token = try_call(lambda: cookies['_id-t-accounts-token'].value) + if not self._access_token: + self.raise_login_required() + + def _get_auth(self): + if (try_call(lambda: jwt_decode_hs256(self._access_token)['exp']) or 0) <= int(time.time() - 120): + if not self._refresh_token: + raise ExtractorError( + 'Cannot refresh access token, login with yt-dlp or refresh cookies in browser') + self._call_login_api({'refreshToken': self._refresh_token}, note='Refreshing access token') + self._real_initialize() + + return {'Authorization': self._access_token} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + data = self._search_nuxt_data(webpage, video_id, traverse=('data', 0, 'data')) + + def extract_availability(level): + level = int_or_none(level) or 0 + return self._availability( + needs_premium=(level >= 20), needs_subscription=(level >= 15), needs_auth=True) + + info = traverse_obj(data, { + 'title': ('title', {str.strip}), + 'description': ('description', {str.strip}), + 'display_id': ('slug', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + 'duration': ('durationInSeconds', {int_or_none}, 
{lambda x: x or None}), + 'availability': ('subscription', 'level', {extract_availability}), + 'is_live': ('type', {lambda x: x.lower() == 'live'}), + 'artist': ('acts', ..., {str}), + 'series': ('event', 'title', {str.strip}), + 'series_id': ('event', 'id', {str_or_none}), + 'season': ('eventEdition', 'title', {str.strip}), + 'season_id': ('eventEdition', 'id', {str_or_none}), + 'channel_id': ('pubnub', 'channelName', {str}), + }) + + stream = self._download_json( + f'https://dc9h6qmsoymbq.cloudfront.net/api/content/videos/{video_id}/url', + video_id, headers=self._get_auth(), expected_status=401) + + m3u8_url = traverse_obj(stream, ('data', 'url', {url_or_none})) + if not m3u8_url and traverse_obj(stream, ('error', 'code')) == 'unauthorized': + raise ExtractorError('Your account does not have access to this content', expected=True) + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, fatal=False, live=True) if m3u8_url else [] + if not formats: + self.raise_no_formats('No active streams found', expected=bool(info.get('is_live'))) + + return { + **info, + 'id': video_id, + 'formats': formats, + } From 5e16cf92eb496b7c1541a6b1d727cb87542984db Mon Sep 17 00:00:00 2001 From: nnoboa <90611593+nnoboa@users.noreply.github.com> Date: Sun, 25 Jun 2023 16:22:38 -0400 Subject: [PATCH 003/218] [extractor/AdultSwim] Extract subtitles from m3u8 (#7421) Authored by: nnoboa Closes #6191 --- yt_dlp/extractor/adultswim.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py index bd29eb43e5..daaeddeb6e 100644 --- a/yt_dlp/extractor/adultswim.py +++ b/yt_dlp/extractor/adultswim.py @@ -170,8 +170,10 @@ def _real_extract(self, url): continue ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) if ext == 'm3u8': - info['formats'].extend(self._extract_m3u8_formats( - asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + fmts, subs = self._extract_m3u8_formats_and_subtitles( + asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + info['formats'].extend(fmts) + self._merge_subtitles(subs, target=info['subtitles']) elif ext == 'f4m': continue # info['formats'].extend(self._extract_f4m_formats( From ef8509c300ea50da86aea447eb214d3d6f6db6bb Mon Sep 17 00:00:00 2001 From: bashonly Date: Sun, 25 Jun 2023 17:04:42 -0500 Subject: [PATCH 004/218] [extractor/kick] Fix `_VALID_URL` Closes #7384 Authored by: bashonly --- yt_dlp/extractor/kick.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py index 765ffa0c80..be1dfd4b16 100644 --- a/yt_dlp/extractor/kick.py +++ b/yt_dlp/extractor/kick.py @@ -30,7 +30,7 @@ def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, * class KickIE(KickBaseIE): - _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P[\w_]+)' + _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P[\w-]+)' _TESTS = [{ 'url': 'https://kick.com/yuppy', 'info_dict': { From d949c10c45bfc359bdacd52e6a180169b8128958 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Jun 2023 07:25:47 +0530 Subject: [PATCH 005/218] [extractor/youtube] Process `post_live` over 2 hours --- yt_dlp/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index bdc631ccb8..d5607975e5 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ 
-3737,7 +3737,7 @@ def append_client(*client_names): def _needs_live_processing(self, live_status, duration): if (live_status == 'is_live' and self.get_param('live_from_start') - or live_status == 'post_live' and (duration or 0) > 4 * 3600): + or live_status == 'post_live' and (duration or 0) > 2 * 3600): return live_status def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): @@ -4238,7 +4238,7 @@ def is_bad_format(fmt): for fmt in filter(is_bad_format, formats): fmt['preference'] = (fmt.get('preference') or -1) - 10 - fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ') + fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ') if needs_live_processing: self._prepare_live_from_start_formats( From 8a8af356e3bba98a7f7d333aff0777d5d92130c8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Jun 2023 16:13:31 +0530 Subject: [PATCH 006/218] [downloader/aria2c] Add `--no-conf` Closes #7404 --- yt_dlp/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 007689a8c9..f637a100bf 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -271,7 +271,7 @@ def _call_downloader(self, tmpfilename, info_dict): return super()._call_downloader(tmpfilename, info_dict) def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-c', + cmd = [self.exe, '-c', '--no-conf', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] if 'fragments' in info_dict: From f393bbe724b1fc6c7f754a5da507e807b2b40ad2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Jun 2023 16:14:20 +0530 Subject: [PATCH 007/218] [extractor/sbs] Python 3.7 compat Closes #7410 --- yt_dlp/extractor/sbs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py index ac0b6de202..119106e8ef 100644 --- a/yt_dlp/extractor/sbs.py +++ b/yt_dlp/extractor/sbs.py @@ -139,8 +139,8 @@ def _real_extract(self, url): 'release_year': ('releaseYear', {int_or_none}), 'duration': ('duration', ({float_or_none}, {parse_duration})), 'is_live': ('liveStream', {bool}), - 'age_limit': ( - ('classificationID', 'contentRating'), {str.upper}, {self._AUS_TV_PARENTAL_GUIDELINES.get}), + 'age_limit': (('classificationID', 'contentRating'), {str.upper}, { + lambda x: self._AUS_TV_PARENTAL_GUIDELINES.get(x)}), # dict.get is unhashable in py3.7 }, get_all=False), **traverse_obj(media, { 'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}), From 91302ed349f34dc26cc1d661bb45a4b71f4417f7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Jun 2023 16:19:49 +0530 Subject: [PATCH 008/218] [utils] clean_podcast_url: Handle protocol in redirect URL Closes #7430 --- yt_dlp/utils/_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index de51f62083..f68cdb9686 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -5113,7 +5113,7 @@ def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', def clean_podcast_url(url): - return re.sub(r'''(?x) + url = re.sub(r'''(?x) (?: (?: chtbl\.com/track| @@ -5127,6 +5127,7 @@ def clean_podcast_url(url): st\.fm # https://podsights.com/docs/ )/e )/''', '', url) + return re.sub(r'^\w+://(\w+://)', 
r'\1', url) _HEX_TABLE = '0123456789abcdef' From 5b4b92769afcc398475e481bfa839f1158902fe9 Mon Sep 17 00:00:00 2001 From: Aman Salwan <121633121+AmanSal1@users.noreply.github.com> Date: Wed, 28 Jun 2023 01:58:23 +0530 Subject: [PATCH 009/218] [extractor/crunchyroll:music] Fix `_VALID_URL` (#7439) Closes #7419 Authored by: AmanSal1, rdamas Co-authored-by: Robert Damas --- yt_dlp/extractor/crunchyroll.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index d4a21616ba..910504ed29 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -490,8 +490,21 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?crunchyroll\.com/ (?P(?:\w{2}(?:-\w{2})?/)?) - watch/(?Pconcert|musicvideo)/(?P\w{10})''' + watch/(?Pconcert|musicvideo)/(?P\w+)''' _TESTS = [{ + 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79', + 'info_dict': { + 'ext': 'mp4', + 'id': 'MV5B02C79', + 'display_id': 'egaono-hana', + 'title': 'Egaono Hana', + 'track': 'Egaono Hana', + 'artist': 'Goose house', + 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'genre': ['J-Pop'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C', 'info_dict': { 'ext': 'mp4', @@ -519,11 +532,14 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE): }, 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field', + 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana', 'only_matching': True, }, { 'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena', 'only_matching': True, + }, { + 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field', + 'only_matching': True, }] _API_ENDPOINT = 'music' From 8f05fbae2a79ce0713077ccc68b354e63216bf20 Mon Sep 17 00:00:00 2001 From: Xiao Han <38774211+meliber@users.noreply.github.com> Date: Tue, 27 Jun 2023 16:16:57 -0500 Subject: [PATCH 010/218] [extractor/abc] Fix extraction (#7434) Closes #6433 Authored by: meliber --- yt_dlp/extractor/abc.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 0ca76b85a8..f56133eb3e 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -12,6 +12,7 @@ int_or_none, parse_iso8601, str_or_none, + traverse_obj, try_get, unescapeHTML, update_url_query, @@ -85,6 +86,15 @@ class ABCIE(InfoExtractor): 'uploader': 'Behind the News', 'uploader_id': 'behindthenews', } + }, { + 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', + 'info_dict': { + 'id': '102520540', + 'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus', + 'ext': 'mp4', + 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', + 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', + } }] def _real_extract(self, url): @@ -107,7 +117,7 @@ def _real_extract(self, url): video = True if mobj is None: - mobj = re.search(r'(?P)"sources": (?P\[[^\]]+\]),', webpage) + mobj = 
re.search(r'(?P)"(?:sources|files|renditions)":\s*(?P\[[^\]]+\])', webpage) if mobj is None: mobj = re.search( r'inline(?PVideo|Audio|YouTube)Data\.push\((?P[^)]+)\);', @@ -121,7 +131,8 @@ def _real_extract(self, url): urls_info = self._parse_json( mobj.group('json_data'), video_id, transform_source=js_to_json) youtube = mobj.group('type') == 'YouTube' - video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4' + video = mobj.group('type') == 'Video' or traverse_obj( + urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4' if not isinstance(urls_info, list): urls_info = [urls_info] From a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2 Mon Sep 17 00:00:00 2001 From: bashonly Date: Tue, 27 Jun 2023 16:50:02 -0500 Subject: [PATCH 011/218] [extractor/Douyin] Fix extraction from webpage Closes #7431 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 9c6d74007d..2f491c3170 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1015,18 +1015,16 @@ def _real_extract(self, url): self.to_screen(f'{e}; trying with webpage') webpage = self._download_webpage(url, video_id) - render_data_json = self._search_regex( - r'', - webpage, 'render data', default=None) - if not render_data_json: + render_data = self._search_json( + r'', webpage)] post = traverse_obj(post_data, ( - ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] + ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: ( k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) title = get_first(media, ('title', 'text')) @@ -493,14 +509,14 @@ def process_formats(info): def extract_relay_data(_filter): return self._parse_json(self._search_regex( - r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter, + r'data-sjs>({.*?%s.*?})' % _filter, webpage, 'replay data', default='{}'), video_id, fatal=False) or {} def extract_relay_prefetched_data(_filter): - replay_data = extract_relay_data(_filter) - for require in (replay_data.get('require') or []): - if require[0] == 'RelayPrefetchedStreamCache': - return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {} + return traverse_obj(extract_relay_data(_filter), ( + 'require', (None, (..., ..., ..., '__bbox', 'require')), + lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ..., + '__bbox', 'result', 'data', {dict}), get_all=False) or {} if not video_data: server_js_data = self._parse_json(self._search_regex([ @@ -511,7 +527,7 @@ def extract_relay_prefetched_data(_filter): if not video_data: data = extract_relay_prefetched_data( - r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"') + r'"(?:dash_manifest|playable_url(?:_quality_hd)?)') if data: entries = [] @@ -526,7 +542,8 @@ def parse_graphql_video(video): formats = [] q = qualities(['sd', 'hd']) for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), - ('playable_url_dash', '')): + ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'), + ('browser_native_sd_url', 'sd')): playable_url = video.get(key) if not playable_url: continue From fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0 Mon Sep 17 00:00:00 2001 From: ifan-t Date: Fri, 8 Sep 2023 13:25:43 +0100 Subject: [PATCH 125/218] 
[ie/S4C] Add series support and extract subs/thumbs (#7776) Authored by: ifan-t --- yt_dlp/extractor/_extractors.py | 5 ++- yt_dlp/extractor/s4c.py | 57 +++++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index f11554bddf..b788737a2d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1710,7 +1710,10 @@ RuvIE, RuvSpilaIE ) -from .s4c import S4CIE +from .s4c import ( + S4CIE, + S4CSeriesIE +) from .safari import ( SafariIE, SafariApiIE, diff --git a/yt_dlp/extractor/s4c.py b/yt_dlp/extractor/s4c.py index 38a9058960..990ea2b447 100644 --- a/yt_dlp/extractor/s4c.py +++ b/yt_dlp/extractor/s4c.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import traverse_obj +from ..utils import traverse_obj, url_or_none class S4CIE(InfoExtractor): @@ -11,7 +11,8 @@ class S4CIE(InfoExtractor): 'ext': 'mp4', 'title': 'Y Swn', 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', - 'duration': 5340 + 'duration': 5340, + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg' }, }, { 'url': 'https://www.s4c.cymru/clic/programme/856636948', @@ -21,6 +22,7 @@ class S4CIE(InfoExtractor): 'title': 'Am Dro', 'duration': 2880, 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg' }, }] @@ -30,7 +32,7 @@ def _real_extract(self, url): f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}', video_id, fatal=False) - filename = self._download_json( + player_config = self._download_json( 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={ 'programme_id': video_id, 'signed': '0', @@ -38,7 +40,13 @@ def _real_extract(self, url): 'mode': 'od', 'appId': 'clic', 'streamName': '', - }, note='Downloading player config JSON')['filename'] + }, note='Downloading player config JSON') + subtitles = {} + for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))): + subtitles.setdefault(sub.get('3', 'en'), []).append({ + 'url': sub['0'], + 'name': sub.get('1'), + }) m3u8_url = self._download_json( 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ 'mode': 'od', @@ -46,17 +54,52 @@ def _real_extract(self, url): 'region': 'WW', 'extra': 'false', 'thirdParty': 'false', - 'filename': filename, + 'filename': player_config['filename'], }, note='Downloading streaming urls JSON')['hls'] - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') return { 'id': video_id, - 'formats': formats, + 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'), 'subtitles': subtitles, + 'thumbnail': url_or_none(player_config.get('poster')), **traverse_obj(details, ('full_prog_details', 0, { 'title': (('programme_title', 'series_title'), {str}), 'description': ('full_billing', {str.strip}), 'duration': ('duration', {lambda x: int(x) * 60}), }), get_all=False), } + + +class S4CSeriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.s4c.cymru/clic/series/864982911', + 'playlist_mincount': 6, + 'info_dict': { + 'id': '864982911', + 'title': 'Iaith ar Daith', + 'description': 'md5:e878ebf660dce89bd2ef521d7ce06397' + }, + }, { + 'url': 'https://www.s4c.cymru/clic/series/866852587', + 'playlist_mincount': 8, + 'info_dict': { + 'id': '866852587', + 'title': 
'FFIT Cymru', + 'description': 'md5:abcb3c129cb68dbb6cd304fd33b07e96' + }, + }] + + def _real_extract(self, url): + series_id = self._match_id(url) + series_details = self._download_json( + 'https://www.s4c.cymru/df/series_details', series_id, query={ + 'lang': 'e', + 'series_id': series_id, + 'show_prog_in_series': 'Y' + }, note='Downloading series details JSON') + + return self.playlist_result( + [self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id) + for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))], + series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str}))) From 5d0395498d7065aa5e55bac85fa9354b4b0d48eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szaby=20Gr=C3=BCnwald?= Date: Fri, 8 Sep 2023 14:54:41 +0200 Subject: [PATCH 126/218] [ie/wdr] Fix extraction (#7979) Closes #7461 Authored by: szabyg --- yt_dlp/extractor/wdr.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index de5dc26667..6767f26544 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -173,6 +173,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'skip': 'HTTP Error 404: Not Found', }, { + # FIXME: Asset JSON is directly embedded in webpage 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', 'info_dict': { 'id': 'mdb-2296252', @@ -221,6 +222,8 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'id': 'mdb-869971', 'ext': 'mp4', 'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'alt_title': 'COSMO Livestream', + 'live_status': 'is_live', 'upload_date': '20160101', }, 'params': { @@ -248,6 +251,16 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html', 'only_matching': True, }, + { + 'url': 'https://www1.wdr.de/mediathek/video/sendungen/rockpalast/video-baroness---freak-valley-festival--100.html', + 'info_dict': { + 'id': 'mdb-2741028', + 'ext': 'mp4', + 'title': 'Baroness - Freak Valley Festival 2022', + 'alt_title': 'Rockpalast', + 'upload_date': '20220725', + }, + } ] def _real_extract(self, url): @@ -259,7 +272,7 @@ def _real_extract(self, url): # Article with several videos - # for wdr.de the data-extension is in a tag with the class "mediaLink" + # for wdr.de the data-extension-ard is in a tag with the class "mediaLink" # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" # for wdrmaus, in a tag with the class "videoButton" (previously a link # to the page in a multiline "videoLink"-tag) @@ -268,7 +281,7 @@ def _real_extract(self, url): (?: (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+| (["\'])videoLink\b.*?\2[\s]*>\n[^\n]* - )data-extension=(["\'])(?P(?:(?!\3).)+)\3 + )data-extension(?:-ard)?=(["\'])(?P(?:(?!\3).)+)\3 ''', webpage): media_link_obj = self._parse_json( mobj.group('data'), display_id, transform_source=js_to_json, @@ -295,7 +308,7 @@ def _real_extract(self, url): compat_urlparse.urljoin(url, mobj.group('href')), ie=WDRPageIE.ie_key()) for mobj in re.finditer( - r']+\bhref=(["\'])(?P(?:(?!\1).)+)\1[^>]+\bdata-extension=', + r']+\bhref=(["\'])(?P(?:(?!\1).)+)\1[^>]+\bdata-extension(?:-ard)?=', webpage) if re.match(self._PAGE_REGEX, mobj.group('href')) ] From a006ce2b27357c15792eb5c18f06765e640b801c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> 
Date: Sat, 9 Sep 2023 10:14:49 -0500 Subject: [PATCH 127/218] [ie/twitter] Fix retweet extraction and syndication API (#8016) Authored by: bashonly --- README.md | 2 +- yt_dlp/extractor/twitter.py | 181 ++++++++++++++++++++++++++---------- 2 files changed, 132 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index b82d92a6ec..c7b73f4fd6 100644 --- a/README.md +++ b/README.md @@ -1854,7 +1854,7 @@ #### rokfinchannel * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` #### twitter -* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed +* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in #### stacommu, wrestleuniverse * `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index f86216f8ff..4065acbaaa 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1,9 +1,10 @@ -import functools import json +import random import re from .common import InfoExtractor from .periscope import PeriscopeBaseIE, PeriscopeIE +from ..compat import functools # isort: split from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, @@ -147,10 +148,14 @@ def _search_dimensions_in_video_url(a_format, video_url): def is_logged_in(self): return bool(self._get_cookies(self._API_BASE).get('auth_token')) + @functools.cached_property + def _selected_api(self): + return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0] + def _fetch_guest_token(self, display_id): guest_token = traverse_obj(self._download_json( f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'', - headers=self._set_base_headers(legacy=display_id and self._configuration_arg('legacy_api'))), + headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')), ('guest_token', {str})) if not guest_token: raise ExtractorError('Could not retrieve guest token') @@ -295,7 +300,7 @@ def input_dict(subtask_id, text): self.report_login() def _call_api(self, path, video_id, query={}, graphql=False): - headers = self._set_base_headers(legacy=not graphql and self._configuration_arg('legacy_api')) + headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy') headers.update({ 'x-twitter-auth-type': 'OAuth2Session', 'x-twitter-client-language': 'en', @@ -707,6 +712,7 @@ class TwitterIE(TwitterBaseIE): 'tags': [], 'age_limit': 0, }, + 'skip': 'This Tweet is unavailable', }, { # not available in Periscope 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656', @@ -721,6 +727,7 @@ class TwitterIE(TwitterBaseIE): 'view_count': int, }, 'add_ie': ['TwitterBroadcast'], + 'skip': 'Broadcast no longer exists', }, { # unified card 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20', @@ -773,9 +780,9 @@ class TwitterIE(TwitterBaseIE): 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464', 'info_dict': { 'id': '1577719286659006464', - 'title': 'Ultima📛 | #вʟм - Test', + 'title': 'Ultima📛| New Era - Test', 'description': 'Test https://t.co/Y3KEZD7Dad', - 'uploader': 'Ultima📛 | #вʟм', + 'uploader': 'Ultima📛| New Era', 'uploader_id': 'UltimaShadowX', 'uploader_url': 'https://twitter.com/UltimaShadowX', 'upload_date': 
'20221005', @@ -811,7 +818,7 @@ class TwitterIE(TwitterBaseIE): 'age_limit': 0, }, }, { - # Adult content, fails if not logged in (GraphQL) + # Adult content, fails if not logged in 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762', 'info_dict': { 'id': '1575199163847000068', @@ -831,9 +838,10 @@ class TwitterIE(TwitterBaseIE): 'age_limit': 18, 'tags': [] }, + 'params': {'skip_download': 'The media could not be played'}, 'skip': 'Requires authentication', }, { - # Playlist result only with auth + # Playlist result only with graphql API 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435', 'playlist_mincount': 2, 'info_dict': { @@ -898,7 +906,7 @@ class TwitterIE(TwitterBaseIE): 'uploader_id': 'MoniqueCamarra', 'live_status': 'was_live', 'release_timestamp': 1658417414, - 'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad', + 'description': 'md5:acce559345fd49f129c20dbcda3f1201', 'timestamp': 1658407771, 'release_date': '20220721', 'upload_date': '20220721', @@ -1007,10 +1015,10 @@ class TwitterIE(TwitterBaseIE): 'view_count': int, 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig', 'age_limit': 0, - 'uploader': 'Mün The Friend Of YWAP', + 'uploader': 'Mün', 'repost_count': int, 'upload_date': '20221206', - 'title': 'Mün The Friend Of YWAP - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525', + 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525', 'comment_count': int, 'like_count': int, 'tags': [], @@ -1019,7 +1027,7 @@ class TwitterIE(TwitterBaseIE): 'timestamp': 1670306984.0, }, }, { - # url to retweet id w/ legacy api + # retweeted_status (private) 'url': 'https://twitter.com/liberdalau/status/1623739803874349067', 'info_dict': { 'id': '1623274794488659969', @@ -1039,32 +1047,84 @@ class TwitterIE(TwitterBaseIE): 'like_count': int, 'repost_count': int, }, - 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}}, 'skip': 'Protected tweet', }, { - # orig tweet w/ graphql - 'url': 'https://twitter.com/liberdalau/status/1623739803874349067', + # retweeted_status + 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009', 'info_dict': { - 'id': '1623274794488659969', - 'display_id': '1623739803874349067', + 'id': '1694928337846538240', 'ext': 'mp4', - 'title': '@selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people: Whoopsie-daisy', - 'description': 'md5:9258bdbb54793bdc124fe1cd47e96c6a', - 'uploader': '@selfisekai@hackerspace.pl 🐀', - 'uploader_id': 'liberdalau', - 'uploader_url': 'https://twitter.com/liberdalau', + 'display_id': '1695424220702888009', + 'title': 'md5:e8daa9527bc2b947121395494f786d9d', + 'description': 'md5:004f2d37fd58737724ec75bc7e679938', + 'uploader': 'Benny Johnson', + 'uploader_id': 'bennyjohnson', + 'uploader_url': 'https://twitter.com/bennyjohnson', 'age_limit': 0, 'tags': [], - 'duration': 8.033, - 'timestamp': 1675964711.0, - 'upload_date': '20230209', - 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+', + 'duration': 45.001, + 'timestamp': 1692962814.0, + 'upload_date': '20230825', + 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+', 'like_count': int, - 'view_count': int, 'repost_count': int, + 'view_count': int, 'comment_count': int, }, - 'skip': 'Protected tweet', + }, { + # retweeted_status w/ legacy API + 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009', + 'info_dict': { + 
'id': '1694928337846538240', + 'ext': 'mp4', + 'display_id': '1695424220702888009', + 'title': 'md5:e8daa9527bc2b947121395494f786d9d', + 'description': 'md5:004f2d37fd58737724ec75bc7e679938', + 'uploader': 'Benny Johnson', + 'uploader_id': 'bennyjohnson', + 'uploader_url': 'https://twitter.com/bennyjohnson', + 'age_limit': 0, + 'tags': [], + 'duration': 45.001, + 'timestamp': 1692962814.0, + 'upload_date': '20230825', + 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+', + 'like_count': int, + 'repost_count': int, + }, + 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}}, + }, { + # Broadcast embedded in tweet + 'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402', + 'info_dict': { + 'id': '1yNGaNLjEblJj', + 'ext': 'mp4', + 'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update', + 'uploader': 'Jessica Dobson', + 'uploader_id': '1DZEoDwDovRQa', + 'thumbnail': r're:^https?://.*\.jpg', + 'view_count': int, + }, + 'add_ie': ['TwitterBroadcast'], + }, { + # Animated gif and quote tweet video, with syndication API + 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950', + 'playlist_mincount': 2, + 'info_dict': { + 'id': '1696256659889565950', + 'title': 'BAKOON - https://t.co/zom968d0a0', + 'description': 'https://t.co/zom968d0a0', + 'tags': [], + 'uploader': 'BAKOON', + 'uploader_id': 'BAKKOOONN', + 'uploader_url': 'https://twitter.com/BAKKOOONN', + 'age_limit': 18, + 'timestamp': 1693254077.0, + 'upload_date': '20230828', + 'like_count': int, + }, + 'params': {'extractor_args': {'twitter': {'api': ['syndication']}}}, + 'expected_warnings': ['Not all metadata'], }, { # onion route 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', @@ -1103,6 +1163,14 @@ class TwitterIE(TwitterBaseIE): 'only_matching': True, }] + _MEDIA_ID_RE = re.compile(r'_video/(\d+)/') + + @property + def _GRAPHQL_ENDPOINT(self): + if self.is_logged_in: + return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail' + return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId' + def _graphql_to_legacy(self, data, twid): result = traverse_obj(data, ( 'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries', @@ -1130,9 +1198,14 @@ def _graphql_to_legacy(self, data, twid): 'user': ('core', 'user_results', 'result', 'legacy'), 'card': ('card', 'legacy'), 'quoted_status': ('quoted_status_result', 'result', 'legacy'), + 'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'), }, expected_type=dict, default={})) - # extra transformation is needed since result does not match legacy format + # extra transformations needed since result does not match legacy format + if status.get('retweeted_status'): + status['retweeted_status']['user'] = traverse_obj(status, ( + 'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {} + binding_values = { binding_value.get('key'): binding_value.get('value') for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict})) @@ -1208,33 +1281,42 @@ def _build_graphql_query(self, media_id): } def _extract_status(self, twid): - if self.is_logged_in: - return self._graphql_to_legacy( - self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid) + if self.is_logged_in or self._selected_api == 'graphql': + status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid) - try: - if not self._configuration_arg('legacy_api'): - return self._graphql_to_legacy( - 
self._call_graphql_api('2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId', twid), twid) - return traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, { + elif self._selected_api == 'legacy': + status = self._call_api(f'statuses/show/{twid}.json', twid, { 'cards_platform': 'Web-12', 'include_cards': 1, 'include_reply_count': 1, 'include_user_entities': 0, 'tweet_mode': 'extended', - }), 'retweeted_status', None) + }) - except ExtractorError as e: - if e.expected: - raise + elif self._selected_api == 'syndication': self.report_warning( - f'{e.orig_msg}. Falling back to syndication endpoint; some metadata may be missing', twid) + 'Not all metadata or media is available via syndication endpoint', twid, only_once=True) + status = self._download_json( + 'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON', + headers={'User-Agent': 'Googlebot'}, query={ + 'id': twid, + # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '') + 'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)), + }) + if not status: + raise ExtractorError('Syndication endpoint returned empty JSON response') + # Transform the result so its structure matches that of legacy/graphql + media = [] + for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})): + detail['id_str'] = traverse_obj(detail, ( + 'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid + media.append(detail) + status['extended_entities'] = {'media': media} - status = self._download_json( - 'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON', - headers={'User-Agent': 'Googlebot'}, query={'id': twid}) - status['extended_entities'] = {'media': status.get('mediaDetails')} - return status + else: + raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True) + + return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {} def _real_extract(self, url): twid, selected_index = self._match_valid_url(url).group('id', 'index') @@ -1266,10 +1348,7 @@ def _real_extract(self, url): } def extract_from_video_info(media): - media_id = traverse_obj(media, 'id_str', 'id', ( - 'video_info', 'variants', ..., 'url', - {functools.partial(re.search, r'_video/(\d+)/')}, 1 - ), get_all=False, expected_type=str_or_none) or twid + media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none) self.write_debug(f'Extracting from video info: {media_id}') formats = [] @@ -1503,6 +1582,8 @@ def _real_extract(self, url): broadcast = self._call_api( 'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})['broadcasts'][broadcast_id] + if not broadcast: + raise ExtractorError('Broadcast no longer exists', expected=True) info = self._parse_broadcast_data(broadcast, broadcast_id) media_key = broadcast['media_key'] source = self._call_api( From 66cc64ff6696f9921ff112a278542f8d999ffea4 Mon Sep 17 00:00:00 2001 From: bashonly Date: Mon, 11 Sep 2023 09:51:39 -0500 Subject: [PATCH 128/218] [ie/zoom] Extract duration Closes #8080 Authored by: bashonly --- yt_dlp/extractor/zoom.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py index 3d7ccca760..1e41d04349 100644 --- a/yt_dlp/extractor/zoom.py +++ b/yt_dlp/extractor/zoom.py @@ -127,6 +127,7 @@ def _real_extract(self, url): return { 'id': video_id, 'title': str_or_none(traverse_obj(data, ('meet', 'topic'))), + 'duration': 
int_or_none(data.get('duration')), 'subtitles': subtitles, 'formats': formats, 'http_headers': { From 7b71643cc986de9a3768dac4ac9b64f4d05e7f5e Mon Sep 17 00:00:00 2001 From: garret Date: Fri, 15 Sep 2023 18:18:51 +0100 Subject: [PATCH 129/218] [ie/mixcloud] Update API URL (#8114) Closes #8104 Authored by: garret1317 --- yt_dlp/extractor/mixcloud.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py index fb5a08ca28..8a95d1a5db 100644 --- a/yt_dlp/extractor/mixcloud.py +++ b/yt_dlp/extractor/mixcloud.py @@ -20,7 +20,7 @@ class MixcloudBaseIE(InfoExtractor): def _call_api(self, object_type, object_fields, display_id, username, slug=None): lookup_key = object_type + 'Lookup' return self._download_json( - 'https://www.mixcloud.com/graphql', display_id, query={ + 'https://app.mixcloud.com/graphql', display_id, query={ 'query': '''{ %s(lookup: {username: "%s"%s}) { %s @@ -46,7 +46,15 @@ class MixcloudIE(MixcloudBaseIE): 'view_count': int, 'timestamp': 1321359578, 'upload_date': '20111115', + 'uploader_url': 'https://www.mixcloud.com/dholbach/', + 'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills', + 'duration': 3723, + 'tags': [], + 'comment_count': int, + 'repost_count': int, + 'like_count': int, }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', 'info_dict': { @@ -60,7 +68,14 @@ class MixcloudIE(MixcloudBaseIE): 'view_count': int, 'timestamp': 1422987057, 'upload_date': '20150203', + 'uploader_url': 'https://www.mixcloud.com/gillespeterson/', + 'duration': 2992, + 'tags': [], + 'comment_count': int, + 'repost_count': int, + 'like_count': int, }, + 'params': {'skip_download': '404 playback error on site'}, }, { 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', 'only_matching': True, @@ -259,9 +274,9 @@ def _real_extract(self, url): cloudcast_url = cloudcast.get('url') if not cloudcast_url: continue - slug = try_get(cloudcast, lambda x: x['slug'], compat_str) + item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str) owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str) - video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None + video_id = f'{owner_username}_{item_slug}' if item_slug and owner_username else None entries.append(self.url_result( cloudcast_url, MixcloudIE.ie_key(), video_id)) @@ -284,7 +299,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', + 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', }, 'playlist_mincount': 36, }, { @@ -292,7 +307,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', + 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', }, 'playlist_mincount': 36, }, { @@ -300,7 +315,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_favorites', 'title': 'Daniel Holbach (favorites)', - 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', + 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b', }, # 'params': { # 'playlist_items': '1-100', @@ -323,9 +338,9 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'FirstEar_stream', 'title': 
'First Ear (stream)', - 'description': 'Curators of good music\r\n\r\nfirstearmusic.com', + 'description': 'we maraud for ears', }, - 'playlist_mincount': 271, + 'playlist_mincount': 269, }] _TITLE_KEY = 'displayName' From 497bbbbd7328cb705f70eced94dbd90993819a46 Mon Sep 17 00:00:00 2001 From: SevenLives <410355694@qq.com> Date: Sat, 16 Sep 2023 17:37:04 +0800 Subject: [PATCH 130/218] [ie/abematv] Fix proxy handling (#8046) Fixes https://github.com/yt-dlp/yt-dlp/issues/8036 Authored by: SevenLives --- yt_dlp/extractor/abematv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 163b83c6da..2a093580cc 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -12,7 +12,7 @@ import urllib.request import urllib.response import uuid - +from ..utils.networking import clean_proxies from .common import InfoExtractor from ..aes import aes_ecb_decrypt from ..utils import ( @@ -35,7 +35,10 @@ def add_opener(ydl, handler): # FIXME: Create proper API in .networking rh = ydl._request_director.handlers['Urllib'] if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES: return - opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies) + headers = ydl.params['http_headers'].copy() + proxies = ydl.proxies.copy() + clean_proxies(proxies, headers) + opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies) assert isinstance(opener, urllib.request.OpenerDirector) opener.add_handler(handler) rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license') From 578a82e497502b951036ce9da6fe0dac6937ac27 Mon Sep 17 00:00:00 2001 From: Kshitiz Gupta Date: Sat, 16 Sep 2023 15:13:05 +0530 Subject: [PATCH 131/218] [ie/banbye] Support video ids containing a hyphen (#8059) Fixes https://github.com/yt-dlp/yt-dlp/issues/7895 Authored by: kshitiz305 --- yt_dlp/extractor/banbye.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index c873425656..e0fc93b973 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -31,7 +31,7 @@ def _extract_playlist(self, playlist_id): class BanByeIE(BanByeBaseIE): - _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P\w+)' + _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P[\w-]+)' _TESTS = [{ 'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T', 'md5': '2f4ea15c5ca259a73d909b2cfd558eb5', @@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE): 'title': 'Krzysztof Karoń', 'id': 'p_Ld82N6gBw_OJ', }, - 'playlist_count': 9, + 'playlist_mincount': 9, + }, { + 'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD', + 'info_dict': { + 'id': 'v_kb6_o1Kyq-CD', + 'ext': 'mp4', + 'title': 'Co tak naprawdę dzieje się we Francji?! 
Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱', + 'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8', + 'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱', + 'channel_id': 'ch_QgWnHvDG2fo5', + 'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5', + 'duration': 597, + 'timestamp': 1688642656, + 'upload_date': '20230706', + 'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp', + 'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'], + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + 'comment_count': int, + }, }] def _real_extract(self, url): From aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f Mon Sep 17 00:00:00 2001 From: barsnick Date: Sat, 16 Sep 2023 12:04:08 +0200 Subject: [PATCH 132/218] [ie/Axs] Add extractor (#8094) Authored by: barsnick --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/axs.py | 87 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 yt_dlp/extractor/axs.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b788737a2d..b836fe8a3d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -165,6 +165,7 @@ AWAANLiveIE, AWAANSeasonIE, ) +from .axs import AxsIE from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .banbye import ( diff --git a/yt_dlp/extractor/axs.py b/yt_dlp/extractor/axs.py new file mode 100644 index 0000000000..4b263725f1 --- /dev/null +++ b/yt_dlp/extractor/axs.py @@ -0,0 +1,87 @@ +from .common import InfoExtractor +from ..utils import ( + float_or_none, + js_to_json, + parse_iso8601, + traverse_obj, + url_or_none, +) + + +class AxsIE(InfoExtractor): + IE_NAME = 'axs.tv' + _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P[^/?#]+)' + + _TESTS = [{ + 'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/', + 'md5': '8d97736ae8e50c64df528e5e676778cf', + 'info_dict': { + 'id': '5f4dc776b70e4f1c194f22ef', + 'title': 'Small Town', + 'ext': 'mp4', + 'description': 'md5:e314d28bfaa227a4d7ec965fae19997f', + 'upload_date': '20230602', + 'timestamp': 1685729564, + 'duration': 1284.216, + 'series': 'Rock & Roll Road Trip with Sammy Hagar', + 'season': 2, + 'episode': '3', + 'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394', + }, + }, { + 'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall', + 'md5': '300ae795cd8f9984652c0949734ffbdc', + 'info_dict': { + 'id': '5f488148b70e4f392572977c', + 'display_id': 'daryl-hall', + 'title': 'Daryl Hall', + 'ext': 'mp4', + 'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628', + 'upload_date': '20230214', + 'timestamp': 1676403615, + 'duration': 2570.668, + 'series': 'The Big Interview with Dan Rather', + 'season': 3, + 'episode': '5', + 'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + webpage_json_data = self._search_json( + r'mountObj\s*=', webpage, 'video ID data', display_id, + transform_source=js_to_json) + video_id = webpage_json_data['video_id'] + company_id = webpage_json_data['company_id'] + + meta = self._download_json( + f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}', + video_id, query={'device_type': 'desktop_web'})['video'] + + formats = self._extract_m3u8_formats( + meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls') + + subtitles = {} + for cc in traverse_obj(meta, ('closeCaption', 
lambda _, v: url_or_none(v['srtPath']))): + subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append( + {'ext': cc.get('srtExt'), 'url': cc['srtPath']}) + + return { + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + **traverse_obj(meta, { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'series': ('seriestitle', {str}), + 'season': ('season', {int}), + 'episode': ('episode', {str}), + 'duration': ('duration', {float_or_none}), + 'timestamp': ('updated_at', {parse_iso8601}), + 'thumbnail': ('thumb', {url_or_none}), + }), + 'subtitles': subtitles, + } From 6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5 Mon Sep 17 00:00:00 2001 From: zhallgato Date: Sat, 16 Sep 2023 12:12:18 +0200 Subject: [PATCH 133/218] [ie/mediaklikk] Fix extractor (#8086) Fixes https://github.com/yt-dlp/yt-dlp/issues/8053 Authored by: bashonly, zhallgato --- yt_dlp/extractor/mediaklikk.py | 72 ++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index 46365081b7..fcc4827b5c 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -1,5 +1,8 @@ from ..utils import ( - unified_strdate + ExtractorError, + traverse_obj, + unified_strdate, + url_or_none, ) from .common import InfoExtractor from ..compat import ( @@ -15,7 +18,7 @@ class MediaKlikkIE(InfoExtractor): (?P[^/#?_]+)''' _TESTS = [{ - # mediaklikk. date in html. + # (old) mediaklikk. date in html. 'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/', 'info_dict': { 'id': '4754129', @@ -23,9 +26,21 @@ class MediaKlikkIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20210901', 'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg' + }, + 'skip': 'Webpage redirects to 404 page', + }, { + # mediaklikk. date in html. 
+ 'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/', + 'info_dict': { + 'id': '6696133', + 'title': 'Hazajáró, Fabova-hegység - Kishont koronája', + 'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja', + 'ext': 'mp4', + 'upload_date': '20230903', + 'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg' } }, { - # m4sport + # (old) m4sport 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/', 'info_dict': { 'id': '4754999', @@ -33,6 +48,18 @@ class MediaKlikkIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20210830', 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg' + }, + 'skip': 'Webpage redirects to 404 page', + }, { + # m4sport + 'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/', + 'info_dict': { + 'id': '6711136', + 'title': 'Atlétika – Gyémánt Liga, Brüsszel', + 'display_id': 'atletika-gyemant-liga-brusszel', + 'ext': 'mp4', + 'upload_date': '20230908', + 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg' } }, { # m4sport with *video/ url and no date @@ -40,20 +67,33 @@ class MediaKlikkIE(InfoExtractor): 'info_dict': { 'id': '4492099', 'title': 'Real Madrid - Chelsea 1-1', + 'display_id': 'real-madrid-chelsea-1-1', 'ext': 'mp4', - 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png' + 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png' } }, { - # hirado + # (old) hirado 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/', 'info_dict': { 'id': '4760120', 'title': 'Feltételeket szabott a főváros', 'ext': 'mp4', 'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg' + }, + 'skip': 'Webpage redirects to video list page', + }, { + # hirado + 'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal', + 'info_dict': { + 'id': '6716068', + 'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál', + 'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal', + 'ext': 'mp4', + 'upload_date': '20230911', + 'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg' } }, { - # petofilive + # (old) petofilive 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/', 'info_dict': { 'id': '4571948', @@ -61,6 +101,18 @@ class MediaKlikkIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20210607', 'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg' + }, + 'skip': 'Webpage redirects to empty page', + }, { + # petofilive + 'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/', + 'info_dict': { + 'id': '6713233', + 'title': 'Futball Fesztivál a Margitszigeten', + 'display_id': 'futball-fesztival-a-margitszigeten', + 'ext': 'mp4', + 'upload_date': '20230909', + 'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg' } }] @@ -84,8 +136,12 @@ def _real_extract(self, url): player_data['video'] = player_data.pop('token') player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data) - playlist_url = self._proto_relative_url(compat_urllib_parse_unquote( - 
self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/')) + player_json = self._search_json( + r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);') + playlist_url = traverse_obj( + player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False) + if not playlist_url: + raise ExtractorError('Unable to extract playlist url') formats = self._extract_wowza_formats( playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash']) From 98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1 Mon Sep 17 00:00:00 2001 From: hatsomatt <143712404+hatsomatt@users.noreply.github.com> Date: Sat, 16 Sep 2023 16:02:37 +0200 Subject: [PATCH 134/218] [ie/videa] Fix extraction (#8003) Closes #7427 Authored by: hatsomatt, aky-01 Co-authored-by: aky-01 <65510015+aky-01@users.noreply.github.com> --- yt_dlp/extractor/videa.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 59ae933b08..634d2edea6 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -38,6 +38,7 @@ class VideaIE(InfoExtractor): 'title': 'Az őrült kígyász 285 kígyót enged szabadon', 'thumbnail': r're:^https?://.*', 'duration': 21, + 'age_limit': 0, }, }, { 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH', @@ -48,6 +49,7 @@ class VideaIE(InfoExtractor): 'title': 'Supercars előzés', 'thumbnail': r're:^https?://.*', 'duration': 64, + 'age_limit': 0, }, }, { 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', @@ -58,6 +60,7 @@ class VideaIE(InfoExtractor): 'title': 'Az őrült kígyász 285 kígyót enged szabadon', 'thumbnail': r're:^https?://.*', 'duration': 21, + 'age_limit': 0, }, }, { 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', @@ -124,7 +127,7 @@ def _real_extract(self, url): query['_t'] = result[:16] b64_info, handle = self._download_webpage_handle( - 'http://videa.hu/videaplayer_get_xml.php', video_id, query=query) + 'http://videa.hu/player/xml', video_id, query=query) if b64_info.startswith(' Date: Sat, 16 Sep 2023 16:24:11 +0200 Subject: [PATCH 135/218] [ie/TV5MondePlus] Fix extractor (#7952) Closes #4978 Authored by: korli, dirkf --- yt_dlp/extractor/tv5mondeplus.py | 98 ++++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py index bd0be784d2..4da1b26d1a 100644 --- a/yt_dlp/extractor/tv5mondeplus.py +++ b/yt_dlp/extractor/tv5mondeplus.py @@ -1,10 +1,14 @@ +import urllib.parse + from .common import InfoExtractor from ..utils import ( determine_ext, extract_attributes, int_or_none, parse_duration, + traverse_obj, try_get, + url_or_none, ) @@ -12,6 +16,36 @@ class TV5MondePlusIE(InfoExtractor): IE_DESC = 'TV5MONDE+' _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P[^/?#]+)' _TESTS = [{ + # movie + 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices', + 'md5': 'c86f60bf8b75436455b1b205f9745955', + 'info_dict': { + 'id': 'ZX0ipMyFQq_6D4BA7b', + 'display_id': 'les-novices', + 'ext': 'mp4', + 'title': 'Les novices', + 'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b', + 'upload_date': '20230821', + 'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg', + 'duration': 5177, + 'episode': 'Les novices', + }, + }, { + # series episode + 'url': 
'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2', + 'info_dict': { + 'id': 'wJ0eeEPozr_6D4BA7b', + 'display_id': 'opj-les-dents-de-la-terre-2', + 'ext': 'mp4', + 'title': "OPJ - Les dents de la Terre (2)", + 'description': 'md5:288f87fd68d993f814e66e60e5302d9d', + 'upload_date': '20230823', + 'series': 'OPJ', + 'episode': 'Les dents de la Terre (2)', + 'duration': 2877, + 'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg' + }, + }, { # movie 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent', 'md5': '32fa0cde16a4480d1251502a66856d5f', @@ -23,6 +57,7 @@ class TV5MondePlusIE(InfoExtractor): 'description': 'md5:570e8bb688036ace873b2d50d24c026d', 'upload_date': '20210819', }, + 'skip': 'no longer available', }, { # series episode 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice', @@ -39,6 +74,7 @@ class TV5MondePlusIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'no longer available', }, { 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver', 'only_matching': True, @@ -63,20 +99,45 @@ def _real_extract(self, url): video_files = self._parse_json( vpl_data['data-broadcast'], display_id) formats = [] - for video_file in video_files: - v_url = video_file.get('url') - if not v_url: - continue - video_format = video_file.get('format') or determine_ext(v_url) - if video_format == 'm3u8': - formats.extend(self._extract_m3u8_formats( - v_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': v_url, - 'format_id': video_format, - }) + video_id = None + + def process_video_files(v): + nonlocal video_id + for video_file in v: + v_url = video_file.get('url') + if not v_url: + continue + if video_file.get('type') == 'application/deferred': + d_param = urllib.parse.quote(v_url) + token = video_file.get('token') + if not token: + continue + deferred_json = self._download_json( + f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id, + note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False) + v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none})) + if not v_url: + continue + # data-guid from the webpage isn't stable, use the material id from the json urls + video_id = self._search_regex( + r'materials/([\da-zA-Z]{10}_[\da-fA-F]{7})/', v_url, 'video id', default=None) + process_video_files(deferred_json) + + video_format = video_file.get('format') or determine_ext(v_url) + if video_format == 'm3u8': + formats.extend(self._extract_m3u8_formats( + v_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif video_format == 'mpd': + formats.extend(self._extract_mpd_formats( + v_url, display_id, fatal=False)) + else: + formats.append({ + 'url': v_url, + 'format_id': video_format, + }) + + process_video_files(video_files) metadata = self._parse_json( vpl_data['data-metadata'], display_id) @@ -100,10 +161,11 @@ def _real_extract(self, url): if upload_date: upload_date = upload_date.replace('_', '') - video_id = self._search_regex( - (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', - r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id', - default=display_id) + if not video_id: + video_id = self._search_regex( + (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', + r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video 
id', + default=display_id) return { 'id': video_id, From f659e6439444ac64305b5c80688cd82f59d2279c Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sat, 16 Sep 2023 17:50:06 +0200 Subject: [PATCH 136/218] [ie/bpb] Overhaul extractor (#8119) Authored by: Grub4K --- yt_dlp/extractor/bpb.py | 174 +++++++++++++++++++++++++++++++++------- yt_dlp/utils/_utils.py | 1 + 2 files changed, 145 insertions(+), 30 deletions(-) diff --git a/yt_dlp/extractor/bpb.py b/yt_dlp/extractor/bpb.py index f28e581b87..7fe0899449 100644 --- a/yt_dlp/extractor/bpb.py +++ b/yt_dlp/extractor/bpb.py @@ -1,56 +1,170 @@ +import functools import re from .common import InfoExtractor from ..utils import ( + clean_html, + extract_attributes, + get_element_text_and_html_by_tag, + get_elements_by_class, + join_nonempty, js_to_json, - determine_ext, + mimetype2ext, + unified_strdate, + url_or_none, + urljoin, + variadic, ) +from ..utils.traversal import traverse_obj + + +def html_get_element(tag=None, cls=None): + assert tag or cls, 'One of tag or class is required' + + if cls: + func = functools.partial(get_elements_by_class, cls, tag=tag) + else: + func = functools.partial(get_element_text_and_html_by_tag, tag) + + def html_get_element_wrapper(html): + return variadic(func(html))[0] + + return html_get_element_wrapper class BpbIE(InfoExtractor): IE_DESC = 'Bundeszentrale für politische Bildung' - _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P\d+)(?:[/?#]|$)' - _TEST = { + _TESTS = [{ 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', - 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f', 'info_dict': { 'id': '297', 'ext': 'mp4', + 'creator': 'Kooperative Berlin', + 'description': 'md5:f4f75885ba009d3e2b156247a8941ce6', + 'release_date': '20160115', + 'series': 'Interview auf dem Geschichtsforum 1989 | 2009', + 'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'], + 'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D', 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR', - 'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.' 
+ 'uploader': 'Bundeszentrale für politische Bildung', + }, + }, { + 'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/', + 'info_dict': { + 'id': '522184', + 'ext': 'mp4', + 'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)', + 'description': 'md5:f83c795ff8f825a69456a9e51fc15903', + 'release_date': '20230621', + 'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'], + 'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB', + 'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c', + 'uploader': 'Bundeszentrale für politische Bildung', + }, + }, { + 'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/', + 'info_dict': { + 'id': '518789', + 'ext': 'mp4', + 'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)', + 'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8', + 'release_date': '20230302', + 'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'], + 'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D', + 'title': 'md5:3e956f264bb501f6383f10495a401da4', + 'uploader': 'Bundeszentrale für politische Bildung', + }, + }, { + 'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/', + 'only_matching': True, + }, { + 'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/', + 'info_dict': { + 'id': '315813', + 'ext': 'mp3', + 'creator': 'Axel Schröder', + 'description': 'md5:eda9d1af34e5912efef5baf54fba4427', + 'release_date': '20200921', + 'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager', + 'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'], + 'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94', + 'title': 'Folge 1: Eine Einführung', + 'uploader': 'Bundeszentrale für politische Bildung', + }, + }, { + 'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/', + 'info_dict': { + 'id': '517806', + 'ext': 'mp3', + 'creator': 'Bundeszentrale für politische Bildung', + 'description': 'md5:594689600e919912aade0b2871cc3fed', + 'release_date': '20230127', + 'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. 
Sechs Jahrzehnte \'Neue Rechte\'"', + 'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'], + 'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0', + 'title': 'Die Weltanschauung der "Neuen Rechten"', + 'uploader': 'Bundeszentrale für politische Bildung', + }, + }, { + 'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/', + 'only_matching': True, + }] + + _TITLE_RE = re.compile('(?P[^<]*)<[^>]+>(?P<series>[^<]*)') + + def _parse_vue_attributes(self, name, string, video_id): + attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name)) + + for key, value in attributes.items(): + if key.startswith(':'): + attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False) + + return attributes + + @staticmethod + def _process_source(source): + url = url_or_none(source['src']) + if not url: + return None + + source_type = source.get('type', '') + extension = mimetype2ext(source_type) + is_video = source_type.startswith('video') + note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None + + return { + 'url': url, + 'ext': extension, + 'vcodec': None if is_video else 'none', + 'quality': 10 if note == 'high' else 0, + 'format_note': note, + 'format_id': join_nonempty(extension, note), } - } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'<h2 class="white">(.*?)</h2>', webpage, 'title') - video_info_dicts = re.findall( - r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage) - - formats = [] - for video_info in video_info_dicts: - video_info = self._parse_json( - video_info, video_id, transform_source=js_to_json, fatal=False) - if not video_info: - continue - video_url = video_info.get('src') - if not video_url: - continue - quality = 'high' if '_high' in video_url else 'low' - formats.append({ - 'url': video_url, - 'quality': 10 if quality == 'high' else 0, - 'format_note': quality, - 'format_id': '%s-%s' % (quality, determine_ext(video_url)), - }) + title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match})) + json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False)) return { 'id': video_id, - 'formats': formats, - 'title': title, - 'description': self._og_search_description(webpage), + 'title': traverse_obj(title_result, ('title', {str.strip})) or None, + # This metadata could be interpreted otherwise, but it fits "series" the most + 'series': traverse_obj(title_result, ('series', {str.strip})) or None, + 'description': join_nonempty(*traverse_obj(webpage, [( + {html_get_element(cls='opening-intro')}, + [{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}], + ), {clean_html}]), delim='\n\n') or None, + 'creator': self._html_search_meta('author', webpage), + 'uploader': self._html_search_meta('publisher', webpage), + 'release_date': unified_strdate(self._html_search_meta('date', webpage)), + 'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)), + **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), { + 'formats': (':sources', ..., {self._process_source}), + 'thumbnail': ('poster', {lambda x: urljoin(url, x)}), + }), } diff --git a/yt_dlp/utils/_utils.py 
b/yt_dlp/utils/_utils.py
index f5552ce802..180bec245a 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -2847,6 +2847,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
         'quicktime': 'mov',
         'webm': 'webm',
         'vp9': 'vp9',
+        'video/ogg': 'ogv',
         'x-flv': 'flv',
         'x-m4v': 'm4v',
         'x-matroska': 'mkv',

From 069cbece9dba6384f1cc5fcfc7ce562a31af42fc Mon Sep 17 00:00:00 2001
From: bashonly <bashonly@bashonly.com>
Date: Sat, 16 Sep 2023 13:28:14 -0500
Subject: [PATCH 137/218] [ie/tiktok] Fix webpage extraction

Closes #8089
Authored by: bashonly
---
 yt_dlp/extractor/tiktok.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index f14c4f9d6a..f26972cff2 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -15,7 +15,6 @@
     UserNotLive,
     determine_ext,
     format_field,
-    get_element_by_id,
     get_first,
     int_or_none,
     join_nonempty,
@@ -50,8 +49,9 @@ def _create_url(user_id, video_id):
         return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
 
     def _get_sigi_state(self, webpage, display_id):
-        return self._parse_json(get_element_by_id(
-            'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
+        return self._search_json(
+            r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>', webpage,
+            'sigi state', display_id, end_pattern=r'</script>')
 
     def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, note='Downloading API JSON',
                        errnote='Unable to download API page'):

From cebbd33b1c678149fc8f0e254db6fc0da317ea80 Mon Sep 17 00:00:00 2001
From: c-basalt <117849907+c-basalt@users.noreply.github.com>
Date: Sat, 16 Sep 2023 16:43:12 -0400
Subject: [PATCH 138/218] [ie/twitcasting] Improve `_VALID_URL` (#8120)

Closes #7597
Authored by: c-basalt
---
 yt_dlp/extractor/twitcasting.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
index dff353a4f9..3890d5d8fb 100644
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -22,7 +22,7 @@
 
 
 class TwitCastingIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/(?:movie|twplayer)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<uploader_id>[^/?#]+)/(?:movie|twplayer)/(?P<id>\d+)'
     _M3U8_HEADERS = {
         'Origin': 'https://twitcasting.tv',
         'Referer': 'https://twitcasting.tv/',
@@ -231,7 +231,7 @@ def find_dmu(x):
 
 
 class TwitCastingLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/?(?:[#?]|$)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/?(?:[#?]|$)'
     _TESTS = [{
         'url': 'https://twitcasting.tv/ivetesangalo',
         'only_matching': True,
@@ -265,8 +265,15 @@
 
 
 class TwitCastingUserIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/show/?(?:[#?]|$)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
     _TESTS = [{
+        'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
+        'info_dict': {
+            'id': 'natsuiromatsuri',
+            'title': 'natsuiromatsuri - Live History',
+        },
+        'playlist_mincount': 235,
+    }, {
         'url': 'https://twitcasting.tv/noriyukicas/show',
         'only_matching': True,
     }]
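
The tightened patterns above stop `uploader_id` and `id` at `?` and `#`, and `TwitCastingUserIE` now also accepts `/archive` listings. As a quick sanity check, a minimal sketch assuming a yt-dlp development checkout with this patch applied is on the import path; `InfoExtractor.suitable()` is the standard matching entry point, and the query-string URL is illustrative rather than taken from the tests:

    # Sketch only: exercise the new TwitCasting _VALID_URL patterns. The last
    # two URLs come from the tests above; the first is a made-up example.
    from yt_dlp.extractor.twitcasting import (
        TwitCastingIE,
        TwitCastingLiveIE,
        TwitCastingUserIE,
    )

    assert TwitCastingIE.suitable('https://twitcasting.tv/ivetesangalo/movie/2357609?t=1')
    assert TwitCastingLiveIE.suitable('https://twitcasting.tv/ivetesangalo')
    assert TwitCastingUserIE.suitable('https://twitcasting.tv/natsuiromatsuri/archive/')
    assert TwitCastingUserIE.suitable('https://twitcasting.tv/noriyukicas/show')
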
From aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f Mon Sep 17 00:00:00 2001
From: makeworld <25111343+makew0rld@users.noreply.github.com>
Date: Sat, 16 Sep 2023 16:49:43 -0400
Subject: [PATCH 139/218] [ie/cbc] Ignore any 426 from API (#7689)

Closes #7477
Authored by: makew0rld
---
 yt_dlp/extractor/cbc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index b3c5471f7b..2920b9027d 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -339,12 +339,12 @@ def _new_claims_token(self, email, password):
         data = json.dumps({'jwt': sig}).encode()
         headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
         resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
-                                   None, data=data, headers=headers)
+                                   None, data=data, headers=headers, expected_status=426)
         cbc_access_token = resp['accessToken']
 
         headers = {'content-type': 'application/json', 'ott-device-type': 'web',
                    'ott-access-token': cbc_access_token}
         resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
-                                   None, headers=headers)
+                                   None, headers=headers, expected_status=426)
         return resp['claimsToken']
 
     def _get_claims_token_expiry(self):

From 5336bf57a7061e0955a37f0542fc8ebf50d55b17 Mon Sep 17 00:00:00 2001
From: c-basalt <117849907+c-basalt@users.noreply.github.com>
Date: Sat, 16 Sep 2023 16:53:57 -0400
Subject: [PATCH 140/218] [ie/bilibili] Extract `format_id` (#7555)

Authored by: c-basalt
---
 yt_dlp/extractor/bilibili.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index cb7ab2a174..290340078c 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -3,6 +3,7 @@
 import hashlib
 import itertools
 import math
+import re
 import time
 import urllib.parse
 
@@ -38,6 +39,8 @@
 
 
 class BilibiliBaseIE(InfoExtractor):
+    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
+
     def extract_formats(self, play_info):
         format_names = {
             r['quality']: traverse_obj(r, 'new_description', 'display_desc')
@@ -54,7 +57,8 @@ def extract_formats(self, play_info):
             'acodec': audio.get('codecs'),
             'vcodec': 'none',
             'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
-            'filesize': int_or_none(audio.get('size'))
+            'filesize': int_or_none(audio.get('size')),
+            'format_id': str_or_none(audio.get('id')),
         } for audio in audios]
 
         formats.extend({
@@ -68,6 +72,9 @@ def extract_formats(self, play_info):
             'tbr': float_or_none(video.get('bandwidth'), scale=1000),
             'filesize': int_or_none(video.get('size')),
             'quality': int_or_none(video.get('id')),
+            'format_id': traverse_obj(
+                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
+                ('id', {str_or_none}), get_all=False),
             'format': format_names.get(video.get('id')),
         } for video in traverse_obj(play_info, ('dash', 'video', ...)))
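
`BilibiliBaseIE` now derives `format_id` from the `-<id>.m4s?` token embedded in the DASH segment URL, falling back to the stream's numeric `id`. A minimal sketch of the regex branch in isolation, with a made-up URL:

    # Sketch only: the token _FORMAT_ID_RE extracts from BiliBili DASH URLs.
    # The URL below is illustrative, not a real playback URL.
    import re

    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')

    base_url = 'https://upos-sz-example.bilivideo.com/upgcxcode/live-30280.m4s?e=ig8euxZM'
    match = _FORMAT_ID_RE.search(base_url)
    assert match and match.group(1) == '30280'

From 9d376c4daeaf1279a011582f3f0e6ae42af520dd Mon Sep 17 00:00:00 2001
From: Aniruddh Joshi <aniruddh@ebincoweb.com>
Date: Sun, 17 Sep 2023 02:28:21 +0530
Subject: [PATCH 141/218] [ie/AmazonMiniTV] Fix extractor (#8103)

Closes #7817
Authored by: Aniruddh-J
---
 yt_dlp/extractor/amazonminitv.py | 63 +++++---------------------------
 1 file changed, 9 insertions(+), 54 deletions(-)

diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py
index b57d985d10..ad23b16bd6 100644
--- a/yt_dlp/extractor/amazonminitv.py
+++ b/yt_dlp/extractor/amazonminitv.py
@@ -37,7 +37,7 @@ def _call_api(self, asin, data=None, note=None):
         return resp['data'][data['operationName']]
 
 
-class AmazonMiniTVIE(AmazonMiniTVBaseIE):
+class AmazonMiniTVIE(InfoExtractor):
     _VALID_URL = 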
r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)' _TESTS = [{ 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', @@ -86,56 +86,14 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE): 'only_matching': True, }] - _GRAPHQL_QUERY_CONTENT = ''' -query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { - content( - applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} - contentId: $contentId - contentType: $contentType - ) { - contentId - name - ... on Episode { - contentId - vodType - name - images - description { - synopsis - contentLengthInSeconds - } - publicReleaseDateUTC - audioTracks - seasonId - seriesId - seriesName - seasonNumber - episodeNumber - timecode { - endCreditsTime - } - } - ... on MovieContent { - contentId - vodType - name - description { - synopsis - contentLengthInSeconds - } - images - publicReleaseDateUTC - audioTracks - } - } -}''' - def _real_extract(self, url): - asin = f'amzn1.dv.gti.{self._match_id(url)}' - prs = self._call_api(asin, note='Downloading playback info') + video_uuid = self._match_id(url) + asin = f'amzn1.dv.gti.{video_uuid}' + webpage = self._download_webpage(f'https://www.amazon.in/minitv/tp/{video_uuid}', asin) + data = self._search_nextjs_data(webpage, asin)['props']['pageProps']['ssrProps'] formats, subtitles = [], {} - for type_, asset in prs['playbackAssets'].items(): + for type_, asset in traverse_obj(data, ('playbackData', 'playbackAssets', {dict.items}, ...)): if not traverse_obj(asset, 'manifestUrl'): continue if type_ == 'hls': @@ -152,12 +110,7 @@ def _real_extract(self, url): else: self.report_warning(f'Unknown asset type: {type_}') - title_info = self._call_api( - asin, note='Downloading title info', data={ - 'operationName': 'content', - 'variables': {'contentId': asin}, - 'query': self._GRAPHQL_QUERY_CONTENT, - }) + title_info = traverse_obj(data, ('contentData', {dict})) or {} credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000) is_episode = title_info.get('vodType') == 'EPISODE' @@ -192,6 +145,7 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE): IE_NAME = 'amazonminitv:season' _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' IE_DESC = 'Amazon MiniTV Season, "minitv:season:" prefix' + _WORKING = False _TESTS = [{ 'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0', 'playlist_mincount': 6, @@ -251,6 +205,7 @@ class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE): IE_NAME = 'amazonminitv:series' _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix' + _WORKING = False _TESTS = [{ 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', 'playlist_mincount': 3, From a83da3717d30697102e76f63a6f29d77f9373c2a Mon Sep 17 00:00:00 2001 From: ApoorvShah111 <79164543+ApoorvShah111@users.noreply.github.com> Date: Sun, 17 Sep 2023 02:31:26 +0530 Subject: [PATCH 142/218] [ie/nitter] Fix title extraction fallback (#8102) Closes #7575 Authored by: ApoorvShah111 --- yt_dlp/extractor/nitter.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 5d1ca1f5d0..35d1311dcd 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py 
@@ -265,6 +265,26 @@ class NitterIE(InfoExtractor): 'repost_count': int, 'comment_count': int, } + }, { # no OpenGraph title + 'url': f'https://{current_instance}/LocalBateman/status/1678455464038735895#m', + 'info_dict': { + 'id': '1678455464038735895', + 'ext': 'mp4', + 'title': 'Your Typical Local Man - Local man, what did Romanians ever do to you?', + 'description': 'Local man, what did Romanians ever do to you?', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Your Typical Local Man', + 'uploader_id': 'LocalBateman', + 'uploader_url': f'https://{current_instance}/LocalBateman', + 'upload_date': '20230710', + 'timestamp': 1689009900, + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'params': {'skip_download': 'm3u8'}, } ] @@ -292,7 +312,7 @@ def _real_extract(self, url): 'ext': ext }] - title = description = self._og_search_description(full_webpage) or self._html_search_regex( + title = description = self._og_search_description(full_webpage, default=None) or self._html_search_regex( r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False) uploader_id = self._html_search_regex( From ecef42c3adbcb6a84405139047923c4967316f28 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 17 Sep 2023 05:04:10 +0800 Subject: [PATCH 143/218] [ie/zaiko] Improve thumbnail extraction (#8054) Authored by: pzhlkj6612 --- yt_dlp/extractor/zaiko.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/zaiko.py b/yt_dlp/extractor/zaiko.py index 0ccacbb6aa..2b6221da21 100644 --- a/yt_dlp/extractor/zaiko.py +++ b/yt_dlp/extractor/zaiko.py @@ -9,6 +9,7 @@ traverse_obj, try_call, unescapeHTML, + url_basename, url_or_none, ) @@ -45,12 +46,14 @@ class ZaikoIE(ZaikoBaseIE): 'uploader_id': '454', 'uploader': 'ZAIKO ZERO', 'release_timestamp': 1583809200, - 'thumbnail': r're:https://[a-z0-9]+.cloudfront.net/[a-z0-9_]+/[a-z0-9_]+', + 'thumbnail': r're:^https://[\w.-]+/\w+/\w+', + 'thumbnails': 'maxcount:2', 'release_date': '20200310', 'categories': ['Tech House'], 'live_status': 'was_live', }, 'params': {'skip_download': 'm3u8'}, + 'skip': 'Your account does not have tickets to this event', }] def _real_extract(self, url): @@ -83,6 +86,12 @@ def _real_extract(self, url): if not formats: self.raise_no_formats(msg, expected=expected) + thumbnail_urls = [ + traverse_obj(player_meta, ('initial_event_info', 'poster_url')), + self._og_search_thumbnail(self._download_webpage( + f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''), + ] + return { 'id': video_id, 'formats': formats, @@ -96,8 +105,8 @@ def _real_extract(self, url): }), **traverse_obj(player_meta, ('initial_event_info', { 'alt_title': ('title', {str}), - 'thumbnail': ('poster_url', {url_or_none}), })), + 'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)] } From 0ce1f48bf1cb78d40d734ce73ee1c90eccf92274 Mon Sep 17 00:00:00 2001 From: 04-pasha-04 <89145825+04-pasha-04@users.noreply.github.com> Date: Sat, 16 Sep 2023 23:06:00 +0200 Subject: [PATCH 144/218] [ie/funker530] Fix extraction (#8040) Authored by: 04-pasha-04 --- yt_dlp/extractor/funker530.py | 1 + yt_dlp/extractor/rumble.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/funker530.py b/yt_dlp/extractor/funker530.py index ba5ab7d4ee..62fd7f6dda 
100644 --- a/yt_dlp/extractor/funker530.py +++ b/yt_dlp/extractor/funker530.py @@ -60,6 +60,7 @@ class Funker530IE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + info = {} rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage)) if rumble_url: info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()} diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index f8bf4a1825..96c192581d 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -144,7 +144,7 @@ def _extract_embed_urls(cls, url, webpage): if embeds: return embeds return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer( - r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)] + r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)] def _real_extract(self, url): video_id = self._match_id(url) From 23d829a3420450bcfb0788e6fb2cf4f6acdbe596 Mon Sep 17 00:00:00 2001 From: Tristan Lee <lee.tristan.evans@gmail.com> Date: Sat, 16 Sep 2023 16:08:15 -0500 Subject: [PATCH 145/218] [ie/Rumble] Fix embed extraction (#8035) Authored by: trislee --- yt_dlp/extractor/rumble.py | 59 ++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index 96c192581d..85567d9a22 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -33,7 +33,7 @@ class RumbleEmbedIE(InfoExtractor): 'upload_date': '20191020', 'channel_url': 'https://rumble.com/c/WMAR', 'channel': 'WMAR', - 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg', 'duration': 234, 'uploader': 'WMAR', 'live_status': 'not_live', @@ -84,7 +84,7 @@ class RumbleEmbedIE(InfoExtractor): 'info_dict': { 'id': 'v1essrt', 'ext': 'mp4', - 'title': 'startswith:lofi hip hop radio - beats to relax/study', + 'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to', 'timestamp': 1661519399, 'upload_date': '20220826', 'channel_url': 'https://rumble.com/c/LofiGirl', @@ -99,7 +99,7 @@ class RumbleEmbedIE(InfoExtractor): 'url': 'https://rumble.com/embed/v1amumr', 'info_dict': { 'id': 'v1amumr', - 'ext': 'webm', + 'ext': 'mp4', 'fps': 60, 'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live', 'timestamp': 1658518457, @@ -129,7 +129,7 @@ class RumbleEmbedIE(InfoExtractor): 'duration': 92, 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh', 'channel_url': 'https://rumble.com/c/RichSementa', - 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg', 'timestamp': 1654892716, 'uploader': 'Mr Producer Media', 'upload_date': '20220610', @@ -236,7 +236,9 @@ def _real_extract(self, url): class RumbleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$' - _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>'] + _EMBED_REGEX = [ + r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>', + r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>'] _TESTS = [{ 'add_ie': ['RumbleEmbed'], 'url': 
'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', @@ -254,6 +256,7 @@ class RumbleIE(InfoExtractor): 'thumbnail': r're:https://.+\.jpg', 'duration': 103, 'like_count': int, + 'dislike_count': int, 'view_count': int, 'live_status': 'not_live', } @@ -278,6 +281,9 @@ class RumbleIE(InfoExtractor): 'channel_url': 'https://rumble.com/c/Redacted', 'live_status': 'not_live', 'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg', + 'like_count': int, + 'dislike_count': int, + 'view_count': int, }, }, { 'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html', @@ -296,12 +302,15 @@ class RumbleIE(InfoExtractor): 'channel_url': 'https://rumble.com/c/KimIversen', 'channel': 'Kim Iversen', 'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg', + 'like_count': int, + 'dislike_count': int, + 'view_count': int, }, }] _WEBPAGE_TESTS = [{ 'url': 'https://rumble.com/videos?page=2', - 'playlist_count': 25, + 'playlist_mincount': 24, 'info_dict': { 'id': 'videos?page=2', 'title': 'All videos', @@ -309,17 +318,16 @@ class RumbleIE(InfoExtractor): 'age_limit': 0, }, }, { - 'url': 'https://rumble.com/live-videos', - 'playlist_mincount': 19, + 'url': 'https://rumble.com/browse/live', + 'playlist_mincount': 25, 'info_dict': { - 'id': 'live-videos', - 'title': 'Live Videos', - 'description': 'Live videos on Rumble.com', + 'id': 'live', + 'title': 'Browse', 'age_limit': 0, }, }, { 'url': 'https://rumble.com/search/video?q=rumble&sort=views', - 'playlist_count': 24, + 'playlist_mincount': 24, 'info_dict': { 'id': 'video?q=rumble&sort=views', 'title': 'Search results for: rumble', @@ -334,19 +342,20 @@ def _real_extract(self, url): if not url_info: raise UnsupportedError(url) - release_ts_str = self._search_regex( - r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', - webpage, 'release date', fatal=False, default=None) - view_count_str = self._search_regex(r'<span class="media-heading-info">([\d,]+) Views', - webpage, 'view count', fatal=False, default=None) - - return self.url_result( - url_info['url'], ie_key=url_info['ie_key'], url_transparent=True, - view_count=parse_count(view_count_str), - release_timestamp=parse_iso8601(release_ts_str), - like_count=parse_count(get_element_by_class('rumbles-count', webpage)), - description=clean_html(get_element_by_class('media-description', webpage)), - ) + return { + '_type': 'url_transparent', + 'ie_key': url_info['ie_key'], + 'url': url_info['url'], + 'release_timestamp': parse_iso8601(self._search_regex( + r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)), + 'view_count': int_or_none(self._search_regex( + r'"userInteractionCount"\s*:\s*(\d+)', webpage, 'view count', default=None)), + 'like_count': parse_count(self._search_regex( + r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)), + 'dislike_count': parse_count(self._search_regex( + r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)), + 'description': clean_html(get_element_by_class('media-description', webpage)) + } class RumbleChannelIE(InfoExtractor): From b4c1c408c63724339eb12b16c91b253a7ee62cfa Mon Sep 17 00:00:00 2001 From: barsnick <barsnick@users.noreply.github.com> Date: Sat, 16 Sep 2023 23:11:05 +0200 Subject: [PATCH 146/218] [ie/Bild.de] Extract HLS formats (#8032) Closes #7951 Authored by: 
barsnick --- yt_dlp/extractor/bild.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index f3dea33c46..eb289329d8 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -1,6 +1,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, + traverse_obj, unescapeHTML, ) @@ -8,7 +9,8 @@ class BildIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html' IE_DESC = 'Bild.de' - _TEST = { + _TESTS = [{ + 'note': 'static MP4 only', 'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html', 'md5': 'dd495cbd99f2413502a1713a1156ac8a', 'info_dict': { @@ -19,7 +21,19 @@ class BildIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 196, } - } + }, { + 'note': 'static MP4 and HLS', + 'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html', + 'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1', + 'info_dict': { + 'id': '85158620', + 'ext': 'mp4', + 'title': 'Der Sprungturm-Skandal', + 'description': 'md5:709b543c24dc31bbbffee73bccda34ad', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 69, + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -27,11 +41,23 @@ def _real_extract(self, url): video_data = self._download_json( url.split('.bild.html')[0] + ',view=json.bild.html', video_id) + formats = [] + for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])): + src_type = src.get('type') + if src_type == 'application/x-mpegURL': + formats.extend( + self._extract_m3u8_formats( + src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif src_type == 'video/mp4': + formats.append({'url': src['src'], 'format_id': 'http-mp4'}) + else: + self.report_warning(f'Skipping unsupported format type: "{src_type}"') + return { 'id': video_id, 'title': unescapeHTML(video_data['title']).strip(), 'description': unescapeHTML(video_data.get('description')), - 'url': video_data['clipList'][0]['srces'][0]['src'], + 'formats': formats, 'thumbnail': video_data.get('poster'), 'duration': int_or_none(video_data.get('durationSec')), } From 5be7e978867b5f66ad6786c674d79d40e950ae16 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Sat, 16 Sep 2023 17:13:04 -0400 Subject: [PATCH 147/218] [ie/sohu] Fix extractor (#7628) Closes #1667, Closes #7463 Authored by: c-basalt, bashonly --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/sohu.py | 107 ++++++++++++++++++++++++++++++-- 2 files changed, 105 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b836fe8a3d..4fed6d66a2 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1795,7 +1795,10 @@ from .slutload import SlutloadIE from .smotrim import SmotrimIE from .snotr import SnotrIE -from .sohu import SohuIE +from .sohu import ( + SohuIE, + SohuVIE, +) from .sonyliv import ( SonyLIVIE, SonyLIVSeriesIE, diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py index a8f1e4623e..c0ff4f9aa8 100644 --- a/yt_dlp/extractor/sohu.py +++ b/yt_dlp/extractor/sohu.py @@ -1,3 +1,4 @@ +import base64 import re from .common import InfoExtractor @@ -8,7 +9,12 @@ from ..utils import ( ExtractorError, int_or_none, + float_or_none, + url_or_none, + unified_timestamp, 
try_get, + urljoin, + traverse_obj, ) @@ -31,13 +37,20 @@ class SohuIE(InfoExtractor): 'id': '409385080', 'ext': 'mp4', 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》', - } + }, + 'skip': 'no longer available', }, { 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml', 'info_dict': { 'id': '78693464', 'ext': 'mp4', 'title': '【爱范品】第31期:MWC见不到的奇葩手机', + 'uploader': '爱范儿视频', + 'duration': 213, + 'timestamp': 1425519600, + 'upload_date': '20150305', + 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg', + 'tags': ['爱范儿', '爱范品', 'MWC', '手机'], } }, { 'note': 'Multipart video', @@ -45,6 +58,12 @@ class SohuIE(InfoExtractor): 'info_dict': { 'id': '78910339', 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', + 'uploader': '小苍cany', + 'duration': 744.0, + 'timestamp': 1426269360, + 'upload_date': '20150313', + 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg', + 'tags': ['小苍MM', '英雄联盟', '实战秘籍'], }, 'playlist': [{ 'info_dict': { @@ -75,6 +94,11 @@ class SohuIE(InfoExtractor): 'id': '78932792', 'ext': 'mp4', 'title': 'youtube-dl testing video', + 'duration': 360, + 'timestamp': 1426348620, + 'upload_date': '20150314', + 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M02/8A/00/MTAuMTAuODguNzk=/6_14cee1be192g102SysCutcloud_78932792_7_7b.jpg', + 'tags': [], }, 'params': { 'skip_download': True @@ -100,7 +124,7 @@ def _fetch_data(vid_id, mytv=False): webpage = self._download_webpage(url, video_id) - title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage)) + title = re.sub(r'( - 高清正版在线观看)? - 搜狐视频$', '', self._og_search_title(webpage)) vid = self._html_search_regex( r'var vid ?= ?["\'](\d+)["\']', @@ -132,7 +156,9 @@ def _fetch_data(vid_id, mytv=False): allot = format_data['allot'] data = format_data['data'] - clips_url = data['clipsURL'] + clip_url = traverse_obj(data, (('clipsURL', 'mp4PlayUrl'), i, {url_or_none}), get_all=False) + if not clip_url: + raise ExtractorError(f'Unable to extract url for clip {i}') su = data['su'] video_url = 'newflv.sohu.ccgslb.net' @@ -142,9 +168,9 @@ def _fetch_data(vid_id, mytv=False): while 'newflv.sohu.ccgslb.net' in video_url: params = { 'prot': 9, - 'file': clips_url[i], + 'file': clip_url, 'new': su[i], - 'prod': 'flash', + 'prod': 'h5n', 'rb': 1, } @@ -193,6 +219,75 @@ def _fetch_data(vid_id, mytv=False): 'entries': playlist, 'id': video_id, 'title': title, + 'duration': traverse_obj(vid_data, ('data', 'totalDuration', {float_or_none})), } - return info + if mytv: + publish_time = unified_timestamp(self._search_regex( + r'publishTime:\s*["\'](\d+-\d+-\d+ \d+:\d+)["\']', webpage, 'publish time', fatal=False)) + else: + publish_time = traverse_obj(vid_data, ('tv_application_time', {unified_timestamp})) + + return { + 'timestamp': publish_time - 8 * 3600 if publish_time else None, + **traverse_obj(vid_data, { + 'alt_title': ('data', 'subName', {str}), + 'uploader': ('wm_data', 'wm_username', {str}), + 'thumbnail': ('data', 'coverImg', {url_or_none}), + 'tags': ('data', 'tag', {str.split}), + }), + **info, + } + + +class SohuVIE(InfoExtractor): + _VALID_URL = r'https?://tv\.sohu\.com/v/(?P<id>[\w=-]+)\.html(?:$|[#?])' + + _TESTS = [{ + 'note': 'Multipart video', + 'url': 'https://tv.sohu.com/v/MjAyMzA2MTQvbjYwMTMxNTE5Mi5zaHRtbA==.html', + 'info_dict': { + 'id': '601315192', + 'title': '《淬火丹心》第1集', + 'alt_title': '“点天灯”发生事故', + 'duration': 2701.692, + 'timestamp': 1686758040, + 'upload_date': '20230614', + 
'thumbnail': 'http://photocdn.tv.sohu.com/img/20230614/vrsa_hor_1686738763256_454010551.jpg',
+        },
+        'playlist_mincount': 9,
+        'skip': 'Only available in China',
+    }, {
+        'url': 'https://tv.sohu.com/v/dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s.html',
+        'info_dict': {
+            'id': '78693464',
+            'ext': 'mp4',
+            'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+            'uploader': '爱范儿视频',
+            'duration': 213,
+            'timestamp': 1425519600,
+            'upload_date': '20150305',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
+            'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
+        }
+    }, {
+        'note': 'Multipart video',
+        'url': 'https://tv.sohu.com/v/dXMvMjQyNTYyMTYzLzc4OTEwMzM5LnNodG1s.html?src=pl',
+        'info_dict': {
+            'id': '78910339',
+            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            'uploader': '小苍cany',
+            'duration': 744.0,
+            'timestamp': 1426269360,
+            'upload_date': '20150313',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
+            'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
+        },
+        'playlist_mincount': 3,
+    }]
+
+    def _real_extract(self, url):
+        encoded_id = self._match_id(url)
+        path = base64.urlsafe_b64decode(encoded_id).decode()
+        subdomain = 'tv' if re.match(r'\d+/n\d+\.shtml', path) else 'my.tv'
+        return self.url_result(urljoin(f'http://{subdomain}.sohu.com/', path), SohuIE)
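
`SohuVIE` itself downloads nothing: the path segment of a `tv.sohu.com/v/` share link is URL-safe base64, and the decoded path picks the legacy subdomain before delegating to `SohuIE`. A standalone sketch of the same transformation, standard library only (the stdlib `urljoin` behaves the same as yt-dlp's helper for this input); the encoded ID comes from the tests above:

    # Sketch only: mirrors the SohuVIE._real_extract resolution logic.
    import base64
    import re
    from urllib.parse import urljoin

    def resolve_sohu_share(encoded_id):
        path = base64.urlsafe_b64decode(encoded_id).decode()
        subdomain = 'tv' if re.match(r'\d+/n\d+\.shtml', path) else 'my.tv'
        return urljoin(f'http://{subdomain}.sohu.com/', path)

    print(resolve_sohu_share('dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s'))
    # http://my.tv.sohu.com/us/232799889/78693464.shtml

From 308936619c8a4f3a52d73c829c2006ff6c55fea2 Mon Sep 17 00:00:00 2001
From: fireattack <human.peng@gmail.com>
Date: Sun, 17 Sep 2023 05:18:04 +0800
Subject: [PATCH 148/218] [ie/facebook] Improve format sorting (#8074)

Authored by: fireattack
---
 yt_dlp/extractor/facebook.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index c30a6b06a0..50a750d3b1 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -505,7 +505,6 @@ def process_formats(info):
         # with non-browser User-Agent.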
for f in info['formats']: f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1' - info['_format_sort_fields'] = ('res', 'quality') def extract_relay_data(_filter): return self._parse_json(self._search_regex( @@ -552,7 +551,8 @@ def parse_graphql_video(video): else: formats.append({ 'format_id': format_id, - 'quality': q(format_id), + # sd, hd formats w/o resolution info should be deprioritized below DASH + 'quality': q(format_id) - 3, 'url': playable_url, }) extract_dash_manifest(video, formats) @@ -719,9 +719,11 @@ def parse_attachment(attachment, key='media'): for src_type in ('src', 'src_no_ratelimit'): src = f[0].get('%s_%s' % (quality, src_type)) if src: - preference = -10 if format_id == 'progressive' else -1 + # sd, hd formats w/o resolution info should be deprioritized below DASH + # TODO: investigate if progressive or src formats still exist + preference = -10 if format_id == 'progressive' else -3 if quality == 'hd': - preference += 5 + preference += 1 formats.append({ 'format_id': '%s_%s_%s' % (format_id, quality, src_type), 'url': src, From 53675852195d8dd859555d4789944a6887171ff8 Mon Sep 17 00:00:00 2001 From: bashonly <bashonly@bashonly.com> Date: Sat, 16 Sep 2023 16:20:34 -0500 Subject: [PATCH 149/218] [ie/generic] Fix KVS thumbnail extraction Closes #8045 Authored by: bashonly --- yt_dlp/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index f5c59a0930..33e71d1c57 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2370,7 +2370,7 @@ def _extract_kvs(self, url, webpage, video_id): 'id': flashvars['video_id'], 'display_id': display_id, 'title': title, - 'thumbnail': thumbnail, + 'thumbnail': urljoin(url, thumbnail), 'formats': formats, } From 635ae31f68a3ac7f6393d59657ed711e34ee3552 Mon Sep 17 00:00:00 2001 From: bashonly <bashonly@bashonly.com> Date: Sat, 16 Sep 2023 16:22:21 -0500 Subject: [PATCH 150/218] [ie/mediastream] Make embed extraction non-fatal Authored by: bashonly --- yt_dlp/extractor/mediastream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index cef769f299..d5c9aab8a3 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -14,7 +14,7 @@ class MediaStreamBaseIE(InfoExtractor): _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)' def _extract_mediastream_urls(self, webpage): - yield from traverse_obj(list(self._yield_json_ld(webpage, None)), ( + yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), ( lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'), {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None})) From 20c3c9b433dd47faf0dbde6b46e4e34eb76109a5 Mon Sep 17 00:00:00 2001 From: bashonly <bashonly@bashonly.com> Date: Sat, 16 Sep 2023 16:23:54 -0500 Subject: [PATCH 151/218] [ie/reddit] Extract subtitles Closes #7814 Authored by: bashonly --- yt_dlp/extractor/reddit.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 813e62874c..62f669f35d 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -319,16 +319,20 @@ def add_thumbnail(src): 'format_id': 'fallback', 'format_note': 'DASH video, mp4_dash', }] - formats.extend(self._extract_m3u8_formats( - hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)) - 
formats.extend(self._extract_mpd_formats( - dash_playlist_url, display_id, mpd_id='dash', fatal=False)) + hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles( + hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(hls_fmts) + dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles( + dash_playlist_url, display_id, mpd_id='dash', fatal=False) + formats.extend(dash_fmts) + self._merge_subtitles(dash_subs, target=subtitles) return { **info, 'id': video_id, 'display_id': display_id, 'formats': formats, + 'subtitles': subtitles, 'duration': int_or_none(reddit_video.get('duration')), } From eda0e415d26eb084e570cf5372d38ee1f616b70f Mon Sep 17 00:00:00 2001 From: garret <garret1317@yandex.com> Date: Sat, 16 Sep 2023 23:47:49 +0100 Subject: [PATCH 152/218] [ie/bbc] Extract tracklist as chapters (#7788) Authored by: garret1317 --- yt_dlp/extractor/bbc.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index a55cdef2b8..d1d6e04faa 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -15,11 +15,13 @@ float_or_none, get_element_by_class, int_or_none, + join_nonempty, js_to_json, parse_duration, parse_iso8601, parse_qs, strip_or_none, + traverse_obj, try_get, unescapeHTML, unified_timestamp, @@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor): iplayer(?:/[^/]+)?/(?:episode/|playlist/)| music/(?:clips|audiovideo/popular)[/#]| radio/player/| - sounds/play/| events/[^/]+/play/[^/]+/ ) (?P<id>%s)(?!/(?:episodes|broadcasts|clips)) @@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - }, { - 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb', - 'note': 'Audio', - 'info_dict': { - 'id': 'm0007jz9', - 'ext': 'mp4', - 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra', - 'description': "Live BBC Proms. 
West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.", - 'duration': 9840, - }, - 'params': { - # rtmp download - 'skip_download': True, - } }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, @@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'upload_date': '20190604', 'categories': ['Psychology'], }, + }, { + # BBC Sounds + 'url': 'https://www.bbc.co.uk/sounds/play/m001q78b', + 'info_dict': { + 'id': 'm001q789', + 'ext': 'mp4', + 'title': 'The Night Tracks Mix - Music for the darkling hour', + 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg', + 'chapters': 'count:8', + 'description': 'md5:815fb51cbdaa270040aab8145b3f1d67', + 'uploader': 'Radio 3', + 'duration': 1800, + 'uploader_id': 'bbc_radio_three', + }, }, { # onion routes 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', 'only_matching': True, @@ -1128,6 +1129,13 @@ def _real_extract(self, url): 'uploader_id': network.get('id'), 'formats': formats, 'subtitles': subtitles, + 'chapters': traverse_obj(preload_state, ( + 'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), { + 'title': ('titles', {lambda x: join_nonempty( + 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}), + 'start_time': ('offset', 'start', {float_or_none}), + 'end_time': ('offset', 'end', {float_or_none}), + })) or None, } bbc3_config = self._parse_json( From 2da7bcca16fdb40d4bdb2746643ba1a603771382 Mon Sep 17 00:00:00 2001 From: bashonly <bashonly@bashonly.com> Date: Sat, 16 Sep 2023 18:57:14 -0500 Subject: [PATCH 153/218] Revert 9d376c4daeaf1279a011582f3f0e6ae42af520dd Authored by: bashonly --- yt_dlp/extractor/amazonminitv.py | 63 +++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index ad23b16bd6..b57d985d10 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -37,7 +37,7 @@ def _call_api(self, asin, data=None, note=None): return resp['data'][data['operationName']] -class AmazonMiniTVIE(InfoExtractor): +class AmazonMiniTVIE(AmazonMiniTVBaseIE): _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)' _TESTS = [{ 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', @@ -86,14 +86,56 @@ class AmazonMiniTVIE(InfoExtractor): 'only_matching': True, }] + _GRAPHQL_QUERY_CONTENT = ''' +query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { + content( + applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} + contentId: $contentId + contentType: $contentType + ) { + contentId + name + ... on Episode { + contentId + vodType + name + images + description { + synopsis + contentLengthInSeconds + } + publicReleaseDateUTC + audioTracks + seasonId + seriesId + seriesName + seasonNumber + episodeNumber + timecode { + endCreditsTime + } + } + ... 
on MovieContent { + contentId + vodType + name + description { + synopsis + contentLengthInSeconds + } + images + publicReleaseDateUTC + audioTracks + } + } +}''' + def _real_extract(self, url): - video_uuid = self._match_id(url) - asin = f'amzn1.dv.gti.{video_uuid}' - webpage = self._download_webpage(f'https://www.amazon.in/minitv/tp/{video_uuid}', asin) - data = self._search_nextjs_data(webpage, asin)['props']['pageProps']['ssrProps'] + asin = f'amzn1.dv.gti.{self._match_id(url)}' + prs = self._call_api(asin, note='Downloading playback info') formats, subtitles = [], {} - for type_, asset in traverse_obj(data, ('playbackData', 'playbackAssets', {dict.items}, ...)): + for type_, asset in prs['playbackAssets'].items(): if not traverse_obj(asset, 'manifestUrl'): continue if type_ == 'hls': @@ -110,7 +152,12 @@ def _real_extract(self, url): else: self.report_warning(f'Unknown asset type: {type_}') - title_info = traverse_obj(data, ('contentData', {dict})) or {} + title_info = self._call_api( + asin, note='Downloading title info', data={ + 'operationName': 'content', + 'variables': {'contentId': asin}, + 'query': self._GRAPHQL_QUERY_CONTENT, + }) credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000) is_episode = title_info.get('vodType') == 'EPISODE' @@ -145,7 +192,6 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE): IE_NAME = 'amazonminitv:season' _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' IE_DESC = 'Amazon MiniTV Season, "minitv:season:" prefix' - _WORKING = False _TESTS = [{ 'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0', 'playlist_mincount': 6, @@ -205,7 +251,6 @@ class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE): IE_NAME = 'amazonminitv:series' _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix' - _WORKING = False _TESTS = [{ 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', 'playlist_mincount': 3, From 538d37671a17e0782d17f08df17800e2e3bd57c8 Mon Sep 17 00:00:00 2001 From: bashonly <bashonly@bashonly.com> Date: Sat, 16 Sep 2023 19:03:30 -0500 Subject: [PATCH 154/218] [ie/AmazonMiniTV] Fix extractors Closes #7817 Authored by: GautamMKGarg, bashonly Co-authored by: GautamMKGarg <GautamMKgarg@gmail.com> --- yt_dlp/extractor/amazonminitv.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index b57d985d10..2c71c5ef56 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -22,8 +22,11 @@ def _call_api(self, asin, data=None, note=None): resp = self._download_json( f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', - asin, note=note, headers={'Content-Type': 'application/json'}, - data=json.dumps(data).encode() if data else None, + asin, note=note, headers={ + 'Content-Type': 'application/json', + 'currentpageurl': '/', + 'currentplatform': 'dWeb' + }, data=json.dumps(data).encode() if data else None, query=None if data else { 'deviceType': 'A1WMMUXPCUJL4N', 'contentId': asin, @@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE): 'ext': 'mp4', 'title': 'May I Kiss You?', 'language': 'Hindi', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', 'description': 'md5:a549bfc747973e04feb707833474e59d', 'release_timestamp': 1644710400, 'release_date': '20220213', @@ -68,7 +71,7 @@ class 
AmazonMiniTVIE(AmazonMiniTVBaseIE): 'ext': 'mp4', 'title': 'Jahaan', 'language': 'Hindi', - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'description': 'md5:05eb765a77bf703f322f120ec6867339', 'release_timestamp': 1647475200, 'release_date': '20220317', From 9652bca1bd02f6bc1b8cb1e186f2ccbf32225561 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 16 Sep 2023 19:38:09 -0500 Subject: [PATCH 155/218] [ie/web.archive:vlive] Remove extractor (#8132) Closes #8122 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/archiveorg.py | 235 -------------------------------- yt_dlp/extractor/naver.py | 2 +- 3 files changed, 1 insertion(+), 237 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4fed6d66a2..bf0c67542e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -122,7 +122,6 @@ from .archiveorg import ( ArchiveOrgIE, YoutubeWebArchiveIE, - VLiveWebArchiveIE, ) from .arcpublishing import ArcPublishingIE from .arkena import ArkenaIE diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 2541cd6fd8..a0b26ac5a0 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -3,7 +3,6 @@ import urllib.parse from .common import InfoExtractor -from .naver import NaverBaseIE from .youtube import YoutubeBaseInfoExtractor, YoutubeIE from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest @@ -947,237 +946,3 @@ def _real_extract(self, url): if not info.get('title'): info['title'] = video_id return info - - -class VLiveWebArchiveIE(InfoExtractor): - IE_NAME = 'web.archive:vlive' - IE_DESC = 'web.archive.org saved vlive videos' - _VALID_URL = r'''(?x) - (?:https?://)?web\.archive\.org/ - (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? 
# /web and the version index is optional - (?:https?(?::|%3[Aa])//)?(?: - (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL - ) - ''' - _TESTS = [{ - 'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326', - 'md5': 'cc7314812855ce56de70a06a27314983', - 'info_dict': { - 'id': '1326', - 'ext': 'mp4', - 'title': "Girl's Day's Broadcast", - 'creator': "Girl's Day", - 'view_count': int, - 'uploader_id': 'muploader_a', - 'uploader_url': None, - 'uploader': None, - 'upload_date': '20150817', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', - 'timestamp': 1439816449, - 'like_count': int, - 'channel': 'Girl\'s Day', - 'channel_id': 'FDF27', - 'comment_count': int, - 'release_timestamp': 1439818140, - 'release_date': '20150817', - 'duration': 1014, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937', - 'info_dict': { - 'id': '16937', - 'ext': 'mp4', - 'title': '첸백시 걍방', - 'creator': 'EXO', - 'view_count': int, - 'subtitles': 'mincount:12', - 'uploader_id': 'muploader_j', - 'uploader_url': 'http://vlive.tv', - 'uploader': None, - 'upload_date': '20161112', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', - 'timestamp': 1478923074, - 'like_count': int, - 'channel': 'EXO', - 'channel_id': 'F94BD', - 'comment_count': int, - 'release_timestamp': 1478924280, - 'release_date': '20161112', - 'duration': 906, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870', - 'info_dict': { - 'id': '101870', - 'ext': 'mp4', - 'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)', - 'creator': 'Dispatch', - 'view_count': int, - 'subtitles': 'mincount:6', - 'uploader_id': 'V__FRA08071', - 'uploader_url': 'http://vlive.tv', - 'uploader': None, - 'upload_date': '20181130', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', - 'timestamp': 1543601327, - 'like_count': int, - 'channel': 'Dispatch', - 'channel_id': 'C796F3', - 'comment_count': int, - 'release_timestamp': 1543601040, - 'release_date': '20181130', - 'duration': 279, - }, - 'params': { - 'skip_download': True, - }, - }] - - # The wayback machine has special timestamp and "mode" values: - # timestamp: - # 1 = the first capture - # 2 = the last capture - # mode: - # id_ = Identity - perform no alterations of the original resource, return it as it was archived. 
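The removed comments above document the Wayback Machine conventions this extractor relied on; concretely, a raw capture is fetched through the "id_" modifier. A minimal sketch, where the base URL matches the _WAYBACK_BASE_URL defined just below and the video id is taken from the removed tests (all values illustrative):

    import urllib.request

    # '2' selects the most recent capture; 'id_' asks for the payload
    # byte-for-byte as archived, with no Wayback rewriting applied
    url = 'https://web.archive.org/web/2id_/https://www.vlive.tv/video/1326'
    with urllib.request.urlopen(url) as resp:
        page = resp.read().decode('utf-8', 'replace')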
- _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/' - - def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs): - for retry in self.RetryManager(): - try: - return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 404: - raise ExtractorError('Page was not archived', expected=True) - retry.error = e - continue - - def _download_archived_json(self, url, video_id, **kwargs): - page = self._download_archived_page(url, video_id, **kwargs) - if not page: - raise ExtractorError('Page was not archived', expected=True) - else: - return self._parse_json(page, video_id) - - def _extract_formats_from_m3u8(self, m3u8_url, params, video_id): - m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False) - if not m3u8_doc: - return - - # M3U8 document should be changed to archive domain - m3u8_doc = m3u8_doc.splitlines() - url_base = m3u8_url.rsplit('/', 1)[0] - first_segment = None - for i, line in enumerate(m3u8_doc): - if not line.startswith('#'): - m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}' - first_segment = first_segment or m3u8_doc[i] - - # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870 - urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False, - fatal=False, note='Check first segment availablity') - if urlh: - formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id) - if subtitles: - self._report_ignoring_subs('m3u8') - return formats - - # Closely follows the logic of the ArchiveTeam grab script - # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua - def _real_extract(self, url): - video_id, url_date = self._match_valid_url(url).group('id', 'date') - - webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date) - - player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id) - user_country = traverse_obj(player_info, ('common', 'userCountry')) - - main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url') - main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script') - app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id') - - inkey = self._download_archived_json( - f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={ - 'appId': app_id, - 'platformType': 'PC', - 'gcc': user_country, - 'locale': 'en_US', - }, fatal=False) - - vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId')) - - vod_data = self._download_archived_json( - f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={ - 'key': inkey.get('inkey'), - 'pid': 'rmcPlayer_16692457559726800', # partially unix time and partially random. 
Fixed value used by archiveteam project - 'sid': '2024', - 'ver': '2.0', - 'devt': 'html5_pc', - 'doct': 'json', - 'ptc': 'https', - 'sptc': 'https', - 'cpt': 'vtt', - 'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D', - 'pv': '4.26.9', - 'dr': '1920x1080', - 'cpl': 'en_US', - 'lc': 'en_US', - 'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D', - 'adu': '%2F', - 'videoId': vod_id, - 'cc': user_country, - }) - - formats = [] - - streams = traverse_obj(vod_data, ('streams', ...)) - if len(streams) > 1: - self.report_warning('Multiple streams found. Only the first stream will be downloaded.') - stream = streams[0] - - max_stream = max( - stream.get('videos') or [], - key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None) - if max_stream is not None: - params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'} - formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or [] - - # For parts of the project MP4 files were archived - max_video = max( - traverse_obj(vod_data, ('videos', 'list', ...)), - key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None) - if max_video is not None: - video_url = self._WAYBACK_BASE_URL + max_video.get('source') - urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False, - fatal=False, note='Check video availablity') - if urlh: - formats.append({'url': video_url}) - - return { - 'id': video_id, - 'formats': formats, - **traverse_obj(player_info, ('postDetail', 'post', { - 'title': ('officialVideo', 'title', {str}), - 'creator': ('author', 'nickname', {str}), - 'channel': ('channel', 'channelName', {str}), - 'channel_id': ('channel', 'channelCode', {str}), - 'duration': ('officialVideo', 'playTime', {int_or_none}), - 'view_count': ('officialVideo', 'playCount', {int_or_none}), - 'like_count': ('officialVideo', 'likeCount', {int_or_none}), - 'comment_count': ('officialVideo', 'commentCount', {int_or_none}), - 'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}), - 'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}), - })), - **traverse_obj(vod_data, ('meta', { - 'uploader_id': ('user', 'id', {str}), - 'uploader': ('user', 'name', {str}), - 'uploader_url': ('user', 'url', {url_or_none}), - 'thumbnail': ('cover', 'source', {url_or_none}), - }), expected_type=lambda x: x or None), - **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]), - } diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index d79caf5f3d..2d8459b02b 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -21,7 +21,7 @@ class NaverBaseIE(InfoExtractor): _CAPTION_EXT_RE = r'\.(?:ttml|vtt)' - @staticmethod # NB: Used in VLiveWebArchiveIE, WeverseIE + @staticmethod # 
NB: Used in WeverseIE def process_subtitles(vod_data, process_url): ret = {'subtitles': {}, 'automatic_captions': {}} for caption in traverse_obj(vod_data, ('captions', 'list', ...)): From 94389b225d9bcf29aa7ba8afaf1bbd7c62204eae Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 16 Sep 2023 21:42:42 -0500 Subject: [PATCH 156/218] [ie/RTVSLO] Fix format extraction (#8131) Closes #8020 Authored by: bashonly --- yt_dlp/extractor/rtvslo.py | 50 +++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/yt_dlp/extractor/rtvslo.py b/yt_dlp/extractor/rtvslo.py index 05942b6b44..39ace7cc6e 100644 --- a/yt_dlp/extractor/rtvslo.py +++ b/yt_dlp/extractor/rtvslo.py @@ -1,6 +1,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, parse_duration, traverse_obj, unified_timestamp, @@ -25,7 +26,7 @@ class RTVSLOIE(InfoExtractor): 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv', 'info_dict': { 'id': '174842550', - 'ext': 'flv', + 'ext': 'mp4', 'release_timestamp': 1643140032, 'upload_date': '20220125', 'series': 'Dnevnik', @@ -69,7 +70,21 @@ class RTVSLOIE(InfoExtractor): 'tbr': 128000, 'release_date': '20220201', }, - + }, { + 'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750', + 'info_dict': { + 'id': '148350750', + 'ext': 'mp4', + 'title': 'Prvi šolski dan, mozaična oddaja za mlade', + 'series': 'Razred zase', + 'series_id': '148185730', + 'duration': 1481, + 'upload_date': '20121019', + 'timestamp': 1350672122, + 'release_date': '20121019', + 'release_timestamp': 1350672122, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg', + }, }, { 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550', 'only_matching': True @@ -98,13 +113,14 @@ def _real_extract(self, url): media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response'] formats = [] + skip_protocols = ['smil', 'f4m', 'dash'] adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none) if adaptive_url: - formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']) + formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols) adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none) if adaptive_url: - for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']): + for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols): formats.append({ **f, 'format_id': 'sign-' + f['format_id'], @@ -114,19 +130,19 @@ def _real_extract(self, url): else f.get('language')) }) - formats.extend( - { - 'url': f['streams'][strm], - 'ext': traverse_obj(f, 'mediaType', expected_type=str.lower), - 'width': f.get('width'), - 'height': f.get('height'), - 'tbr': f.get('bitrate'), - 'filesize': f.get('filesize'), - } - for strm in ('http', 'https') - for f in media.get('mediaFiles') or [] - if traverse_obj(f, ('streams', strm)) - ) + for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))): + formats.append(traverse_obj(mediafile, { + 'url': ('streams', 'https'), + 'ext': ('mediaType', {str.lower}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'tbr': ('bitrate', {int_or_none}), + 'filesize': ('filesize', {int_or_none}), + })) + + for mediafile in traverse_obj(media, ('mediaFiles', lambda 
_, v: url_or_none(v['streams']['hls_sec']))): + formats.extend(self._extract_wowza_formats( + mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols)) if any('intermission.mp4' in x['url'] for x in formats): self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) From 836e06d246512f286f30c1371b2c54b72c9ecd93 Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Sun, 17 Sep 2023 12:56:50 +0200 Subject: [PATCH 157/218] [core] Fix support for upcoming Python 3.12 (#8130) This also adds the following test runners: - `3.12-dev` on `ubuntu-latest` - `3.12-dev` on `windows-latest` - `pypy-3.10` on `ubuntu-latest` Authored by: Grub4K --- .github/workflows/core.yml | 5 ++++- devscripts/update-version.py | 4 ++-- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/extractor/aws.py | 2 +- yt_dlp/extractor/goplay.py | 4 ++-- yt_dlp/extractor/motherless.py | 2 +- yt_dlp/extractor/panopto.py | 4 ++-- yt_dlp/networking/_urllib.py | 2 +- yt_dlp/networking/exceptions.py | 2 +- yt_dlp/utils/_utils.py | 12 ++++++++---- 10 files changed, 23 insertions(+), 16 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index dead444c0b..689408c500 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -13,13 +13,16 @@ jobs: matrix: os: [ubuntu-latest] # CPython 3.11 is in quick-test - python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8] + python-version: ['3.8', '3.9', '3.10', '3.12-dev', pypy-3.7, pypy-3.8, pypy-3.10] run-tests-ext: [sh] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.7' run-tests-ext: bat + - os: windows-latest + python-version: '3.12-dev' + run-tests-ext: bat - os: windows-latest python-version: pypy-3.9 run-tests-ext: bat diff --git a/devscripts/update-version.py b/devscripts/update-version.py index c873d10a5d..0144bd284a 100644 --- a/devscripts/update-version.py +++ b/devscripts/update-version.py @@ -10,14 +10,14 @@ import argparse import contextlib import sys -from datetime import datetime +from datetime import datetime, timezone from devscripts.utils import read_version, run_process, write_file def get_new_version(version, revision): if not version: - version = datetime.utcnow().strftime('%Y.%m.%d') + version = datetime.now(timezone.utc).strftime('%Y.%m.%d') if revision: assert revision.isdigit(), 'Revision must be a number' diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 666d89b461..1feed30524 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2591,7 +2591,7 @@ def _fill_common_fields(self, info_dict, final=True): # Working around out-of-range timestamp values (e.g. 
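            # A quick equivalence sketch for the migration this patch applies
            # (utcnow()/utcfromtimestamp() are deprecated as of Python 3.12):
            #   datetime.datetime.utcnow()             -> datetime.datetime.now(datetime.timezone.utc)
            #   datetime.datetime.utcfromtimestamp(ts) -> datetime.datetime.fromtimestamp(ts, datetime.timezone.utc)
            # Both replacements return timezone-aware datetimes; strftime()
            # output for UTC values is unchanged.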
negative ones on Windows, # see http://bugs.python.org/issue1646728) with contextlib.suppress(ValueError, OverflowError, OSError): - upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) + upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc) info_dict[date_key] = upload_date.strftime('%Y%m%d') live_keys = ('is_live', 'was_live') diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index eb831a1530..c4741a6a11 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with def _aws_execute_api(self, aws_dict, video_id, query=None): query = query or {} - amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') + amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ') date = amz_date[:8] headers = { 'Accept': 'application/json', diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py index 960d7d7bc0..0a3c8340f1 100644 --- a/yt_dlp/extractor/goplay.py +++ b/yt_dlp/extractor/goplay.py @@ -383,9 +383,9 @@ def __get_current_timestamp(): months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - time_now = datetime.datetime.utcnow() + time_now = datetime.datetime.now(datetime.timezone.utc) format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day) - time_string = datetime.datetime.utcnow().strftime(format_string) + time_string = time_now.strftime(format_string) return time_string def __str__(self): diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py index 769b52ce6d..e359c44e93 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -151,7 +151,7 @@ def _real_extract(self, url): 'd': 'days', } kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} - upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') + upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d') comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)) uploader_id = self._html_search_regex( diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py index 6e3c9f442d..5ab2b2bcec 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -1,7 +1,7 @@ import calendar import json import functools -from datetime import datetime +from datetime import datetime, timezone from random import random from .common import InfoExtractor @@ -243,7 +243,7 @@ def _mark_watched(self, base_url, video_id, delivery_info): invocation_id = delivery_info.get('InvocationId') stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str) if invocation_id and stream_id and duration: - timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/' + timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/' data = { 'streamRequests': [ { diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py index b3e705b844..3c0647ecf9 100644 --- a/yt_dlp/networking/_urllib.py +++ b/yt_dlp/networking/_urllib.py @@ -429,7 +429,7 @@ def _send(self, request): except urllib.error.HTTPError as e: if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)): # Prevent file object from being closed 
when urllib.error.HTTPError is destroyed. - e._closer.file = None + e._closer.close_called = True raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e raise # unexpected except urllib.error.URLError as e: diff --git a/yt_dlp/networking/exceptions.py b/yt_dlp/networking/exceptions.py index 10afc9ccbf..465b18ba94 100644 --- a/yt_dlp/networking/exceptions.py +++ b/yt_dlp/networking/exceptions.py @@ -115,7 +115,7 @@ def __init__(self, http_error: HTTPError): hdrs=http_error.response.headers, fp=http_error.response ) - self._closer.file = None # Disable auto close + self._closer.close_called = True # Disable auto close self._http_error = http_error HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 180bec245a..ef26de1160 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -669,6 +669,7 @@ def replace_insane(char): def sanitize_path(s, force=False): """Sanitizes and normalizes path on Windows""" + # XXX: this handles drive relative paths (c:sth) incorrectly if sys.platform == 'win32': force = False drive_or_unc, _ = os.path.splitdrive(s) @@ -687,7 +688,10 @@ def sanitize_path(s, force=False): sanitized_path.insert(0, drive_or_unc + os.path.sep) elif force and s and s[0] == os.path.sep: sanitized_path.insert(0, os.path.sep) - return os.path.join(*sanitized_path) + # TODO: Fix behavioral differences <3.12 + # The workaround using `normpath` only superficially passes tests + # Ref: https://github.com/python/cpython/pull/100351 + return os.path.normpath(os.path.join(*sanitized_path)) def sanitize_url(url, *, scheme='http'): @@ -1256,7 +1260,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'): if precision == 'auto': auto_precision = True precision = 'microsecond' - today = datetime_round(datetime.datetime.utcnow(), precision) + today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision) if date_str in ('now', 'today'): return today if date_str == 'yesterday': @@ -1319,8 +1323,8 @@ def datetime_round(dt, precision='day'): 'second': 1, } roundto = lambda x, n: ((x + n / 2) // n) * n - timestamp = calendar.timegm(dt.timetuple()) - return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision])) + timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision]) + return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc) def hyphenate_date(date_str): From 30ba233d4cee945756ed7344e7ddb3a90d2ae608 Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Sun, 17 Sep 2023 13:22:04 +0200 Subject: [PATCH 158/218] [devscripts] `make_changelog`: Fix changelog grouping and add networking group (#8124) Authored by: Grub4K --- devscripts/changelog_override.json | 21 ++++++- devscripts/make_changelog.py | 96 ++++++++++++++++-------------- 2 files changed, 71 insertions(+), 46 deletions(-) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index d03db3f232..e7f453acf8 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -68,6 +68,25 @@ { "action": "change", "when": "b03fa7834579a01cc5fba48c0e73488a16683d48", - "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b" + "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f", + "short": "[test] Add 
tests for socks proxies (#7908)", + "authors": ["coletdjnz"] + }, + { + "action": "change", + "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76", + "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)", + "authors": ["coletdjnz"] + }, + { + "action": "change", + "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85", + "short": "[rh:urllib] Simplify gzip decoding (#7611)", + "authors": ["Grub4K"] } ] diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index 84f72d52f3..ac68dcd19a 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -31,35 +31,27 @@ class CommitGroup(enum.Enum): EXTRACTOR = 'Extractor' DOWNLOADER = 'Downloader' POSTPROCESSOR = 'Postprocessor' + NETWORKING = 'Networking' MISC = 'Misc.' - @classmethod - @property - def ignorable_prefixes(cls): - return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream') - @classmethod @lru_cache - def commit_lookup(cls): + def subgroup_lookup(cls): return { name: group for group, names in { - cls.PRIORITY: {'priority'}, cls.CORE: { 'aes', 'cache', 'compat_utils', 'compat', 'cookies', - 'core', 'dependencies', 'formats', 'jsinterp', - 'networking', 'outtmpl', 'plugins', 'update', - 'upstream', 'utils', }, cls.MISC: { @@ -67,23 +59,40 @@ def commit_lookup(cls): 'cleanup', 'devscripts', 'docs', - 'misc', 'test', }, - cls.EXTRACTOR: {'extractor', 'ie'}, - cls.DOWNLOADER: {'downloader', 'fd'}, - cls.POSTPROCESSOR: {'postprocessor', 'pp'}, + cls.NETWORKING: { + 'rh', + }, }.items() for name in names } @classmethod - def get(cls, value): - result = cls.commit_lookup().get(value) - if result: - logger.debug(f'Mapped {value!r} => {result.name}') + @lru_cache + def group_lookup(cls): + result = { + 'fd': cls.DOWNLOADER, + 'ie': cls.EXTRACTOR, + 'pp': cls.POSTPROCESSOR, + 'upstream': cls.CORE, + } + result.update({item.name.lower(): item for item in iter(cls)}) return result + @classmethod + def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: + group, _, subgroup = (group.strip().lower() for group in value.partition('/')) + + result = cls.group_lookup().get(group) + if not result: + if subgroup: + return None, value + subgroup = group + result = cls.subgroup_lookup().get(subgroup) + + return result, subgroup or None + @dataclass class Commit: @@ -198,19 +207,23 @@ def _prepare_cleanup_misc_items(self, items): for commit_infos in cleanup_misc_items.values(): sorted_items.append(CommitInfo( 'cleanup', ('Miscellaneous',), ', '.join( - self._format_message_link(None, info.commit.hash).strip() + self._format_message_link(None, info.commit.hash) for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')), [], Commit(None, '', commit_infos[0].commit.authors), [])) return sorted_items - def format_single_change(self, info): - message = self._format_message_link(info.message, info.commit.hash) + def format_single_change(self, info: CommitInfo): + message, sep, rest = info.message.partition('\n') + if '[' not in message: + # If the message doesn't already contain markdown links, try to add a link to the commit + message = self._format_message_link(message, info.commit.hash) + if info.issues: - message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1) + message = f'{message} ({self._format_issues(info.issues)})' if info.commit.authors: - message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1) + message = f'{message} by {self._format_authors(info.commit.authors)}' if info.fixes: fix_message = ', 
'.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes) @@ -219,16 +232,14 @@ def format_single_change(self, info): if authors != info.commit.authors: fix_message = f'{fix_message} by {self._format_authors(authors)}' - message = message.replace('\n', f' (With fixes in {fix_message})\n', 1) + message = f'{message} (With fixes in {fix_message})' - return message[:-1] + return message if not sep else f'{message}{sep}{rest}' def _format_message_link(self, message, hash): assert message or hash, 'Improperly defined commit message or override' message = message if message else hash[:HASH_LENGTH] - if not hash: - return f'{message}\n' - return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1) + return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message def _format_issues(self, issues): return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) @@ -318,7 +329,7 @@ def _get_commits_and_fixes(self, default_author): for commitish, revert_commit in reverts.items(): reverted = commits.pop(commitish, None) if reverted: - logger.debug(f'{commit} fully reverted {reverted}') + logger.debug(f'{commitish} fully reverted {reverted}') else: commits[revert_commit.hash] = revert_commit @@ -337,7 +348,7 @@ def apply_overrides(self, overrides): for override in overrides: when = override.get('when') if when and when not in self and when != self._start: - logger.debug(f'Ignored {when!r}, not in commits {self._start!r}') + logger.debug(f'Ignored {when!r} override') continue override_hash = override.get('hash') or when @@ -365,7 +376,7 @@ def groups(self): for commit in self: upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short) if upstream_re: - commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}' + commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}' match = self.MESSAGE_RE.fullmatch(commit.short) if not match: @@ -410,25 +421,20 @@ def details_from_prefix(prefix): if not prefix: return CommitGroup.CORE, None, () - prefix, _, details = prefix.partition('/') - prefix = prefix.strip() - details = details.strip() + prefix, *sub_details = prefix.split(':') - group = CommitGroup.get(prefix.lower()) - if group is CommitGroup.PRIORITY: - prefix, _, details = details.partition('/') + group, details = CommitGroup.get(prefix) + if group is CommitGroup.PRIORITY and details: + details = details.partition('/')[2].strip() - if not details and prefix and prefix not in CommitGroup.ignorable_prefixes: - logger.debug(f'Replaced details with {prefix!r}') - details = prefix or None + if details and '/' in details: + logger.error(f'Prefix is overnested, using first part: {prefix}') + details = details.partition('/')[0].strip() if details == 'common': details = None - - if details: - details, *sub_details = details.split(':') - else: - sub_details = [] + elif group is CommitGroup.NETWORKING and details == 'rh': + details = 'Request Handler' return group, details, sub_details From 58493923e9b6f774947a2131e5258e9f3cf816be Mon Sep 17 00:00:00 2001 From: soundchaser128 <69268557+soundchaser128@users.noreply.github.com> Date: Sun, 17 Sep 2023 17:09:42 +0200 Subject: [PATCH 159/218] [ie/rule34video] Extract tags (#7117) Authored by: soundchaser128 --- yt_dlp/extractor/rule34video.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py index 9d15f4d214..f3250b557a 100644 --- a/yt_dlp/extractor/rule34video.py +++ 
b/yt_dlp/extractor/rule34video.py @@ -1,6 +1,6 @@ import re -from ..utils import parse_duration +from ..utils import parse_duration, unescapeHTML from .common import InfoExtractor @@ -16,7 +16,8 @@ class Rule34VideoIE(InfoExtractor): 'title': 'Shot It-(mmd hmv)', 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg', 'duration': 347.0, - 'age_limit': 18 + 'age_limit': 18, + 'tags': 'count:14' } }, { @@ -28,7 +29,8 @@ class Rule34VideoIE(InfoExtractor): 'title': 'Lara in Trouble Ep. 7 [WildeerStudio]', 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg', 'duration': 938.0, - 'age_limit': 18 + 'age_limit': 18, + 'tags': 'count:50' } }, ] @@ -57,5 +59,7 @@ def _real_extract(self, url): 'title': title, 'thumbnail': thumbnail, 'duration': parse_duration(duration), - 'age_limit': 18 + 'age_limit': 18, + 'tags': list(map(unescapeHTML, re.findall( + r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))), } From efa2339502a37cf13ae7f143bd8b2c28f452d1cd Mon Sep 17 00:00:00 2001 From: Simon <simon30002021@icloud.com> Date: Sun, 17 Sep 2023 17:11:22 +0200 Subject: [PATCH 160/218] [ie/lecturio] Improve `_VALID_URL` (#7649) Authored by: simon300000 --- yt_dlp/extractor/lecturio.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index bb059d3a29..795012541c 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -57,8 +57,8 @@ class LecturioIE(LecturioBaseIE): _VALID_URL = r'''(?x) https:// (?: - app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))| - (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag + app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))| + (?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag ) ''' _TESTS = [{ @@ -73,6 +73,9 @@ class LecturioIE(LecturioBaseIE): }, { 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', 'only_matching': True, + }, { + 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag', + 'only_matching': True, }, { 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', 'only_matching': True, From 63e0c5748c0eb461a2ccca4181616eb930b4b750 Mon Sep 17 00:00:00 2001 From: aky-01 <65510015+aky-01@users.noreply.github.com> Date: Sun, 17 Sep 2023 17:16:11 +0200 Subject: [PATCH 161/218] [ie/IndavideoEmbed] Fix extraction (#8129) Closes #7190 Authored by: aky-01 --- yt_dlp/extractor/indavideo.py | 73 +++++++++++++++++------------------ 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/yt_dlp/extractor/indavideo.py b/yt_dlp/extractor/indavideo.py index 4fa97d8bba..564bf8a024 100644 --- a/yt_dlp/extractor/indavideo.py +++ b/yt_dlp/extractor/indavideo.py @@ -1,9 +1,9 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, parse_age_limit, parse_iso8601, + time_seconds, update_url_query, ) @@ -11,15 +11,14 @@ class IndavideoEmbedIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)' # Some example URLs covered by generic extractor: - # http://indavideo.hu/video/Vicces_cica_1 - # http://index.indavideo.hu/video/2015_0728_beregszasz - # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko - 
# http://erotika.indavideo.hu/video/Amator_tini_punci - # http://film.indavideo.hu/video/f_hrom_nagymamm_volt - # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes - _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)'] + # https://indavideo.hu/video/Vicces_cica_1 + # https://index.indavideo.hu/video/Hod_Nemetorszagban + # https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko + # https://film.indavideo.hu/video/f_farkaslesen + # https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes + _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)'] _TESTS = [{ - 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/', + 'url': 'https://indavideo.hu/player/video/1bdc3c6d80/', 'md5': 'c8a507a1c7410685f83a06eaeeaafeab', 'info_dict': { 'id': '1837039', @@ -36,21 +35,33 @@ class IndavideoEmbedIE(InfoExtractor): 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'], }, }, { - 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1', - 'only_matching': True, - }, { - 'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1', + 'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://indavideo.hu/video/Vicces_cica_1', + 'info_dict': { + 'id': '1335611', + 'ext': 'mp4', + 'title': 'Vicces cica', + 'description': 'Játszik a tablettel. :D', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Jet_Pack', + 'uploader_id': '491217', + 'timestamp': 1390821212, + 'upload_date': '20140127', + 'duration': 7, + 'age_limit': 0, + 'tags': ['cica', 'Jet_Pack'], + }, + }] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( - 'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, - video_id)['data'] - - title = video['title'] + f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/', + video_id, query={'_': time_seconds()})['data'] video_urls = [] @@ -60,33 +71,21 @@ def _real_extract(self, url): elif isinstance(video_files, dict): video_urls.extend(video_files.values()) - video_file = video.get('video_file') - if video: - video_urls.append(video_file) video_urls = list(set(video_urls)) - video_prefix = video_urls[0].rsplit('/', 1)[0] - - for flv_file in video.get('flv_files', []): - flv_url = '%s/%s' % (video_prefix, flv_file) - if flv_url not in video_urls: - video_urls.append(flv_url) - - filesh = video.get('filesh') + filesh = video.get('filesh') or {} formats = [] for video_url in video_urls: height = int_or_none(self._search_regex( r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)) - if filesh: - if not height: - continue - token = filesh.get(compat_str(height)) - if token is None: - continue - video_url = update_url_query(video_url, {'token': token}) + if not height and len(filesh) == 1: + height = int_or_none(list(filesh.keys())[0]) + token = filesh.get(str(height)) + if token is None: + continue formats.append({ - 'url': video_url, + 'url': update_url_query(video_url, {'token': token}), 'height': height, }) @@ -103,7 +102,7 @@ def _real_extract(self, url): return { 'id': video.get('id') or video_id, - 'title': title, + 'title': video.get('title'), 'description': video.get('description'), 'thumbnails': thumbnails, 'uploader': video.get('user_name'), From 81f46ac573dc443ad48560f308582a26784d3015 
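The reworked indavideo format loop above looks the access token up by height and appends it to the media URL. A minimal sketch of that step, using the real yt_dlp.utils helper; the token and URL values are made up:

    from yt_dlp.utils import update_url_query

    filesh = {'360': 'deadbeef'}  # height -> token, as returned by the API
    url = update_url_query(
        'https://assets.example/videos/1335611.360.mp4',
        {'token': filesh['360']})
    # -> 'https://assets.example/videos/1335611.360.mp4?token=deadbeef'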
Mon Sep 17 00:00:00 2001 From: Sebastian Koch <sebastian@0py.de> Date: Sun, 17 Sep 2023 22:54:00 +0200 Subject: [PATCH 162/218] [ie/massengeschmack.tv] Fix title extraction (#7813) Authored by: sb0stn --- yt_dlp/extractor/massengeschmacktv.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/massengeschmacktv.py b/yt_dlp/extractor/massengeschmacktv.py index 7dacb43e02..1490e9b21d 100644 --- a/yt_dlp/extractor/massengeschmacktv.py +++ b/yt_dlp/extractor/massengeschmacktv.py @@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor): _TEST = { 'url': 'https://massengeschmack.tv/play/fktv202', - 'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3', + 'md5': '9996f314994a49fefe5f39aa1b07ae21', 'info_dict': { 'id': 'fktv202', 'ext': 'mp4', - 'title': 'Fernsehkritik-TV - Folge 202', + 'title': 'Fernsehkritik-TV #202', + 'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg' }, } @@ -29,9 +30,6 @@ def _real_extract(self, url): episode = self._match_id(url) webpage = self._download_webpage(url, episode) - title = clean_html(self._html_search_regex( - '<h3>([^<]+)</h3>', webpage, 'title')) - thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False) sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json) formats = [] @@ -67,7 +65,8 @@ def _real_extract(self, url): return { 'id': episode, - 'title': title, + 'title': clean_html(self._html_search_regex( + r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)), 'formats': formats, - 'thumbnail': thumbnail, + 'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False), } From 20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69 Mon Sep 17 00:00:00 2001 From: coletdjnz <coletdjnz@protonmail.com> Date: Mon, 18 Sep 2023 07:33:26 +0000 Subject: [PATCH 163/218] [networking] Fix various socks proxy bugs (#8065) - Fixed support for IPv6 socks proxies - Fixed support for IPv6 over socks5 - Fixed --source-address not being obeyed for socks4 and socks5 - Fixed socks4a when the destination address is an IPv4 address Closes https://github.com/yt-dlp/yt-dlp/issues/7959 Fixes https://github.com/ytdl-org/youtube-dl/issues/15368 Authored by: coletdjnz Co-authored-by: Simon Sawicki <accounts@grub4k.xyz> Co-authored-by: bashonly <bashonly@bashonly.com> --- test/test_socks.py | 38 +++++--------------- yt_dlp/networking/_helper.py | 57 ++++++++++++++++++++++++++++++ yt_dlp/networking/_urllib.py | 68 +++++++++++++----------------------- yt_dlp/socks.py | 31 +++++++++------- 4 files changed, 110 insertions(+), 84 deletions(-) diff --git a/test/test_socks.py b/test/test_socks.py index 95ffce275b..211ee814d1 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -281,17 +281,13 @@ def test_socks4_auth(self, handler, ctx): rh, proxies={'all': f'socks4://user:@{server_address}'}) assert response['version'] == 4 - @pytest.mark.parametrize('handler,ctx', [ - pytest.param('Urllib', 'http', marks=pytest.mark.xfail( - reason='socks4a implementation currently broken when destination is not a domain name')) - ], indirect=True) + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) def test_socks4a_ipv4_target(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler) as server_address: with handler(proxies={'all': f'socks4a://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='127.0.0.1') assert response['version'] == 4 - assert 
response['ipv4_address'] == '127.0.0.1' - assert response['domain_address'] is None + assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1') @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) def test_socks4a_domain_target(self, handler, ctx): @@ -302,10 +298,7 @@ def test_socks4a_domain_target(self, handler, ctx): assert response['ipv4_address'] is None assert response['domain_address'] == 'localhost' - @pytest.mark.parametrize('handler,ctx', [ - pytest.param('Urllib', 'http', marks=pytest.mark.xfail( - reason='source_address is not yet supported for socks4 proxies')) - ], indirect=True) + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) def test_ipv4_client_source_address(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler) as server_address: source_address = f'127.0.0.{random.randint(5, 255)}' @@ -327,10 +320,7 @@ def test_socks4_errors(self, handler, ctx, reply_code): with pytest.raises(ProxyError): ctx.socks_info_request(rh) - @pytest.mark.parametrize('handler,ctx', [ - pytest.param('Urllib', 'http', marks=pytest.mark.xfail( - reason='IPv6 socks4 proxies are not yet supported')) - ], indirect=True) + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) def test_ipv6_socks4_proxy(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address: with handler(proxies={'all': f'socks4://{server_address}'}) as rh: @@ -342,7 +332,7 @@ def test_ipv6_socks4_proxy(self, handler, ctx): @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) def test_timeout(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address: - with handler(proxies={'all': f'socks4://{server_address}'}, timeout=1) as rh: + with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh: with pytest.raises(TransportError): ctx.socks_info_request(rh) @@ -383,7 +373,7 @@ def test_socks5_domain_target(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as server_address: with handler(proxies={'all': f'socks5://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='localhost') - assert response['ipv4_address'] == '127.0.0.1' + assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1') assert response['version'] == 5 @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) @@ -404,22 +394,15 @@ def test_socks5h_ip_target(self, handler, ctx): assert response['domain_address'] is None assert response['version'] == 5 - @pytest.mark.parametrize('handler,ctx', [ - pytest.param('Urllib', 'http', marks=pytest.mark.xfail( - reason='IPv6 destination addresses are not yet supported')) - ], indirect=True) + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) def test_socks5_ipv6_destination(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as server_address: with handler(proxies={'all': f'socks5://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='[::1]') assert response['ipv6_address'] == '::1' - assert response['port'] == 80 assert response['version'] == 5 - @pytest.mark.parametrize('handler,ctx', [ - pytest.param('Urllib', 'http', marks=pytest.mark.xfail( - reason='IPv6 socks5 proxies are not yet supported')) - ], indirect=True) + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) def test_ipv6_socks5_proxy(self, handler, ctx): with 
ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address: with handler(proxies={'all': f'socks5://{server_address}'}) as rh: @@ -430,10 +413,7 @@ def test_ipv6_socks5_proxy(self, handler, ctx): # XXX: is there any feasible way of testing IPv6 source addresses? # Same would go for non-proxy source_address test... - @pytest.mark.parametrize('handler,ctx', [ - pytest.param('Urllib', 'http', marks=pytest.mark.xfail( - reason='source_address is not yet supported for socks5 proxies')) - ], indirect=True) + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True) def test_ipv4_client_source_address(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as server_address: source_address = f'127.0.0.{random.randint(5, 255)}' diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py index a43c57bb4b..4c9dbf25dc 100644 --- a/yt_dlp/networking/_helper.py +++ b/yt_dlp/networking/_helper.py @@ -2,6 +2,7 @@ import contextlib import functools +import socket import ssl import sys import typing @@ -206,3 +207,59 @@ def wrapper(self, *args, **kwargs): e.handler = self raise return wrapper + + +def _socket_connect(ip_addr, timeout, source_address): + af, socktype, proto, canonname, sa = ip_addr + sock = socket.socket(af, socktype, proto) + try: + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + except socket.error: + sock.close() + raise + + +def create_connection( + address, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, + *, + _create_socket_func=_socket_connect +): + # Work around socket.create_connection() which tries all addresses from getaddrinfo() including IPv6. + # This filters the addresses based on the given source_address. + # Based on: https://github.com/python/cpython/blob/main/Lib/socket.py#L810 + host, port = address + ip_addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) + if not ip_addrs: + raise socket.error('getaddrinfo returns an empty list') + if source_address is not None: + af = socket.AF_INET if ':' not in source_address[0] else socket.AF_INET6 + ip_addrs = [addr for addr in ip_addrs if addr[0] == af] + if not ip_addrs: + raise OSError( + f'No remote IPv{4 if af == socket.AF_INET else 6} addresses available for connect. ' + f'Can\'t use "{source_address[0]}" as source address') + + err = None + for ip_addr in ip_addrs: + try: + sock = _create_socket_func(ip_addr, timeout, source_address) + # Explicitly break __traceback__ reference cycle + # https://bugs.python.org/issue36820 + err = None + return sock + except socket.error as e: + err = e + + try: + raise err + finally: + # Explicitly break __traceback__ reference cycle + # https://bugs.python.org/issue36820 + err = None diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py index 3c0647ecf9..c327f7744e 100644 --- a/yt_dlp/networking/_urllib.py +++ b/yt_dlp/networking/_urllib.py @@ -23,6 +23,7 @@ from ._helper import ( InstanceStoreMixin, add_accept_encoding_header, + create_connection, get_redirect_method, make_socks_proxy_opts, select_proxy, @@ -54,44 +55,10 @@ def _create_http_connection(http_class, source_address, *args, **kwargs): hc = http_class(*args, **kwargs) + if hasattr(hc, '_create_connection'): + hc._create_connection = create_connection + if source_address is not None: - # This is to workaround _create_connection() from socket where it will try all - # address data from getaddrinfo() including IPv6. 
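For illustration, the create_connection() helper added above can be driven directly; the host and source address here are placeholders:

    from yt_dlp.networking._helper import create_connection

    # getaddrinfo() results are filtered to the family of source_address,
    # so an IPv4 source address is never paired with an IPv6 endpoint
    sock = create_connection(('example.com', 80), timeout=10,
                             source_address=('192.0.2.10', 0))
    sock.close()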
This filters the result from - # getaddrinfo() based on the source_address value. - # This is based on the cpython socket.create_connection() function. - # https://github.com/python/cpython/blob/master/Lib/socket.py#L691 - def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): - host, port = address - err = None - addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) - af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6 - ip_addrs = [addr for addr in addrs if addr[0] == af] - if addrs and not ip_addrs: - ip_version = 'v4' if af == socket.AF_INET else 'v6' - raise OSError( - "No remote IP%s addresses available for connect, can't use '%s' as source address" - % (ip_version, source_address[0])) - for res in ip_addrs: - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket.socket(af, socktype, proto) - if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: - sock.settimeout(timeout) - sock.bind(source_address) - sock.connect(sa) - err = None # Explicitly break reference cycle - return sock - except OSError as _: - err = _ - if sock is not None: - sock.close() - if err is not None: - raise err - else: - raise OSError('getaddrinfo returns an empty list') - if hasattr(hc, '_create_connection'): - hc._create_connection = _create_connection hc.source_address = (source_address, 0) return hc @@ -220,13 +187,28 @@ def make_socks_conn_class(base_class, socks_proxy): proxy_args = make_socks_proxy_opts(socks_proxy) class SocksConnection(base_class): - def connect(self): - self.sock = sockssocket() - self.sock.setproxy(**proxy_args) - if type(self.timeout) in (int, float): # noqa: E721 - self.sock.settimeout(self.timeout) - self.sock.connect((self.host, self.port)) + _create_connection = create_connection + def connect(self): + def sock_socket_connect(ip_addr, timeout, source_address): + af, socktype, proto, canonname, sa = ip_addr + sock = sockssocket(af, socktype, proto) + try: + connect_proxy_args = proxy_args.copy() + connect_proxy_args.update({'addr': sa[0], 'port': sa[1]}) + sock.setproxy(**connect_proxy_args) + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: # noqa: E721 + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect((self.host, self.port)) + return sock + except socket.error: + sock.close() + raise + self.sock = create_connection( + (proxy_args['addr'], proxy_args['port']), timeout=self.timeout, + source_address=self.source_address, _create_socket_func=sock_socket_connect) if isinstance(self, http.client.HTTPSConnection): self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host) diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py index f93328f63a..e7f41d7e2a 100644 --- a/yt_dlp/socks.py +++ b/yt_dlp/socks.py @@ -134,26 +134,31 @@ def _check_response_version(self, expected_version, got_version): self.close() raise InvalidVersionError(expected_version, got_version) - def _resolve_address(self, destaddr, default, use_remote_dns): - try: - return socket.inet_aton(destaddr) - except OSError: - if use_remote_dns and self._proxy.remote_dns: - return default - else: - return socket.inet_aton(socket.gethostbyname(destaddr)) + def _resolve_address(self, destaddr, default, use_remote_dns, family=None): + for f in (family,) if family else (socket.AF_INET, socket.AF_INET6): + try: + return f, socket.inet_pton(f, destaddr) + except OSError: + continue + + if use_remote_dns and self._proxy.remote_dns: + return 0, default + else: + res = socket.getaddrinfo(destaddr, None, 
family=family or 0) + f, _, _, _, ipaddr = res[0] + return f, socket.inet_pton(f, ipaddr[0]) def _setup_socks4(self, address, is_4a=False): destaddr, port = address - ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) + _, ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a, family=socket.AF_INET) packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr username = (self._proxy.username or '').encode() packet += username + b'\x00' - if is_4a and self._proxy.remote_dns: + if is_4a and self._proxy.remote_dns and ipaddr == SOCKS4_DEFAULT_DSTIP: packet += destaddr.encode() + b'\x00' self.sendall(packet) @@ -210,7 +215,7 @@ def _socks5_auth(self): def _setup_socks5(self, address): destaddr, port = address - ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True) + family, ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True) self._socks5_auth() @@ -220,8 +225,10 @@ def _setup_socks5(self, address): destaddr = destaddr.encode() packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME) packet += self._len_and_data(destaddr) - else: + elif family == socket.AF_INET: packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr + elif family == socket.AF_INET6: + packet += struct.pack('!B', Socks5AddressType.ATYP_IPV6) + ipaddr packet += struct.pack('!H', port) self.sendall(packet) From ba8e9eb2c8bbb699f314169fab8e544437ad731e Mon Sep 17 00:00:00 2001 From: Elyse <26639800+elyse0@users.noreply.github.com> Date: Mon, 18 Sep 2023 15:08:40 -0600 Subject: [PATCH 164/218] [ie/radiofrance] Add support for livestreams, podcasts, playlists (#7006) Closes #4282 Authored by: elyse0 --- yt_dlp/extractor/_extractors.py | 9 +- yt_dlp/extractor/radiofrance.py | 379 +++++++++++++++++++++++++++++++- 2 files changed, 382 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index bf0c67542e..ec3ae0e668 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1555,7 +1555,14 @@ from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE -from .radiofrance import FranceCultureIE, RadioFranceIE +from .radiofrance import ( + FranceCultureIE, + RadioFranceIE, + RadioFranceLiveIE, + RadioFrancePodcastIE, + RadioFranceProfileIE, + RadioFranceProgramScheduleIE, +) from .radiozet import RadioZetPodcastIE from .radiokapital import ( RadioKapitalIE, diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 92e51b7f45..35f4b91dd2 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -1,7 +1,18 @@ +import itertools import re +import urllib.parse from .common import InfoExtractor -from ..utils import parse_duration, unified_strdate +from ..utils import ( + int_or_none, + join_nonempty, + js_to_json, + parse_duration, + strftime_or_none, + traverse_obj, + unified_strdate, + urljoin, +) class RadioFranceIE(InfoExtractor): @@ -56,8 +67,32 @@ def _real_extract(self, url): } -class FranceCultureIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])' +class RadioFranceBaseIE(InfoExtractor): + _VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr' + + _STATIONS_RE = '|'.join(map(re.escape, ( + 'franceculture', + 'franceinfo', + 'franceinter', + 'francemusique', + 'fip', + 'mouv', + ))) + + 
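    # For reference, the alternation built above expands to
    #   'franceculture|franceinfo|franceinter|francemusique|fip|mouv'
    # (re.escape() leaves these purely alphanumeric names untouched);
    # the station subclasses interpolate it into their _VALID_URL patterns.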
def _extract_data_from_webpage(self, webpage, display_id, key): + return traverse_obj(self._search_json( + r'\bconst\s+data\s*=', webpage, key, display_id, + contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json), + (..., 'data', key, {dict}), get_all=False) or {} + + +class FranceCultureIE(RadioFranceBaseIE): + _VALID_URL = rf'''(?x) + {RadioFranceBaseIE._VALID_URL_BASE} + /(?:{RadioFranceBaseIE._STATIONS_RE}) + /podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#]) + ''' + _TESTS = [ { 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487', @@ -67,14 +102,30 @@ class FranceCultureIE(InfoExtractor): 'ext': 'mp3', 'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?', 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?', - 'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'upload_date': '20220514', 'duration': 2750, }, }, + { + 'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675', + 'info_dict': { + 'id': '2107675', + 'display_id': 'le-7-9-30-du-vendredi-10-mars-2023', + 'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot', + 'description': 'md5:36ee74351ede77a314fdebb94026b916', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'upload_date': '20230310', + 'duration': 8977, + 'ext': 'mp3', + }, + }, { 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507', 'only_matching': True, + }, { + 'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200', + 'only_matching': True, } ] @@ -89,7 +140,6 @@ def _real_extract(self, url): 'id': video_id, 'display_id': display_id, 'url': video_data['contentUrl'], - 'ext': video_data.get('encodingFormat'), 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None, 'duration': parse_duration(video_data.get('duration')), 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>', @@ -102,3 +152,322 @@ def _real_extract(self, url): 'upload_date': unified_strdate(self._search_regex( r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False)) } + + +class RadioFranceLiveIE(RadioFranceBaseIE): + _VALID_URL = rf'''(?x) + https?://(?:www\.)?radiofrance\.fr + /(?P<id>{RadioFranceBaseIE._STATIONS_RE}) + /?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$) + ''' + + _TESTS = [{ + 'url': 'https://www.radiofrance.fr/franceinter/', + 'info_dict': { + 'id': 'franceinter', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/franceculture', + 'info_dict': { + 'id': 'franceculture', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family', + 'info_dict': { + 'id': 'mouv-radio-musique-kids-family', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul', + 
'info_dict': { + 'id': 'mouv-radio-rnb-soul', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix', + 'info_dict': { + 'id': 'mouv-radio-musique-mix', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/fip/radio-rock', + 'info_dict': { + 'id': 'fip-radio-rock', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/mouv', + 'only_matching': True, + }] + + def _real_extract(self, url): + station_id, substation_id = self._match_valid_url(url).group('id', 'substation_id') + + if substation_id: + webpage = self._download_webpage(url, station_id) + api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData') + else: + api_response = self._download_json( + f'https://www.radiofrance.fr/{station_id}/api/live', station_id) + + formats, subtitles = [], {} + for media_source in traverse_obj(api_response, (('now', None), 'media', 'sources', lambda _, v: v['url'])): + if media_source.get('format') == 'hls': + fmts, subs = self._extract_m3u8_formats_and_subtitles(media_source['url'], station_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({ + 'url': media_source['url'], + 'abr': media_source.get('bitrate'), + }) + + return { + 'id': join_nonempty(station_id, substation_id), + 'title': traverse_obj(api_response, ('visual', 'legend')) or join_nonempty( + ('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'), from_dict=api_response, delim=' - '), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + } + + +class RadioFrancePlaylistBase(RadioFranceBaseIE): + """Subclasses must set _METADATA_KEY""" + + def _call_api(self, content_id, cursor, page_num): + raise NotImplementedError('This method must be implemented by subclasses') + + def _generate_playlist_entries(self, content_id, content_response): + for page_num in itertools.count(2): + for entry in content_response['items']: + yield self.url_result( + f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, { + 'title': 'title', + 'description': 'standFirst', + 'timestamp': ('publishedDate', {int_or_none}), + 'thumbnail': ('visual', 'src'), + })) + + next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False) + if not next_cursor: + break + + content_response = self._call_api(content_id, next_cursor, page_num) + + def _real_extract(self, url): + display_id = self._match_id(url) + + metadata = self._download_json( + 'https://www.radiofrance.fr/api/v2.1/path', display_id, + query={'value': urllib.parse.urlparse(url).path})['content'] + + content_id = metadata['id'] + + return self.playlist_result( + self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id, + display_id=display_id, **{**traverse_obj(metadata, { + 'title': 'title', + 'description': 'standFirst', + 'thumbnail': ('visual', 'src'), + }), **traverse_obj(metadata, { + 'title': 'name', + 'description': 'role', + })}) + + +class RadioFrancePodcastIE(RadioFrancePlaylistBase): + _VALID_URL = rf'''(?x) + {RadioFranceBaseIE._VALID_URL_BASE} + /(?:{RadioFranceBaseIE._STATIONS_RE}) + /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$) + ''' + + _TESTS = [{ + 'url': 
'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert', + 'info_dict': { + 'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17', + 'display_id': 'le-billet-vert', + 'title': 'Le billet sciences', + 'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 11, + }, { + 'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale', + 'info_dict': { + 'id': '566fd524-3074-4fbc-ac69-8696f2152a54', + 'display_id': 'jean-marie-le-pen-l-obsession-nationale', + 'title': 'Jean-Marie Le Pen, l\'obsession nationale', + 'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_count': 7, + }, { + 'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine', + 'info_dict': { + 'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d', + 'display_id': 'serie-thomas-grjebine', + 'title': 'Thomas Grjebine', + }, + 'playlist_count': 1, + }, { + 'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip', + 'info_dict': { + 'id': '143dff38-e956-4a5d-8576-1c0b7242b99e', + 'display_id': 'certains-l-aiment-fip', + 'title': 'Certains l’aiment Fip', + 'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 321, + }, { + 'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9', + 'only_matching': True, + }, { + 'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix', + 'only_matching': True, + }] + + _METADATA_KEY = 'expressions' + + def _call_api(self, podcast_id, cursor, page_num): + return self._download_json( + f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id, + note=f'Downloading page {page_num}', query={'pageCursor': cursor}) + + +class RadioFranceProfileIE(RadioFrancePlaylistBase): + _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)' + + _TESTS = [{ + 'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3', + 'info_dict': { + 'id': '86c62790-e481-11e2-9f7b-782bcb6744eb', + 'display_id': 'thomas-pesquet', + 'title': 'Thomas Pesquet', + 'description': 'Astronaute à l\'agence spatiale européenne', + }, + 'playlist_mincount': 212, + }, { + 'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie', + 'info_dict': { + 'id': '9593050b-0183-4972-a0b5-d8f699079e02', + 'display_id': 'eugenie-bastie', + 'title': 'Eugénie Bastié', + 'description': 'Journaliste et essayiste', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 39, + }, { + 'url': 'https://www.radiofrance.fr/personnes/lea-salame', + 'only_matching': True, + }] + + _METADATA_KEY = 'documents' + + def _call_api(self, profile_id, cursor, page_num): + resp = self._download_json( + f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id, + note=f'Downloading page {page_num}', query={ + 'relation': 'personality', + 'cursor': cursor, + }) + + resp['next'] = traverse_obj(resp, ('pagination', 'next')) + return resp + + +class RadioFranceProgramScheduleIE(RadioFranceBaseIE): + _VALID_URL = rf'''(?x) + {RadioFranceBaseIE._VALID_URL_BASE} + /(?P<station>{RadioFranceBaseIE._STATIONS_RE}) + /grille-programmes(?:\?date=(?P<date>[\d-]+))? 
+ ''' + + _TESTS = [{ + 'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023', + 'info_dict': { + 'id': 'franceinter-program-20230217', + 'upload_date': '20230217', + }, + 'playlist_count': 25, + }, { + 'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023', + 'info_dict': { + 'id': 'franceculture-program-20230201', + 'upload_date': '20230201', + }, + 'playlist_count': 25, + }, { + 'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023', + 'info_dict': { + 'id': 'mouv-program-20230319', + 'upload_date': '20230319', + }, + 'playlist_count': 3, + }, { + 'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023', + 'info_dict': { + 'id': 'francemusique-program-20230318', + 'upload_date': '20230318', + }, + 'playlist_count': 15, + }, { + 'url': 'https://www.radiofrance.fr/franceculture/grille-programmes', + 'only_matching': True, + }] + + def _generate_playlist_entries(self, webpage_url, api_response): + for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])): + yield self.url_result( + urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE, + url_transparent=True, **traverse_obj(entry, { + 'title': ('expression', 'title'), + 'thumbnail': ('expression', 'visual', 'src'), + 'timestamp': ('startTime', {int_or_none}), + 'series_id': ('concept', 'id'), + 'series': ('concept', 'title'), + })) + + def _real_extract(self, url): + station, date = self._match_valid_url(url).group('station', 'date') + webpage = self._download_webpage(url, station) + grid_data = self._extract_data_from_webpage(webpage, station, 'grid') + upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d') + + return self.playlist_result( + self._generate_playlist_entries(url, grid_data), + join_nonempty(station, 'program', upload_date), upload_date=upload_date) From 9e68747f9607f05e92bb7d9b6e79d678b50070e1 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Mon, 18 Sep 2023 19:02:00 -0400 Subject: [PATCH 165/218] [ie/bilibili] Add support for series, favorites and watch later (#7518) Closes #6719 Authored by: c-basalt --- yt_dlp/extractor/_extractors.py | 6 +- yt_dlp/extractor/bilibili.py | 281 ++++++++++++++++++++++++++++++-- 2 files changed, 272 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ec3ae0e668..a6a286766f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -223,7 +223,11 @@ BiliBiliPlayerIE, BilibiliSpaceVideoIE, BilibiliSpaceAudioIE, - BilibiliSpacePlaylistIE, + BilibiliCollectionListIE, + BilibiliSeriesListIE, + BilibiliFavoritesListIE, + BilibiliWatchlaterIE, + BilibiliPlaylistIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 290340078c..5e7042dbbd 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -15,6 +15,7 @@ GeoRestrictedError, InAdvancePagedList, OnDemandPagedList, + bool_or_none, filter_dict, float_or_none, format_field, @@ -35,6 +36,7 @@ unsmuggle_url, url_or_none, urlencode_postdata, + variadic, ) @@ -156,7 +158,7 @@ def _get_episodes_from_season(self, ss_id, url): class BiliBiliIE(BilibiliBaseIE): - _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' + _VALID_URL = 
r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bilibili.com/video/BV13x41117TL', @@ -252,7 +254,7 @@ class BiliBiliIE(BilibiliBaseIE): 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', 'duration': 313.557, 'upload_date': '20220709', - 'uploader': '小夫Tech', + 'uploader': '小夫太渴', 'timestamp': 1657347907, 'uploader_id': '1326814124', 'comment_count': int, @@ -509,7 +511,7 @@ def _real_extract(self, url): class BiliBiliBangumiMediaIE(BilibiliBaseIE): - _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/bangumi/media/md24097891', 'info_dict': { @@ -528,7 +530,7 @@ def _real_extract(self, url): class BiliBiliBangumiSeasonIE(BilibiliBaseIE): - _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)' + _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/bangumi/play/ss26801', 'info_dict': { @@ -679,13 +681,35 @@ def get_entries(page_data): return self.playlist_result(paged_list, playlist_id) -class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE): - _VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)' +class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE): + def _get_entries(self, page_data, bvid_keys, ending_key='bvid'): + for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})): + yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid) + + def _get_uploader(self, uid, playlist_id): + webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False) + return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False) + + def _extract_playlist(self, fetch_page, get_metadata, get_entries): + metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries) + metadata.pop('page_count', None) + metadata.pop('page_size', None) + return metadata, page_list + + +class BilibiliCollectionListIE(BilibiliSpaceListBaseIE): + _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)' _TESTS = [{ 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445', 'info_dict': { 'id': '2142762_57445', - 'title': '《底特律 变人》' + 'title': '【完结】《底特律 变人》全结局流程解说', + 'description': '', + 'uploader': '老戴在此', + 'uploader_id': '2142762', + 'timestamp': int, + 'upload_date': str, + 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg', }, 'playlist_mincount': 31, }] @@ -706,22 +730,251 @@ def get_metadata(page_data): return { 'page_count': math.ceil(entry_count / page_size), 'page_size': page_size, - 'title': traverse_obj(page_data, ('meta', 'name')) + 'uploader': self._get_uploader(mid, playlist_id), + **traverse_obj(page_data, { + 'title': ('meta', 'name', {str}), + 'description': ('meta', 'description', {str}), + 'uploader_id': ('meta', 'mid', {str_or_none}), + 'timestamp': ('meta', 'ptime', {int_or_none}), + 'thumbnail': ('meta', 'cover', {url_or_none}), + }) } def get_entries(page_data): - for entry in page_data.get('archives', []): - yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', - BiliBiliIE, entry['bvid']) + return self._get_entries(page_data, 'archives') metadata, 
paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries) - return self.playlist_result(paged_list, playlist_id, metadata['title']) + return self.playlist_result(paged_list, playlist_id, **metadata) + + +class BilibiliSeriesListIE(BilibiliSpaceListBaseIE): + _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)' + _TESTS = [{ + 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0', + 'info_dict': { + 'id': '1958703906_547718', + 'title': '直播回放', + 'description': '直播回放', + 'uploader': '靡烟miya', + 'uploader_id': '1958703906', + 'timestamp': 1637985853, + 'upload_date': '20211127', + 'modified_timestamp': int, + 'modified_date': str, + }, + 'playlist_mincount': 513, + }] + + def _real_extract(self, url): + mid, sid = self._match_valid_url(url).group('mid', 'sid') + playlist_id = f'{mid}_{sid}' + playlist_meta = traverse_obj(self._download_json( + f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False + ), { + 'title': ('data', 'meta', 'name', {str}), + 'description': ('data', 'meta', 'description', {str}), + 'uploader_id': ('data', 'meta', 'mid', {str_or_none}), + 'timestamp': ('data', 'meta', 'ctime', {int_or_none}), + 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}), + }) + + def fetch_page(page_idx): + return self._download_json( + 'https://api.bilibili.com/x/series/archives', + playlist_id, note=f'Downloading page {page_idx}', + query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data'] + + def get_metadata(page_data): + page_size = page_data['page']['size'] + entry_count = page_data['page']['total'] + return { + 'page_count': math.ceil(entry_count / page_size), + 'page_size': page_size, + 'uploader': self._get_uploader(mid, playlist_id), + **playlist_meta + } + + def get_entries(page_data): + return self._get_entries(page_data, 'archives') + + metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries) + return self.playlist_result(paged_list, playlist_id, **metadata) + + +class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE): + _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create', + 'info_dict': { + 'id': '1103407912', + 'title': '【V2】(旧)', + 'description': '', + 'uploader': '晓月春日', + 'uploader_id': '84912', + 'timestamp': 1604905176, + 'upload_date': '20201109', + 'modified_timestamp': int, + 'modified_date': str, + 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'view_count': int, + 'like_count': int, + }, + 'playlist_mincount': 22, + }, { + 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912', + 'only_matching': True, + }] + + def _real_extract(self, url): + fid = self._match_id(url) + + list_info = self._download_json( + f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20', + fid, note='Downloading favlist metadata') + if list_info['code'] == -403: + self.raise_login_required(msg='This is a private favorites list. 
You need to log in as its owner') + + entries = self._get_entries(self._download_json( + f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}', + fid, note='Download favlist entries'), 'data') + + return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', { + 'title': ('title', {str}), + 'description': ('intro', {str}), + 'uploader': ('upper', 'name', {str}), + 'uploader_id': ('upper', 'mid', {str_or_none}), + 'timestamp': ('ctime', {int_or_none}), + 'modified_timestamp': ('mtime', {int_or_none}), + 'thumbnail': ('cover', {url_or_none}), + 'view_count': ('cnt_info', 'play', {int_or_none}), + 'like_count': ('cnt_info', 'thumb_up', {int_or_none}), + }))) + + +class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE): + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.bilibili.com/watchlater/#/list', + 'info_dict': {'id': 'watchlater'}, + 'playlist_mincount': 0, + 'skip': 'login required', + }] + + def _real_extract(self, url): + list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater') + watchlater_info = self._download_json( + 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id) + if watchlater_info['code'] == -101: + self.raise_login_required(msg='You need to login to access your watchlater list') + entries = self._get_entries(watchlater_info, ('data', 'list')) + return self.playlist_result(entries, id=list_id, title='稍后再看') + + +class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://www.bilibili.com/list/1958703906?sid=547718', + 'info_dict': { + 'id': '5_547718', + 'title': '直播回放', + 'uploader': '靡烟miya', + 'uploader_id': '1958703906', + 'timestamp': 1637985853, + 'upload_date': '20211127', + }, + 'playlist_mincount': 513, + }, { + 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1', + 'info_dict': { + 'id': '5_547718', + }, + 'playlist_mincount': 513, + 'skip': 'redirect url', + }, { + 'url': 'https://www.bilibili.com/list/ml1103407912', + 'info_dict': { + 'id': '3_1103407912', + 'title': '【V2】(旧)', + 'uploader': '晓月春日', + 'uploader_id': '84912', + 'timestamp': 1604905176, + 'upload_date': '20201109', + 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + }, + 'playlist_mincount': 22, + }, { + 'url': 'https://www.bilibili.com/medialist/play/ml1103407912', + 'info_dict': { + 'id': '3_1103407912', + }, + 'playlist_mincount': 22, + 'skip': 'redirect url', + }, { + 'url': 'https://www.bilibili.com/list/watchlater', + 'info_dict': {'id': 'watchlater'}, + 'playlist_mincount': 0, + 'skip': 'login required', + }, { + 'url': 'https://www.bilibili.com/medialist/play/watchlater', + 'info_dict': {'id': 'watchlater'}, + 'playlist_mincount': 0, + 'skip': 'login required', + }] + + def _extract_medialist(self, query, list_id): + for page_num in itertools.count(1): + page_data = self._download_json( + 'https://api.bilibili.com/x/v2/medialist/resource/list', + list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}' + )['data'] + yield from self._get_entries(page_data, 'media_list', ending_key='bv_id') + query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id')) + if not page_data.get('has_more', False): + break + + def _real_extract(self, url): + list_id = self._match_id(url) + webpage = self._download_webpage(url, list_id) 
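
The `_extract_medialist` helper above walks Bilibili's medialist API with an opaque cursor (the id of the last item seen, echoed back via `oid`) until `has_more` turns false. The same pattern reduced to a standalone sketch, with `fetch_page` standing in for the JSON API call:

    import itertools

    def paginate_by_cursor(fetch_page):
        cursor = None
        for page_num in itertools.count(1):
            page = fetch_page(cursor=cursor, page_num=page_num)
            yield from page.get('items') or []
            cursor = page.get('next_cursor')  # becomes the next request's offset
            if not page.get('has_more'):
                break
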
+ initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id) + if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200: + error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none})) + error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none})) + if error_code == -400 and list_id == 'watchlater': + self.raise_login_required('You need to login to access your watchlater playlist') + elif error_code == -403: + self.raise_login_required('This is a private playlist. You need to login as its owner') + elif error_code == 11010: + raise ExtractorError('Playlist is no longer available', expected=True) + raise ExtractorError(f'Could not access playlist: {error_code} {error_message}') + + query = { + 'ps': 20, + 'with_current': False, + **traverse_obj(initial_state, { + 'type': ('playlist', 'type', {int_or_none}), + 'biz_id': ('playlist', 'id', {int_or_none}), + 'tid': ('tid', {int_or_none}), + 'sort_field': ('sortFiled', {int_or_none}), + 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}), + }) + } + metadata = { + 'id': f'{query["type"]}_{query["biz_id"]}', + **traverse_obj(initial_state, ('mediaListInfo', { + 'title': ('title', {str}), + 'uploader': ('upper', 'name', {str}), + 'uploader_id': ('upper', 'mid', {str_or_none}), + 'timestamp': ('ctime', {int_or_none}), + 'thumbnail': ('cover', {url_or_none}), + })), + } + return self.playlist_result(self._extract_medialist(query, list_id), **metadata) class BilibiliCategoryIE(InfoExtractor): IE_NAME = 'Bilibili category extractor' _MAX_RESULTS = 1000000 - _VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+' + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+' _TESTS = [{ 'url': 'https://www.bilibili.com/v/kichiku/mad', 'info_dict': { @@ -1406,7 +1659,7 @@ def _real_extract(self, url): class BiliLiveIE(InfoExtractor): - _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)' + _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)' _TESTS = [{ 'url': 'https://live.bilibili.com/196', From 69b03f84f8378b0b5a2fbae56f9b7d860b2f529e Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Mon, 18 Sep 2023 19:06:36 -0400 Subject: [PATCH 166/218] [ie/weibo] Fix extractor and support user extraction (#7657) Closes #3964, Closes #4673, Closes #6979 Authored by: c-basalt --- yt_dlp/extractor/_extractors.py | 3 +- yt_dlp/extractor/weibo.py | 319 +++++++++++++++++++++----------- 2 files changed, 215 insertions(+), 107 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a6a286766f..47d983c9cc 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2371,7 +2371,8 @@ ) from .weibo import ( WeiboIE, - WeiboMobileIE + WeiboVideoIE, + WeiboUserIE, ) from .weiqitv import WeiqiTVIE from .weverse import ( diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index bc9a71abe0..b0c3052b6a 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -1,134 +1,241 @@ -from .common import InfoExtractor - -import json import random -import re +import itertools +import urllib.parse -from ..compat import ( - compat_parse_qs, - compat_str, -) +from .common import InfoExtractor from ..utils import ( - js_to_json, + int_or_none, + make_archive_id, + mimetype2ext, + parse_resolution, + str_or_none, strip_jsonp, + traverse_obj, + url_or_none, urlencode_postdata, + urljoin, ) -class 
WeiboIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)' - _TEST = { - 'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment', - 'info_dict': { - 'id': 'Fp6RGfbff', - 'ext': 'mp4', - 'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博', - } - } +class WeiboBaseIE(InfoExtractor): + def _update_visitor_cookies(self, video_id): + visitor_data = self._download_json( + 'https://passport.weibo.com/visitor/genvisitor', video_id, + note='Generating first-visit guest request', + transform_source=strip_jsonp, + data=urlencode_postdata({ + 'cb': 'gen_callback', + 'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}', + })) - def _real_extract(self, url): - video_id = self._match_id(url) - # to get Referer url for genvisitor - webpage, urlh = self._download_webpage_handle(url, video_id) - - visitor_url = urlh.url - - if 'passport.weibo.com' in visitor_url: - # first visit - visitor_data = self._download_json( - 'https://passport.weibo.com/visitor/genvisitor', video_id, - note='Generating first-visit data', - transform_source=strip_jsonp, - headers={'Referer': visitor_url}, - data=urlencode_postdata({ - 'cb': 'gen_callback', - 'fp': json.dumps({ - 'os': '2', - 'browser': 'Gecko57,0,0,0', - 'fonts': 'undefined', - 'screenInfo': '1440*900*24', - 'plugins': '', - }), - })) - - tid = visitor_data['data']['tid'] - cnfd = '%03d' % visitor_data['data']['confidence'] - - self._download_webpage( - 'https://passport.weibo.com/visitor/visitor', video_id, - note='Running first-visit callback', - query={ - 'a': 'incarnate', - 't': tid, - 'w': 2, - 'c': cnfd, - 'cb': 'cross_domain', - 'from': 'weibo', - '_rand': random.random(), - }) - - webpage = self._download_webpage( - url, video_id, note='Revisiting webpage') - - title = self._html_extract_title(webpage) - - video_formats = compat_parse_qs(self._search_regex( - r'video-sources=\\\"(.+?)\"', webpage, 'video_sources')) - - formats = [] - supported_resolutions = (480, 720) - for res in supported_resolutions: - vid_urls = video_formats.get(compat_str(res)) - if not vid_urls or not isinstance(vid_urls, list): - continue - - vid_url = vid_urls[0] - formats.append({ - 'url': vid_url, - 'height': res, + self._download_webpage( + 'https://passport.weibo.com/visitor/visitor', video_id, + note='Running first-visit callback to get guest cookies', + query={ + 'a': 'incarnate', + 't': visitor_data['data']['tid'], + 'w': 2, + 'c': '%03d' % visitor_data['data']['confidence'], + 'cb': 'cross_domain', + 'from': 'weibo', + '_rand': random.random(), }) - uploader = self._og_search_property( - 'nick-name', webpage, 'uploader', default=None) + def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs): + webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs) + if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com': + self._update_visitor_cookies(video_id) + webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs) + return self._parse_json(webpage, video_id, fatal=fatal) + def _extract_formats(self, video_info): + media_info = traverse_obj(video_info, ('page_info', 'media_info')) + formats = traverse_obj(media_info, ( + 'playback_list', lambda _, v: url_or_none(v['play_info']['url']), 'play_info', { + 'url': 'url', + 'format': ('quality_desc', {str}), + 'format_id': ('label', {str}), + 'ext': ('mime', {mimetype2ext}), + 'tbr': 
('bitrate', {int_or_none}, {lambda x: x or None}), + 'vcodec': ('video_codecs', {str}), + 'fps': ('fps', {int_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'filesize': ('size', {int_or_none}), + 'acodec': ('audio_codecs', {str}), + 'asr': ('audio_sample_rate', {int_or_none}), + 'audio_channels': ('audio_channels', {int_or_none}), + })) + if not formats: # fallback, should be barely used + for url in set(traverse_obj(media_info, (..., {url_or_none}))): + if 'label=' in url: # filter out non-video urls + format_id, resolution = self._search_regex( + r'label=(\w+)&template=(\d+x\d+)', url, 'format info', + group=(1, 2), default=(None, None)) + formats.append({ + 'url': url, + 'format_id': format_id, + **parse_resolution(resolution), + **traverse_obj(media_info, ( + 'video_details', lambda _, v: v['label'].startswith(format_id), { + 'size': ('size', {int_or_none}), + 'tbr': ('bitrate', {int_or_none}), + } + ), get_all=False), + }) + return formats + + def _parse_video_info(self, video_info, video_id=None): return { 'id': video_id, - 'title': title, - 'uploader': uploader, - 'formats': formats + 'extractor_key': WeiboIE.ie_key(), + 'extractor': WeiboIE.IE_NAME, + 'formats': self._extract_formats(video_info), + 'http_headers': {'Referer': 'https://weibo.com/'}, + '_old_archive_ids': [make_archive_id('WeiboMobile', video_id)], + **traverse_obj(video_info, { + 'id': (('id', 'id_str', 'mid'), {str_or_none}), + 'display_id': ('mblogid', {str_or_none}), + 'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, {lambda x: x or None}), + 'description': ('text_raw', {str}), + 'duration': ('page_info', 'media_info', 'duration', {int_or_none}), + 'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}), + 'thumbnail': ('page_info', 'page_pic', {url_or_none}), + 'uploader': ('user', 'screen_name', {str}), + 'uploader_id': ('user', ('id', 'id_str'), {str_or_none}), + 'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}), + 'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}), + 'like_count': ('attitudes_count', {int_or_none}), + 'repost_count': ('reposts_count', {int_or_none}), + }, get_all=False), + 'tags': traverse_obj(video_info, ('topic_struct', ..., 'topic_title', {str})) or None, } -class WeiboMobileIE(InfoExtractor): - _VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?' 
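
The `_extract_formats` rewrite above leans on `traverse_obj`'s branching-plus-template form: a lambda filters the list items, and the trailing dict maps output keys to sub-paths, silently dropping entries that resolve to None. A toy run of the same call shape (`yt_dlp.utils.traverse_obj` is the real helper; the `media_info` sample here is invented):

    from yt_dlp.utils import int_or_none, traverse_obj, url_or_none

    media_info = {'playback_list': [
        {'play_info': {'url': 'https://example.com/v.mp4', 'quality_desc': '720p', 'bitrate': 1200}},
        {'play_info': {'url': None}},  # filtered out by the lambda below
    ]}

    formats = traverse_obj(media_info, (
        'playback_list', lambda _, v: url_or_none(v['play_info']['url']), 'play_info', {
            'url': 'url',
            'format': ('quality_desc', {str}),
            'tbr': ('bitrate', {int_or_none}),
        }))
    print(formats)  # [{'url': 'https://example.com/v.mp4', 'format': '720p', 'tbr': 1200}]
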
- _TEST = { - 'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0', +class WeiboIE(WeiboBaseIE): + _VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'https://weibo.com/7827771738/N4xlMvjhI', + 'info_dict': { + 'id': '4910815147462302', + 'ext': 'mp4', + 'display_id': 'N4xlMvjhI', + 'title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】', + 'description': 'md5:e2637a7673980d68694ea7c43cf12a5f', + 'duration': 918, + 'timestamp': 1686312819, + 'upload_date': '20230609', + 'thumbnail': r're:https://.*\.jpg', + 'uploader': '睡前视频基地', + 'uploader_id': '7827771738', + 'uploader_url': 'https://weibo.com/u/7827771738', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'tags': ['泰国大选远进党获胜', '睡前消息', '暑期版'], + }, + }, { + 'url': 'https://m.weibo.cn/status/4189191225395228', 'info_dict': { 'id': '4189191225395228', 'ext': 'mp4', - 'title': '午睡当然是要甜甜蜜蜜的啦', - 'uploader': '柴犬柴犬' + 'display_id': 'FBqgOmDxO', + 'title': '柴犬柴犬的秒拍视频', + 'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f', + 'duration': 53, + 'timestamp': 1514264429, + 'upload_date': '20171226', + 'thumbnail': r're:https://.*\.jpg', + 'uploader': '柴犬柴犬', + 'uploader_id': '5926682210', + 'uploader_url': 'https://weibo.com/u/5926682210', + 'view_count': int, + 'like_count': int, + 'repost_count': int, } - } + }, { + 'url': 'https://weibo.com/0/4224132150961381', + 'note': 'no playback_list example', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - # to get Referer url for genvisitor - webpage = self._download_webpage(url, video_id, note='visit the page') - weibo_info = self._parse_json(self._search_regex( - r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};', - webpage, 'js_code', flags=re.DOTALL), - video_id, transform_source=js_to_json) + return self._parse_video_info(self._weibo_download_json( + f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id)) - status_data = weibo_info.get('status', {}) - page_info = status_data.get('page_info') - title = status_data['status_title'] - uploader = status_data.get('user', {}).get('screen_name') - return { - 'id': video_id, - 'title': title, - 'uploader': uploader, - 'url': page_info['media_info']['stream_url'] +class WeiboVideoIE(WeiboBaseIE): + _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)' + _TESTS = [{ + 'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow', + 'info_dict': { + 'id': '4797700463137878', + 'ext': 'mp4', + 'display_id': 'LEZDodaiW', + 'title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了', + 'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM ​​​', + 'duration': 76, + 'timestamp': 1659344278, + 'upload_date': '20220801', + 'thumbnail': r're:https://.*\.jpg', + 'uploader': '君子爱财陈平安', + 'uploader_id': '3905382233', + 'uploader_url': 'https://weibo.com/u/3905382233', + 'view_count': int, + 'like_count': int, + 'repost_count': int, } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + post_data = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode() + video_info = self._weibo_download_json( + f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}', + video_id, headers={'Referer': url}, data=post_data)['data']['Component_Play_Playinfo'] + return self.url_result(f'https://weibo.com/0/{video_info["mid"]}', WeiboIE) + + +class WeiboUserIE(WeiboBaseIE): + _VALID_URL = 
r'https?://(?:www\.)?weibo\.com/u/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://weibo.com/u/2066652961?tabtype=video', + 'info_dict': { + 'id': '2066652961', + 'title': '萧影殿下的视频', + 'description': '萧影殿下的全部视频', + 'uploader': '萧影殿下', + }, + 'playlist_mincount': 195, + }] + + def _fetch_page(self, uid, cursor=0, page=1): + return self._weibo_download_json( + 'https://weibo.com/ajax/profile/getWaterFallContent', + uid, note=f'Downloading videos page {page}', + query={'uid': uid, 'cursor': cursor})['data'] + + def _entries(self, uid, first_page): + cursor = 0 + for page in itertools.count(1): + response = first_page if page == 1 else self._fetch_page(uid, cursor, page) + for video_info in traverse_obj(response, ('list', ..., {dict})): + yield self._parse_video_info(video_info) + cursor = response.get('next_cursor') + if (int_or_none(cursor) or -1) < 0: + break + + def _real_extract(self, url): + uid = self._match_id(url) + first_page = self._fetch_page(uid) + uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False) + metainfo = { + 'title': f'{uploader}的视频', + 'description': f'{uploader}的全部视频', + 'uploader': uploader, + } if uploader else {} + + return self.playlist_result(self._entries(uid, first_page), uid, **metainfo) From 8ac5b6d96ae5c60cd5ae2495949e0068a6754c45 Mon Sep 17 00:00:00 2001 From: u-spec-png <srdjankalaba@protonmail.ch> Date: Tue, 19 Sep 2023 01:36:10 +0200 Subject: [PATCH 167/218] [ie/N1Info:article] Fix extractor (#7373) Authored by: u-spec-png --- yt_dlp/extractor/n1.py | 52 +++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py index 55345f3983..edc41443ab 100644 --- a/yt_dlp/extractor/n1.py +++ b/yt_dlp/extractor/n1.py @@ -33,7 +33,7 @@ def _real_extract(self, url): class N1InfoIIE(InfoExtractor): IE_NAME = 'N1Info:article' - _VALID_URL = r'https?://(?:(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)|nova\.rs)/(?:[^/]+/){1,2}(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)' _TESTS = [{ # Youtube embedded 'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/', @@ -94,6 +94,16 @@ class N1InfoIIE(InfoExtractor): 'upload_date': '20211102', 'timestamp': 1635861677, }, + }, { + 'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/', + 'info_dict': { + 'id': '1332368', + 'ext': 'mp4', + 'title': 'Ćuta: Biti u Kosovskoj Mitrovici znači da te dočekaju eksplozivnim napravama', + 'upload_date': '20230620', + 'timestamp': 1687290536, + 'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg' + }, }, { 'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/', 'only_matching': True, @@ -105,19 +115,35 @@ def _real_extract(self, url): title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title') timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage)) - - videos = re.findall(r'(?m)(<video[^>]+>)', webpage) + plugin_data = self._html_search_meta('BridPlugin', webpage) entries = [] - for video in videos: - video_data = extract_attributes(video) - entries.append({ - '_type': 'url_transparent', - 'url': video_data.get('data-url'), - 'id': video_data.get('id'), - 'title': title, - 'thumbnail': video_data.get('data-thumbnail'), - 'timestamp': timestamp, - 
'ie_key': 'N1InfoAsset'}) + if plugin_data: + site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id') + for video_data in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage): + video_id = self._parse_json(video_data, title)['video'] + entries.append({ + 'id': video_id, + 'title': title, + 'timestamp': timestamp, + 'thumbnail': self._html_search_meta('thumbnailURL', webpage), + 'formats': self._extract_m3u8_formats( + f'https://cdn-uc.brid.tv/live/partners/{site_id}/streaming/{video_id}/{video_id}.m3u8', + video_id, fatal=False), + }) + else: + # Old player still present in older articles + videos = re.findall(r'(?m)(<video[^>]+>)', webpage) + for video in videos: + video_data = extract_attributes(video) + entries.append({ + '_type': 'url_transparent', + 'url': video_data.get('data-url'), + 'id': video_data.get('id'), + 'title': title, + 'thumbnail': video_data.get('data-thumbnail'), + 'timestamp': timestamp, + 'ie_key': 'N1InfoAsset', + }) embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage) for embedded_video in embedded_videos: From 40999467f72db074a3f13057da9bf82a857530fe Mon Sep 17 00:00:00 2001 From: niemands <67282402+niemands@users.noreply.github.com> Date: Tue, 19 Sep 2023 01:37:17 +0200 Subject: [PATCH 168/218] [ie/pornbox] Add extractor (#7386) Authored by: niemands --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/pornbox.py | 113 ++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 yt_dlp/extractor/pornbox.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 47d983c9cc..dd670d59c2 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1505,6 +1505,7 @@ from .popcorntimes import PopcorntimesIE from .popcorntv import PopcornTVIE from .porn91 import Porn91IE +from .pornbox import PornboxIE from .porncom import PornComIE from .pornflip import PornFlipIE from .pornhd import PornHdIE diff --git a/yt_dlp/extractor/pornbox.py b/yt_dlp/extractor/pornbox.py new file mode 100644 index 0000000000..c381382e93 --- /dev/null +++ b/yt_dlp/extractor/pornbox.py @@ -0,0 +1,113 @@ +from .common import InfoExtractor +from ..compat import functools +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, + qualities, + str_or_none, + traverse_obj, + url_or_none, +) + + +class PornboxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://pornbox.com/application/watch-page/212108', + 'md5': '3ff6b6e206f263be4c5e987a3162ac6e', + 'info_dict': { + 'id': '212108', + 'ext': 'mp4', + 'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49', + 'uploader': 'Lily Strong', + 'timestamp': 1665871200, + 'upload_date': '20221015', + 'age_limit': 18, + 'availability': 'needs_auth', + 'duration': 1505, + 'cast': ['Lily Strong', 'John Strong'], + 'tags': 'count:11', + 'description': 'md5:589c7f33e183aa8aa939537300efb859', + 'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$' + } + }, { + 'url': 'https://pornbox.com/application/watch-page/216045', + 'info_dict': { + 'id': '216045', + 'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2', + 'description': 'md5:3e631dcaac029f15ed434e402d1b06c7', + 'uploader': 'VK Studio', + 'timestamp': 1618264800, + 'upload_date': '20210412', + 'age_limit': 18, + 'availability': 'premium_only', + 'duration': 2710, + 'cast': 'count:3', + 'tags': 'count:29', + 'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$', + 'subtitles': 'count:6' + }, + 'params': { + 
'skip_download': True, + 'ignore_no_formats_error': True + }, + 'expected_warnings': [ + 'You are either not logged in or do not have access to this scene', + 'No video formats found', 'Requested format is not available'] + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id) + + subtitles = {country_code: [{ + 'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}', + 'ext': 'srt' + }] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))} + + is_free_scene = traverse_obj( + public_data, ('price', 'is_available_for_free', {bool}), default=False) + + metadata = { + 'id': video_id, + **traverse_obj(public_data, { + 'title': ('scene_name', {str.strip}), + 'description': ('small_description', {str.strip}), + 'uploader': 'studio', + 'duration': ('runtime', {parse_duration}), + 'cast': (('models', 'male_models'), ..., 'model_name'), + 'thumbnail': ('player_poster', {url_or_none}), + 'tags': ('niches', ..., 'niche'), + }), + 'age_limit': 18, + 'timestamp': parse_iso8601(traverse_obj( + public_data, ('studios', 'release_date'), 'publish_date')), + 'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene), + 'subtitles': subtitles, + } + + if not public_data.get('is_purchased') or not is_free_scene: + self.raise_login_required( + 'You are either not logged in or do not have access to this scene', metadata_available=True) + return metadata + + media_id = traverse_obj(public_data, ( + 'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False) + if not media_id: + self.raise_no_formats('Could not find stream id', video_id=video_id) + + stream_data = self._download_json( + f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls') + + get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k']) + metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], { + 'url': 'src', + 'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}), + 'format_id': ('quality', {str_or_none}), + 'quality': ('quality', {get_quality}), + 'width': ('size', {lambda x: int(x[:-1])}), + })) + + return metadata From cf11b40ac40e3d23a6352753296f3a732886efb9 Mon Sep 17 00:00:00 2001 From: Rohan Dey <142105763+Rohxn16@users.noreply.github.com> Date: Mon, 18 Sep 2023 23:39:20 +0000 Subject: [PATCH 169/218] [ie/media.ccc.de:lists] Fix extraction (#8144) Closes #8138 Authored by: Rohxn16 --- yt_dlp/extractor/ccc.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index 22e3a22ece..ca6b82c981 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor): 'id': '30c3', }, 'playlist_count': 135, + }, { + 'url': 'https://media.ccc.de/c/DS2023', + 'info_dict': { + 'title': 'Datenspuren 2023', + 'id': 'DS2023', + }, + 'playlist_count': 37 }] def _real_extract(self, url): - playlist_id = self._match_id(url).lower() + playlist_id = self._match_id(url) conf = self._download_json( 'https://media.ccc.de/public/conferences/' + playlist_id, From b532556d0a85e7d76f8f0880861232fb706ddbc5 Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Tue, 19 Sep 2023 21:52:44 +0200 Subject: [PATCH 170/218] [ie/pr0gramm] Rewrite extractor (#8151) Authored by: Grub4K --- yt_dlp/extractor/_extractors.py | 2 +- yt_dlp/extractor/pr0gramm.py 
| 218 ++++++++++++++++++++------------ 2 files changed, 139 insertions(+), 81 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index dd670d59c2..490b010b8d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1524,7 +1524,7 @@ PuhuTVIE, PuhuTVSerieIE, ) -from .pr0gramm import Pr0grammStaticIE, Pr0grammIE +from .pr0gramm import Pr0grammIE from .prankcast import PrankCastIE from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE diff --git a/yt_dlp/extractor/pr0gramm.py b/yt_dlp/extractor/pr0gramm.py index 2eb327fba1..c8e0bb493b 100644 --- a/yt_dlp/extractor/pr0gramm.py +++ b/yt_dlp/extractor/pr0gramm.py @@ -1,97 +1,155 @@ -import re +import json +from datetime import date +from urllib.parse import unquote from .common import InfoExtractor -from ..utils import merge_dicts +from ..compat import functools +from ..utils import ExtractorError, make_archive_id, urljoin +from ..utils.traversal import traverse_obj -class Pr0grammStaticIE(InfoExtractor): - # Possible urls: - # https://pr0gramm.com/static/5466437 - _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://pr0gramm.com/static/5466437', - 'md5': '52fa540d70d3edc286846f8ca85938aa', - 'info_dict': { - 'id': '5466437', - 'ext': 'mp4', - 'title': 'pr0gramm-5466437 by g11st', - 'uploader': 'g11st', - 'upload_date': '20221221', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # Fetch media sources - entries = self._parse_html5_media_entries(url, webpage, video_id) - media_info = entries[0] - - # Fetch author - uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader') - - # Fetch approx upload timestamp from filename - # Have None-defaults in case the extraction fails - uploadDay = None - uploadMon = None - uploadYear = None - uploadTimestr = None - # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4) - m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage) - - if (m): - # Up to a day of accuracy should suffice... - uploadDay = m.groupdict().get('day') - uploadMon = m.groupdict().get('mon') - uploadYear = m.groupdict().get('year') - uploadTimestr = uploadYear + uploadMon + uploadDay - - return merge_dicts({ - 'id': video_id, - 'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''), - 'uploader': uploader, - 'upload_date': uploadTimestr - }, media_info) - - -# This extractor is for the primary url (used for sharing, and appears in the -# location bar) Since this page loads the DOM via JS, yt-dl can't find any -# video information here. So let's redirect to a compatibility version of -# the site, which does contain the <video>-element by itself, without requiring -# js to be ran. class Pr0grammIE(InfoExtractor): - # Possible urls: - # https://pr0gramm.com/new/546637 - # https://pr0gramm.com/new/video/546637 - # https://pr0gramm.com/top/546637 - # https://pr0gramm.com/top/video/546637 - # https://pr0gramm.com/user/g11st/uploads/5466437 - # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290 - # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030 - # https://pr0gramm.com/user/froschler/1elf/5232030 - # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id! 
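
The rewritten extractor below guesses the highest content flags the account may request, since the API rejects requests for flags the session is not allowed. The bit layout from `_maximum_flags` (msb first: nsfp, nsfl, nsfw, sfw), pulled out into a sketch that mirrors its nesting (constant names here are illustrative):

    SFW, NSFW, NSFL, NSFP = 0b0001, 0b0010, 0b0100, 0b1000

    def maximum_flags(logged_in, verified):
        flags = SFW                    # anonymous sessions only get sfw content
        if logged_in:
            flags |= NSFP
            if verified:               # per the 'verified' field in the 'me' cookie
                flags |= NSFW | NSFL   # the 0b0110 from the patch
        return flags

    assert maximum_flags(logged_in=True, verified=True) == 0b1111
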
- # https://pr0gramm.com/top/fruher war alles damals/5498175 - - _VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)' - _TEST = { + _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)' + _TESTS = [{ + # Tags require account 'url': 'https://pr0gramm.com/new/video/5466437', 'info_dict': { 'id': '5466437', 'ext': 'mp4', 'title': 'pr0gramm-5466437 by g11st', + 'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'], 'uploader': 'g11st', + 'uploader_id': 394718, + 'upload_timestamp': 1671590240, 'upload_date': '20221221', - } - } + 'like_count': int, + 'dislike_count': int, + 'age_limit': 0, + 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', + }, + }, { + # Tags require account + 'url': 'https://pr0gramm.com/new/3052805:comment28391322', + 'info_dict': { + 'id': '3052805', + 'ext': 'mp4', + 'title': 'pr0gramm-3052805 by Hansking1', + 'tags': 'count:15', + 'uploader': 'Hansking1', + 'uploader_id': 385563, + 'upload_timestamp': 1552930408, + 'upload_date': '20190318', + 'like_count': int, + 'dislike_count': int, + 'age_limit': 0, + 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', + }, + }, { + # Requires verified account + 'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332', + 'info_dict': { + 'id': '5848332', + 'ext': 'mp4', + 'title': 'pr0gramm-5848332 by erd0pfel', + 'tags': 'count:18', + 'uploader': 'erd0pfel', + 'uploader_id': 349094, + 'upload_timestamp': 1694489652, + 'upload_date': '20230912', + 'like_count': int, + 'dislike_count': int, + 'age_limit': 18, + 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', + }, + }, { + 'url': 'https://pr0gramm.com/static/5466437', + 'only_matching': True, + }, { + 'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805', + 'only_matching': True, + }, { + 'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290', + 'only_matching': True, + }] - def _generic_title(): - return "oof" + BASE_URL = 'https://pr0gramm.com' + + @functools.cached_property + def _is_logged_in(self): + return 'pp' in self._get_cookies(self.BASE_URL) + + @functools.cached_property + def _maximum_flags(self): + # We need to guess the flags for the content otherwise the api will raise an error + # We can guess the maximum allowed flags for the account from the cookies + # Bitflags are (msbf): nsfp, nsfl, nsfw, sfw + flags = 0b0001 + if self._is_logged_in: + flags |= 0b1000 + cookies = self._get_cookies(self.BASE_URL) + if 'me' not in cookies: + self._download_webpage(self.BASE_URL, None, 'Refreshing verification information') + if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')): + flags |= 0b0110 + + return flags + + def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'): + data = self._download_json( + f'https://pr0gramm.com/api/items/{endpoint}', + video_id, note, query=query, expected_status=403) + + error = traverse_obj(data, ('error', {str})) + if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'): + if not self._is_logged_in: + self.raise_login_required() + raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True) + elif error: + message = traverse_obj(data, ('msg', {str})) or error + raise ExtractorError(f'API returned error: {message}', expected=True) + + return data def _real_extract(self, url): video_id = self._match_id(url) + video_info = 
traverse_obj( + self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}), + ('items', 0, {dict})) - return self.url_result( - 'https://pr0gramm.com/static/' + video_id, - video_id=video_id, - ie=Pr0grammStaticIE.ie_key()) + source = urljoin('https://img.pr0gramm.com', video_info.get('image')) + if not source or not source.endswith('mp4'): + self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id) + + tags = None + if self._is_logged_in: + metadata = self._call_api('info', video_id, {'itemId': video_id}) + tags = traverse_obj(metadata, ('tags', ..., 'tag', {str})) + # Sorted by "confidence", higher confidence = earlier in list + confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float}))) + if confidences: + tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)] + + return { + 'id': video_id, + 'title': f'pr0gramm-{video_id} by {video_info.get("user")}', + 'formats': [{ + 'url': source, + 'ext': 'mp4', + **traverse_obj(video_info, { + 'width': ('width', {int}), + 'height': ('height', {int}), + }), + }], + 'tags': tags, + 'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0, + '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)], + **traverse_obj(video_info, { + 'uploader': ('user', {str}), + 'uploader_id': ('userId', {int}), + 'like_count': ('up', {int}), + 'dislike_count': ('down', {int}), + 'upload_timestamp': ('created', {int}), + 'upload_date': ('created', {int}, {date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}), + 'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}) + }), + } From 9d6254069c75877bc88bc3584f4326fb1853a543 Mon Sep 17 00:00:00 2001 From: coletdjnz <coletdjnz@protonmail.com> Date: Wed, 20 Sep 2023 19:14:10 +0000 Subject: [PATCH 171/218] Update to ytdl-commit-66ab08 (#8128) [utils] Revert bbd3e7e, updating docstring, test instead https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9 Authored by: coletdjnz --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c7b73f4fd6..d94d8ea822 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ # NEW FEATURES -* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) +* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API From 35f9a306e6934793cff100200cd03f288ec33f11 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 21 Sep 2023 10:58:53 -0500 Subject: [PATCH 172/218] [dependencies] Handle deprecation of `sqlite3.version` (#8167) Closes #8152 Authored by: bashonly --- yt_dlp/compat/compat_utils.py | 2 +- yt_dlp/dependencies/__init__.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py 
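
The `compat_utils` hunk below lets a module pin its reported version through a `_yt_dlp__version` attribute checked ahead of the usual candidates, which is how the `sqlite3.version` deprecation is sidestepped. A sketch of the resulting probe order (the standalone `probe_version` is illustrative; the real logic lives in `get_package_info`):

    import sqlite3

    def probe_version(module):
        for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version'):
            value = getattr(module, attr, None)
            if value:
                return str(value)
        return None

    # What patch 172 does for sqlite3: report the underlying SQLite version
    sqlite3._yt_dlp__version = sqlite3.sqlite_version
    print(probe_version(sqlite3))  # e.g. '3.42.0'
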
index 3ca46d270c..d62b7d0488 100644 --- a/yt_dlp/compat/compat_utils.py +++ b/yt_dlp/compat/compat_utils.py @@ -15,7 +15,7 @@ def get_package_info(module): name=getattr(module, '_yt_dlp__identifier', module.__name__), version=str(next(filter(None, ( getattr(module, attr, None) - for attr in ('__version__', 'version_string', 'version') + for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version') )), None))) diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py index 6e7d29c5ca..b56e4f5cc6 100644 --- a/yt_dlp/dependencies/__init__.py +++ b/yt_dlp/dependencies/__init__.py @@ -43,6 +43,8 @@ try: import sqlite3 + # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152 + sqlite3._yt_dlp__version = sqlite3.sqlite_version except ImportError: # although sqlite3 is part of the standard library, it is possible to compile python without # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 From 295fbb3ae3a7d0dd50e286be5c487cf145ed5778 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 22 Sep 2023 01:28:20 +0800 Subject: [PATCH 173/218] [ie/eplus:inbound] Add extractor (#5782) Authored by: pzhlkj6612 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/eplus.py | 96 +++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 yt_dlp/extractor/eplus.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 490b010b8d..3ce6baef2f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -565,6 +565,7 @@ EpiconIE, EpiconSeriesIE, ) +from .eplus import EplusIbIE from .epoch import EpochIE from .eporner import EpornerIE from .eroprofile import ( diff --git a/yt_dlp/extractor/eplus.py b/yt_dlp/extractor/eplus.py new file mode 100644 index 0000000000..3ebdcf5fbe --- /dev/null +++ b/yt_dlp/extractor/eplus.py @@ -0,0 +1,96 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + try_call, + unified_timestamp, +) + + +class EplusIbIE(InfoExtractor): + IE_NAME = 'eplus:inbound' + IE_DESC = 'e+ (イープラス) overseas' + _VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)' + _TESTS = [{ + 'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D', + 'info_dict': { + 'id': '354502-0001-002', + 'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】', + 'live_status': 'was_live', + 'release_date': '20211231', + 'release_timestamp': 1640952000, + 'description': str, + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'expected_warnings': [ + 'Could not find the playlist URL. 
This event may not be accessible', + 'No video formats found!', + 'Requested format is not available', + ], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id) + + delivery_status = data_json.get('delivery_status') + archive_mode = data_json.get('archive_mode') + release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400) + release_timestamp_str = data_json.get('event_datetime_text') # JST + + self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}') + + if delivery_status == 'PREPARING': + live_status = 'is_upcoming' + elif delivery_status == 'STARTED': + live_status = 'is_live' + elif delivery_status == 'STOPPED': + if archive_mode != 'ON': + raise ExtractorError( + 'This event has ended and there is no archive for this event', expected=True) + live_status = 'post_live' + elif delivery_status == 'WAIT_CONFIRM_ARCHIVED': + live_status = 'post_live' + elif delivery_status == 'CONFIRMED_ARCHIVE': + live_status = 'was_live' + else: + self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live') + live_status = 'is_live' + + formats = [] + + m3u8_playlist_urls = self._search_json( + r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[]) + if not m3u8_playlist_urls: + if live_status == 'is_upcoming': + self.raise_no_formats( + f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True) + else: + self.raise_no_formats( + 'Could not find the playlist URL. This event may not be accessible', expected=True) + elif live_status == 'is_upcoming': + self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True) + elif live_status == 'post_live': + self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True) + else: + for m3u8_playlist_url in m3u8_playlist_urls: + formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id)) + # FIXME: HTTP request headers need to be updated to continue download + warning = 'Due to technical limitations, the download will be interrupted after one hour' + if live_status == 'is_live': + self.report_warning(warning) + elif live_status == 'was_live': + self.report_warning(f'{warning}. 
You can restart to continue the download') + + return { + 'id': data_json['app_id'], + 'title': data_json.get('app_name'), + 'formats': formats, + 'live_status': live_status, + 'description': data_json.get('content'), + 'release_timestamp': release_timestamp, + } From b3febedbeb662dfdf9b5c1d5799039ad4fc969de Mon Sep 17 00:00:00 2001 From: Elyse <26639800+elyse0@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:30:32 -0600 Subject: [PATCH 174/218] [ie/Canal1,CaracolTvPlay] Add extractors (#7151) Closes #5826 Authored by: elyse0 --- yt_dlp/extractor/_extractors.py | 2 + yt_dlp/extractor/canal1.py | 39 +++++++++ yt_dlp/extractor/caracoltv.py | 136 ++++++++++++++++++++++++++++++++ yt_dlp/extractor/mediastream.py | 8 +- 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 yt_dlp/extractor/canal1.py create mode 100644 yt_dlp/extractor/caracoltv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3ce6baef2f..632d6720e1 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -296,9 +296,11 @@ from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .camwithher import CamWithHerIE +from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE +from .caracoltv import CaracolTvPlayIE from .carambatv import ( CarambaTVIE, CarambaTVPageIE, diff --git a/yt_dlp/extractor/canal1.py b/yt_dlp/extractor/canal1.py new file mode 100644 index 0000000000..587a11ab8c --- /dev/null +++ b/yt_dlp/extractor/canal1.py @@ -0,0 +1,39 @@ +from .common import InfoExtractor + + +class Canal1IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)' + + _TESTS = [{ + 'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/', + 'info_dict': { + 'id': '63b39f6b354977084b85ab54', + 'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco', + 'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó', + 'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013', + 'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54', + 'ext': 'mp4', + }, + }, { + 'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/', + 'info_dict': { + 'id': '63b39e93f5fd223aa32250fb', + 'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter', + 'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter', + 'description': 'md5:d9f691f131a21ce6767ca6c05d17d791', + 'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb', + 'ext': 'mp4', + }, + }, { + # Geo-restricted to Colombia + 'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + return self.url_result( + self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'), + display_id=display_id, url_transparent=True) diff --git a/yt_dlp/extractor/caracoltv.py b/yt_dlp/extractor/caracoltv.py new file mode 100644 index 0000000000..79f7752fe0 --- /dev/null +++ b/yt_dlp/extractor/caracoltv.py @@ -0,0 +1,136 @@ +import base64 +import 
json +import uuid + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + js_to_json, + traverse_obj, + urljoin, +) + + +class CaracolTvPlayIE(InfoExtractor): + _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)' + _NETRC_MACHINE = 'caracoltv-play' + + _TESTS = [{ + 'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==', + 'info_dict': { + 'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==', + 'title': 'La teoría del promedio', + 'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3', + }, + 'playlist_count': 6, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0', + 'info_dict': { + 'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==', + 'title': 'Ella', + 'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8', + }, + 'playlist_count': 10, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0', + 'info_dict': { + 'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==', + 'title': 'La vuelta al mundo en 80 risas 2022', + 'description': 'md5:e97aac36106e5c37ebf947b3350106a4', + }, + 'playlist_count': 17, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1', + 'only_matching': True, + }] + + _USER_TOKEN = None + + def _extract_app_token(self, webpage): + config_js_path = self._search_regex( + r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False) + + mediation_config = {} if not config_js_path else self._search_json( + r'mediation\s*:', self._download_webpage( + urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'), + 'mediation_config', None, transform_source=js_to_json, fatal=False) + + key = traverse_obj( + mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50' + secret = traverse_obj( + mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0' + + return base64.b64encode(f'{key}:{secret}'.encode()).decode() + + def _perform_login(self, email, password): + webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False) + app_token = self._extract_app_token(webpage) + + bearer_token = self._download_json( + 'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token', + headers={'Authorization': f'Basic {app_token}'})['token'] + + self._USER_TOKEN = self._download_json( + 'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={ + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {bearer_token}', + }, data=json.dumps({ + 'device_data': { + 'device_id': str(uuid.uuid4()), + 'device_token': '', + 'device_type': 'web' + }, + 'login_data': { + 'enabled': True, + 'email': email, + 'password': password, + } + }).encode())['user_token'] + + def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None): + formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4') + + return { + 'id': video_data['id'], + 'title': video_data.get('name'), + 'description': video_data.get('description'), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': traverse_obj( + video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})), + 
'series_id': series_id, + 'season_id': season_id, + 'season_number': int_or_none(season_number), + 'episode_number': int_or_none(video_data.get('item_order')), + 'is_live': video_data.get('entry_type') == 3, + } + + def _extract_series_seasons(self, seasons, series_id): + for season in seasons: + api_response = self._download_json( + 'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']}, + headers={'Authorization': f'Bearer {self._USER_TOKEN}'}) + + season_number = season.get('order') + for episode in api_response['items']: + yield self._extract_video(episode, series_id, season['id'], season_number) + + def _real_extract(self, url): + series_id = self._match_id(url) + + if self._USER_TOKEN is None: + self._perform_login('guest@inmobly.com', 'Test@gus1') + + api_response = self._download_json( + 'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id}, + headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0] + + if not api_response.get('seasons'): + return self._extract_video(api_response) + + return self.playlist_result( + self._extract_series_seasons(api_response['seasons'], series_id), + series_id, **traverse_obj(api_response, { + 'title': 'name', + 'description': 'description', + })) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index d5c9aab8a3..b8cb5a691c 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -106,8 +106,12 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - if 'Debido a tu ubicación no puedes ver el contenido' in webpage: - self.raise_geo_restricted() + for message in [ + 'Debido a tu ubicación no puedes ver el contenido', + 'You are not allowed to watch this video: Geo Fencing Restriction' + ]: + if message in webpage: + self.raise_geo_restricted() player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id) From 21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Thu, 21 Sep 2023 13:34:35 -0400 Subject: [PATCH 175/218] [ie/douyutv] Fix extractors (#7652) Closes #2494, Closes #7295 Authored by: c-basalt --- yt_dlp/extractor/douyutv.py | 273 ++++++++++++++++++++++++------------ 1 file changed, 184 insertions(+), 89 deletions(-) diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index fa40844df5..ee8893d5af 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -1,31 +1,72 @@ import time import hashlib -import re import urllib +import uuid from .common import InfoExtractor +from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, + UserNotLive, + determine_ext, + int_or_none, + js_to_json, + parse_resolution, + str_or_none, + traverse_obj, unescapeHTML, - unified_strdate, + url_or_none, + urlencode_postdata, urljoin, ) -class DouyuTVIE(InfoExtractor): - IE_DESC = '斗鱼' +class DouyuBaseIE(InfoExtractor): + def _download_cryptojs_md5(self, video_id): + for url in [ + 'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js', + 'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js', + ]: + js_code = self._download_webpage( + url, video_id, note='Downloading signing dependency', fatal=False) + if js_code: + self.cache.store('douyu', 'crypto-js-md5', js_code) + return js_code + raise ExtractorError('Unable to download JS dependency (crypto-js/md5)') + + def 
_get_cryptojs_md5(self, video_id): + return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id) + + def _calc_sign(self, sign_func, video_id, a): + b = uuid.uuid4().hex + c = round(time.time()) + js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))' + phantom = PhantomJSwrapper(self) + result = phantom.execute(js_script, video_id, + note='Executing JS signing script').strip() + return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()} + + def _search_js_sign_func(self, webpage, fatal=True): + # The greedy look-behind ensures last possible script tag is matched + return self._search_regex( + r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal) + + +class DouyuTVIE(DouyuBaseIE): + IE_DESC = '斗鱼直播' _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)' _TESTS = [{ - 'url': 'http://www.douyutv.com/iseven', + 'url': 'https://www.douyu.com/pigff', 'info_dict': { - 'id': '17732', - 'display_id': 'iseven', - 'ext': 'flv', - 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': r're:.*m7show@163\.com.*', - 'thumbnail': r're:^https?://.*\.png', - 'uploader': '7师傅', + 'id': '24422', + 'display_id': 'pigff', + 'ext': 'mp4', + 'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群', + 'thumbnail': str, + 'uploader': 'pigff', 'is_live': True, + 'live_status': 'is_live', }, 'params': { 'skip_download': True, @@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor): 'only_matching': True, }] + def _get_sign_func(self, room_id, video_id): + return self._download_json( + f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id, + note='Getting signing script')['data'][f'room{room_id}'] + + def _extract_stream_formats(self, stream_formats): + formats = [] + for stream_info in traverse_obj(stream_formats, (..., 'data')): + stream_url = urljoin( + traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live')) + if stream_url: + rate_id = traverse_obj(stream_info, ('rate', {int_or_none})) + rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False) + ext = determine_ext(stream_url) + formats.append({ + 'url': stream_url, + 'format_id': str_or_none(rate_id), + 'ext': 'mp4' if ext == 'm3u8' else ext, + 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', + 'quality': rate_id % -10000 if rate_id is not None else None, + **traverse_obj(rate_info, { + 'format': ('name', {str_or_none}), + 'tbr': ('bit', {int_or_none}), + }), + }) + return formats + def _real_extract(self, url): video_id = self._match_id(url) - if video_id.isdigit(): - room_id = video_id - else: - page = self._download_webpage(url, video_id) - room_id = self._html_search_regex( - r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') + webpage = self._download_webpage(url, video_id) + room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id') + + if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1': + raise UserNotLive('The channel is auto-playing VODs', video_id=video_id) + if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2': + raise UserNotLive(video_id=video_id) # Grab metadata from API params = { @@ -102,110 +171,136 @@ def _real_extract(self, url): 'time': int(time.time()), } params['auth'] = hashlib.md5( - 
f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest() - room = self._download_json( + f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest() + room = traverse_obj(self._download_json( f'http://www.douyutv.com/api/v1/room/{room_id}', video_id, - note='Downloading room info', query=params)['data'] + note='Downloading room info', query=params, fatal=False), 'data') # 1 = live, 2 = offline - if room.get('show_status') == '2': - raise ExtractorError('Live stream is offline', expected=True) + if traverse_obj(room, 'show_status') == '2': + raise UserNotLive(video_id=video_id) - video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL')) - formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id) + js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id) + form_data = { + 'rate': 0, + **self._calc_sign(js_sign_func, video_id, room_id), + } + stream_formats = [self._download_json( + f'https://www.douyu.com/lapi/live/getH5Play/{room_id}', + video_id, note="Downloading livestream format", + data=urlencode_postdata(form_data))] - title = unescapeHTML(room['room_name']) - description = room.get('show_details') - thumbnail = room.get('room_src') - uploader = room.get('nickname') + for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')): + if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')): + form_data['rate'] = rate_id + stream_formats.append(self._download_json( + f'https://www.douyu.com/lapi/live/getH5Play/{room_id}', + video_id, note=f'Downloading livestream format {rate_id}', + data=urlencode_postdata(form_data))) return { 'id': room_id, - 'display_id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, + 'formats': self._extract_stream_formats(stream_formats), 'is_live': True, - 'subtitles': subs, - 'formats': formats, + **traverse_obj(room, { + 'display_id': ('url', {str}, {lambda i: i[1:]}), + 'title': ('room_name', {unescapeHTML}), + 'description': ('show_details', {str}), + 'uploader': ('nickname', {str}), + 'thumbnail': ('room_src', {url_or_none}), + }) } -class DouyuShowIE(InfoExtractor): +class DouyuShowIE(DouyuBaseIE): _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)' _TESTS = [{ - 'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw', - 'md5': '0c2cfd068ee2afe657801269b2d86214', + 'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY', 'info_dict': { - 'id': 'rjNBdvnVXNzvE2yw', + 'id': 'mPyq7oVNe5Yv1gLY', 'ext': 'mp4', - 'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场', - 'duration': 7150.08, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': '陈一发儿', - 'uploader_id': 'XrZwYelr5wbK', - 'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK', - 'upload_date': '20170402', + 'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃', + 'duration': 633, + 'thumbnail': str, + 'uploader': '美食作家王刚V', + 'uploader_id': 'OVAO4NVx1m7Q', + 'timestamp': 1661850002, + 'upload_date': '20220830', + 'view_count': int, + 'tags': ['美食', '美食综合'], }, }, { 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw', 'only_matching': True, }] + _FORMATS = { + 'super': '原画', + 'high': '超清', + 'normal': '高清', + } + + _QUALITIES = { + 'super': -1, + 'high': -2, + 'normal': -3, + } + + _RESOLUTIONS = { + 'super': '1920x1080', + 'high': '1280x720', + 'normal': '852x480', + } + def _real_extract(self, url): url = url.replace('vmobile.', 
'v.') video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - room_info = self._parse_json(self._search_regex( - r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id) + video_info = self._search_json( + r'<script>\s*window\.\$DATA\s*=', webpage, + 'video info', video_id, transform_source=js_to_json) - video_info = None + js_sign_func = self._search_js_sign_func(webpage) + form_data = { + 'vid': video_id, + **self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']), + } + url_info = self._download_json( + 'https://v.douyu.com/api/stream/getStreamUrl', video_id, + data=urlencode_postdata(form_data), note="Downloading video formats") - for trial in range(5): - # Sometimes Douyu rejects our request. Let's try it more times - try: - video_info = self._download_json( - 'https://vmobile.douyu.com/video/getInfo', video_id, - query={'vid': video_id}, - headers={ - 'Referer': url, - 'x-requested-with': 'XMLHttpRequest', - }) - break - except ExtractorError: - self._sleep(1, video_id) - - if not video_info: - raise ExtractorError('Can\'t fetch video info') - - formats = self._extract_m3u8_formats( - video_info['data']['video_url'], video_id, - entry_protocol='m3u8_native', ext='mp4') - - upload_date = unified_strdate(self._html_search_regex( - r'<em>上传时间:</em><span>([^<]+)</span>', webpage, - 'upload date', fatal=False)) - - uploader = uploader_id = uploader_url = None - mobj = re.search( - r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"', - webpage) - if mobj: - uploader_id, uploader = mobj.groups() - uploader_url = urljoin(url, '/author/' + uploader_id) + formats = [] + for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)): + video_url = traverse_obj(url, ('url', {url_or_none})) + if video_url: + ext = determine_ext(video_url) + formats.append({ + 'format': self._FORMATS.get(name), + 'format_id': name, + 'url': video_url, + 'quality': self._QUALITIES.get(name), + 'ext': 'mp4' if ext == 'm3u8' else ext, + 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', + **parse_resolution(self._RESOLUTIONS.get(name)) + }) + else: + self.to_screen( + f'"{self._FORMATS.get(name, name)}" format may require logging in. 
{self._login_hint()}')

        return {
            'id': video_id,
-            'title': room_info['name'],
            'formats': formats,
-            'duration': room_info.get('duration'),
-            'thumbnail': room_info.get('pic'),
-            'upload_date': upload_date,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'uploader_url': uploader_url,
+            **traverse_obj(video_info, ('DATA', {
+                'title': ('content', 'title', {str}),
+                'uploader': ('content', 'author', {str}),
+                'uploader_id': ('content', 'up_id', {str_or_none}),
+                'duration': ('content', 'video_duration', {int_or_none}),
+                'thumbnail': ('content', 'video_pic', {url_or_none}),
+                'timestamp': ('content', 'create_time', {int_or_none}),
+                'view_count': ('content', 'view_num', {int_or_none}),
+                'tags': ('videoTag', ..., 'tagName', {str}),
+            }))
        }
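
A note on the signing flow in the DouyuTV patch above, since it is spread across several helpers: `_calc_sign` evaluates the site's obfuscated `ub98484234()` function in PhantomJS (with a CryptoJS-MD5 shim prepended) and then flattens the query string it returns into POST form data. The sketch below reproduces only that final parsing step; it is illustrative, not part of the patch, the helper name `parse_sign_result` is mine, and the sample values are invented.

```python
# Minimal sketch of the last step of _calc_sign() above: ub98484234() emits a
# query string, which is flattened into single values for the form data,
# mirroring `{i: v[0] for i, v in urllib.parse.parse_qs(result).items()}`.
import urllib.parse

def parse_sign_result(result: str) -> dict:
    return {key: values[0] for key, values in urllib.parse.parse_qs(result).items()}

# Invented example output of the signing function:
print(parse_sign_result('v=220120222&did=1234567890abcdef&tt=1695300000&sign=0123456789abcdef'))
# -> {'v': '220120222', 'did': '1234567890abcdef', 'tt': '1695300000', 'sign': '0123456789abcdef'}
```
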
From 5fccabac27ca3c1165ade1b0df6fbadc24258dc2 Mon Sep 17 00:00:00 2001
From: Simon <simon30002021@icloud.com>
Date: Thu, 21 Sep 2023 19:37:58 +0200
Subject: [PATCH 176/218] [ie/rbgtum] Fix extraction and support new URL format
 (#7690)

Authored by: simon300000
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/rbgtum.py      | 79 ++++++++++++++++++++++++++-------
 2 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 632d6720e1..9cda06d8fa 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1601,6 +1601,7 @@
 from .rbgtum import (
     RbgTumIE,
     RbgTumCourseIE,
+    RbgTumNewCourseIE,
 )
 from .rcs import (
     RCSIE,
diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py
index 47649cfc58..c8a331f3ee 100644
--- a/yt_dlp/extractor/rbgtum.py
+++ b/yt_dlp/extractor/rbgtum.py
@@ -1,10 +1,11 @@
 import re
 
 from .common import InfoExtractor
+from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError
 
 
 class RbgTumIE(InfoExtractor):
-    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
+    _VALID_URL = r'https://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
     _TESTS = [{
         # Combined view
         'url': 'https://live.rbg.tum.de/w/cpp/22128',
@@ -35,16 +36,18 @@ class RbgTumIE(InfoExtractor):
         'title': 'Fachschaftsvollversammlung',
         'series': 'Fachschaftsvollversammlung Informatik',
         }
+    }, {
+        'url': 'https://tum.live/w/linalginfo/27102',
+        'only_matching': True,
     },
     ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
-        lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
-        lecture_series_title = self._html_search_regex(
-            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
+        m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
+        lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)', webpage, 'title', fatal=False)
+        lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
 
         formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
@@ -57,9 +60,9 @@ def _real_extract(self, url):
 
 
 class RbgTumCourseIE(InfoExtractor):
-    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
+    _VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
     _TESTS = [{
-        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
+        'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
         'info_dict': {
             'title': 'Funktionale Programmierung und Verifikation (IN0003)',
             'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
-        'url': 'https://live.rbg.tum.de/course/2022/W/set',
+        'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
@@ -78,16 +81,62 @@ class RbgTumCourseIE(InfoExtractor):
             'noplaylist': False,
         },
         'playlist_count': 6,
+    }, {
+        'url': 'https://tum.live/old/course/2023/S/linalginfo',
+        'only_matching': True,
     },
     ]
 
     def _real_extract(self, url):
-        course_id = self._match_id(url)
-        webpage = self._download_webpage(url, course_id)
+        course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
+        meta = self._download_json(
+            f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
+            query={'year': year, 'term': term}) or {}
+        lecture_series_title = meta.get('Name')
+        lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
+                    for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]
 
-        lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
+        if not lectures:
+            webpage = self._download_webpage(url, course_id)
+            lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
+            lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
+                        for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]
 
-        lecture_urls = []
-        for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
-            lecture_urls.append(self.url_result(
-                'https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
-
-        return self.playlist_result(lecture_urls, course_id, lecture_series_title)
+        return self.playlist_result(lectures, course_id, lecture_series_title)
+
+
+class RbgTumNewCourseIE(InfoExtractor):
+    _VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
+    _TESTS = [{
+        'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
+        'info_dict': {
+            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
+            'id': '2022/S/fpv',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 13,
+    }, {
+        'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
+        'info_dict': {
+            'title': 'SET FSMPIC',
+            'id': '2022/W/set',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 6,
+    }, {
+        'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        query = parse_qs(url)
+        errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
+        if errors:
+            raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}')
+        year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
+        hostname = self._match_valid_url(url).group('hostname')
+
+        return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
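
To make the redirect logic of `RbgTumNewCourseIE` above concrete, here is a self-contained sketch of how the new query-style URL is validated and mapped onto the old-style course URL that `RbgTumCourseIE` handles. It approximates yt-dlp's `parse_qs` helper with the standard library; the function name `old_course_url` is mine and the snippet is only an illustration of the patch's behaviour, not part of it.

```python
from urllib.parse import parse_qs, urlparse

def old_course_url(url: str) -> str:
    # Mirrors RbgTumNewCourseIE._real_extract: require year/term/slug, then delegate
    query = parse_qs(urlparse(url).query)
    missing = [key for key in ('year', 'term', 'slug') if not query.get(key)]
    if missing:
        raise ValueError(f'Input URL is missing query parameters: {", ".join(missing)}')
    year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
    return f'https://live.rbg.tum.de/old/course/{year}/{term}/{slug}'

print(old_course_url('https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3'))
# -> https://live.rbg.tum.de/old/course/2022/S/fpv
```
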
From b84fda7388dd20d38921e23b469147f3957c1812 Mon Sep 17 00:00:00 2001
From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com>
Date: Thu, 21 Sep 2023 17:45:18 +0000
Subject: [PATCH 177/218] [ie/bilibili] Extract Dolby audio formats (#8142)

Closes #4050
Authored by: ClosedPort22
---
 yt_dlp/extractor/bilibili.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 5e7042dbbd..9119f396be 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -49,14 +49,14 @@ def extract_formats(self, play_info):
             for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
         }
 
-        audios = traverse_obj(play_info, ('dash', 'audio', ...))
+        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
         flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
         if flac_audio:
             audios.append(flac_audio)
         formats = [{
             'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
             'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
-            'acodec': audio.get('codecs'),
+            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
             'vcodec': 'none',
             'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
             'filesize': int_or_none(audio.get('size')),
@@ -71,6 +71,7 @@ def extract_formats(self, play_info):
             'height': int_or_none(video.get('height')),
             'vcodec': video.get('codecs'),
             'acodec': 'none' if audios else None,
+            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
             'tbr': float_or_none(video.get('bandwidth'), scale=1000),
             'filesize': int_or_none(video.get('size')),
             'quality': int_or_none(video.get('id')),

From a5e264d74b4bd60c6e7ec4e38f1a23af4e420531 Mon Sep 17 00:00:00 2001
From: kylegustavo
Date: Thu, 21 Sep 2023 10:46:49 -0700
Subject: [PATCH 178/218] [ie/Expressen] Improve `_VALID_URL` (#8153)

Closes #8141
Authored by: kylegustavo
---
 yt_dlp/extractor/expressen.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py
index 86967b631b..b96f2e4cbb 100644
--- a/yt_dlp/extractor/expressen.py
+++ b/yt_dlp/extractor/expressen.py
@@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?(?:expressen|di)\.se/
-                        (?:(?:tvspelare/video|videoplayer/embed)/)?
-                        tv/(?:[^/]+/)*
+                        (?:(?:tvspelare/video|video-?player/embed)/)?
+                        (?:tv|nyheter)/(?:[^/?#]+/)*
                         (?P<id>[^/?#&]+)
                     '''
     _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
@@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
     }, {
         'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
         'only_matching': True,
+    }, {
+        'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
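
As a quick sanity check of the widened Expressen pattern above, the following standalone snippet runs the new `_VALID_URL` against the two URLs added as `only_matching` tests; both should yield a display ID. This is only a verification sketch (the `VALID_URL` constant is mine), not part of the patch.

```python
import re

VALID_URL = re.compile(r'''(?x)
    https?://
    (?:www\.)?(?:expressen|di)\.se/
    (?:(?:tvspelare/video|video-?player/embed)/)?
    (?:tv|nyheter)/(?:[^/?#]+/)*
    (?P<id>[^/?#&]+)
    ''')

for url in (
    'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
    'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
):
    match = VALID_URL.match(url)
    print(match and match.group('id'))  # prints the slug for both URLs
```
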
From 2269065ad60cb0ab62408ae6a7b20283e5252232 Mon Sep 17 00:00:00 2001
From: std-move <26625259+std-move@users.noreply.github.com>
Date: Thu, 21 Sep 2023 20:19:52 +0200
Subject: [PATCH 179/218] [ie/NovaEmbed] Fix extractor (#7910)

Closes #8025
Authored by: std-move
---
 yt_dlp/extractor/nova.py | 116 +++++++++++++++------------------------
 1 file changed, 45 insertions(+), 71 deletions(-)

diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py
index 8bd3fd4725..bd0c4ebe34 100644
--- a/yt_dlp/extractor/nova.py
+++ b/yt_dlp/extractor/nova.py
@@ -6,7 +6,6 @@
     determine_ext,
     int_or_none,
     js_to_json,
-    qualities,
     traverse_obj,
     unified_strdate,
     url_or_none,
@@ -49,77 +48,52 @@ def _real_extract(self, url):
         duration = None
         formats = []
 
-        player = self._parse_json(
-            self._search_regex(
-                (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
-                 r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
-                webpage, 'player', default='{}', group='json'), video_id, fatal=False)
-        if player:
-            for format_id, format_list in player['tracks'].items():
-                if not isinstance(format_list, list):
-                    format_list = [format_list]
-                for format_dict in format_list:
-                    if not isinstance(format_dict, dict):
-                        continue
-                    if (not self.get_param('allow_unplayable_formats')
-                            and traverse_obj(format_dict, ('drm', 'keySystem'))):
-                        has_drm = True
-                        continue
-                    format_url = url_or_none(format_dict.get('src'))
-                    format_type = format_dict.get('type')
-                    ext = determine_ext(format_url)
-                    if (format_type == 'application/x-mpegURL'
-                            or format_id == 'HLS' or ext == 'm3u8'):
-                        formats.extend(self._extract_m3u8_formats(
-                            format_url, video_id, 'mp4',
-                            entry_protocol='m3u8_native', m3u8_id='hls',
-                            fatal=False))
-                    elif (format_type == 'application/dash+xml'
-                          or format_id == 'DASH' or ext == 'mpd'):
-                        formats.extend(self._extract_mpd_formats(
-                            format_url, video_id, mpd_id='dash', fatal=False))
-                    else:
-                        formats.append({
-                            'url': format_url,
-                        })
-            duration = int_or_none(player.get('duration'))
-        else:
-            # Old path, not actual as of 08.04.2020
-            bitrates = self._parse_json(
-                self._search_regex(
-                    r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
-                video_id, transform_source=js_to_json)
-
-            QUALITIES = ('lq', 'mq', 'hq', 'hd')
-            quality_key = qualities(QUALITIES)
-
-            for format_id, format_list in bitrates.items():
-                if not isinstance(format_list, list):
-                    format_list = [format_list]
-                for format_url in format_list:
-                    format_url = url_or_none(format_url)
-                    if not format_url:
-                        continue
-                    if format_id == 'hls':
-                        formats.extend(self._extract_m3u8_formats(
-                            format_url, video_id, ext='mp4',
-                            entry_protocol='m3u8_native', m3u8_id='hls',
-                            fatal=False))
-                        continue
-                    f = {
+        def process_format_list(format_list, format_id=""):
+            nonlocal formats, has_drm
+            if not isinstance(format_list, list):
+                format_list = [format_list]
+            for format_dict in format_list:
+                if not isinstance(format_dict, dict):
+                    continue
+                if (not self.get_param('allow_unplayable_formats')
+                        and traverse_obj(format_dict, ('drm', 'keySystem'))):
+                    has_drm = True
+                    continue
+                format_url = url_or_none(format_dict.get('src'))
+                format_type = format_dict.get('type')
+                ext = determine_ext(format_url)
+                if (format_type == 'application/x-mpegURL'
+                        or format_id == 'HLS' or ext == 'm3u8'):
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls',
+                        fatal=False))
+                elif (format_type == 'application/dash+xml'
+                      or format_id == 'DASH' or ext == 'mpd'):
+                    formats.extend(self._extract_mpd_formats(
+                        format_url, video_id, mpd_id='dash', fatal=False))
+                else:
+                    formats.append({
                         'url': format_url,
-                    }
-                    f_id = format_id
-                    for quality in QUALITIES:
-                        if '%s.mp4' % quality in format_url:
-                            f_id += '-%s' % quality
-                            f.update({
-                                'quality': quality_key(quality),
-                                'format_note': quality.upper(),
-                            })
-                            break
-                    f['format_id'] = f_id
-                    formats.append(f)
+                    })
+
+        player = self._search_json(
+            r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
+        if player:
+            for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
+                process_format_list(src)
+            duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
+        if not formats and not has_drm:
+            # older code path, in use before August 2023
+            player = self._parse_json(
+                self._search_regex(
+                    (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
+                     r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
+                    webpage, 'player', group='json'), video_id)
+            if player:
+                for format_id, format_list in player['tracks'].items():
+                    process_format_list(format_list, format_id)
+                duration = 
int_or_none(player.get('duration')) if not formats and has_drm: self.report_drm(video_id) From 52414d64ca7b92d3f83964cdd68247989b0c4625 Mon Sep 17 00:00:00 2001 From: bashonly Date: Thu, 21 Sep 2023 16:51:57 -0500 Subject: [PATCH 180/218] [utils] `js_to_json`: Handle `Array` objects Authored by: Grub4K, std-move Co-authored-by: std-move <26625259+std-move@users.noreply.github.com> Co-authored-by: Simon Sawicki --- test/test_utils.py | 6 ++++++ yt_dlp/utils/_utils.py | 1 + 2 files changed, 7 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 91e3ffd39e..47d1f71bfe 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1218,6 +1218,12 @@ def test_js_to_json_template_literal(self): self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""') self.assertEqual(js_to_json('`${name}`', {}), '"name"') + def test_js_to_json_map_array_constructors(self): + self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5}) + self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10]) + self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5]) + self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5}) + def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(""), {'x': 'y'}) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index ef26de1160..213ccc6363 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2727,6 +2727,7 @@ def fix_kv(m): def create_map(mobj): return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars)))) + code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code) code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code) if not strict: code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) From 904a19ee93195ce0bd4b08bd22b186120afb5b17 Mon Sep 17 00:00:00 2001 From: bashonly Date: Thu, 21 Sep 2023 16:54:57 -0500 Subject: [PATCH 181/218] [ie] Make `_search_nuxt_data` more lenient Authored by: std-move Co-authored-by: std-move <26625259+std-move@users.noreply.github.com> --- yt_dlp/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 7deab995c4..c94b4abdc2 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1687,7 +1687,7 @@ def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): """Parses Nuxt.js metadata. 
This works as long as the function __NUXT__ invokes is a pure function"""
         rectx = re.escape(context_name)
-        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
+        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){(?:.*?)return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
         js, arg_keys, arg_vals = self._search_regex(
             (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
             webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),

From 568f08051841aedea968258889539741e26009e9 Mon Sep 17 00:00:00 2001
From: std-move <26625259+std-move@users.noreply.github.com>
Date: Fri, 22 Sep 2023 00:20:52 +0200
Subject: [PATCH 182/218] [ie/iprima] Fix extractor (#7216)

Closes #7229
Authored by: std-move
---
 yt_dlp/extractor/iprima.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py
index 6dec1510da..f7aa579b38 100644
--- a/yt_dlp/extractor/iprima.py
+++ b/yt_dlp/extractor/iprima.py
@@ -134,10 +134,17 @@ def _real_extract(self, url):
         ), webpage, 'real id', group='id', default=None)
 
         if not video_id:
-            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
+            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
             video_id = traverse_obj(
                 nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
 
+        if not video_id:
+            nuxt_data = self._search_json(
+                r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
+                webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
+
+            video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
+
         if not video_id:
             self.raise_no_formats('Unable to extract video ID from webpage')

From 661c9a1d029296b28e0b2f8be8a72a43abaf6536 Mon Sep 17 00:00:00 2001
From: bashonly
Date: Thu, 21 Sep 2023 17:48:57 -0500
Subject: [PATCH 183/218] [test:download] Test for `expected_exception`

Authored by: at-wat
Co-authored-by: Atsushi Watanabe
---
 test/test_download.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/test/test_download.py b/test/test_download.py
index 6f00a4deda..2530792493 100755
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -31,6 +31,7 @@
     DownloadError,
     ExtractorError,
     UnavailableVideoError,
+    YoutubeDLError,
     format_bytes,
     join_nonempty,
 )
@@ -100,6 +101,8 @@ def print_skipping(reason):
             print_skipping('IE marked as not _WORKING')
 
         for tc in test_cases:
+            if tc.get('expected_exception'):
+                continue
             info_dict = tc.get('info_dict', {})
             params = tc.get('params', {})
             if not info_dict.get('id'):
@@ -139,6 +142,17 @@ def get_tc_filename(tc):
 
         res_dict = None
 
+        def match_exception(err):
+            expected_exception = test_case.get('expected_exception')
+            if not expected_exception:
+                return False
+            if err.__class__.__name__ == expected_exception:
+                return True
+            for exc in err.exc_info:
+                if exc.__class__.__name__ == expected_exception:
+                    return True
+            return False
+
         def try_rm_tcs_files(tcs=None):
             if tcs is None:
                 tcs = test_cases
@@ -161,6 +175,8 @@ def try_rm_tcs_files(tcs=None):
             except (DownloadError, ExtractorError) as err:
                 # Check if the exception is not a network related one
                 if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
+                    if match_exception(err):
+                        return
                     err.msg = f'{getattr(err, "msg", err)} ({tname})'
                     raise
 
@@ -171,6 +187,10 @@ def try_rm_tcs_files(tcs=None):
                 print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
 
                 try_num += 1
+            except YoutubeDLError as err:
+                if match_exception(err):
+                    return
+                raise
             else:
                 break
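
The TwitCasting patch that follows is the first user of the new `expected_exception` test field added above; a test case opts in simply by naming the exception class it expects extraction to raise. A minimal sketch of such a test entry (the URL here is the one from the next patch):

```python
# Hypothetical extractor test using the new field; test_download now treats the
# test as passing when extraction raises an exception matching this class name
# (see match_exception() in the diff above).
_TESTS = [{
    'url': 'https://twitcasting.tv/c:unusedlive',
    'expected_exception': 'UserNotLive',
}]
```
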
From c1d71d0d9f41db5e4306c86af232f5f6220a130b Mon Sep 17 00:00:00 2001
From: Atsushi Watanabe
Date: Fri, 22 Sep 2023 08:04:05 +0900
Subject: [PATCH 184/218] [ie/twitcasting] Support `--wait-for-video` (#7975)

Authored by: at-wat
---
 yt_dlp/extractor/twitcasting.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
index 3890d5d8fb..540e217fd8 100644
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -5,8 +5,9 @@
 from .common import InfoExtractor
 from ..dependencies import websockets
 from ..utils import (
-    clean_html,
     ExtractorError,
+    UserNotLive,
+    clean_html,
     float_or_none,
     get_element_by_class,
     get_element_by_id,
@@ -235,6 +236,9 @@ class TwitCastingLiveIE(InfoExtractor):
     _TESTS = [{
         'url': 'https://twitcasting.tv/ivetesangalo',
         'only_matching': True,
+    }, {
+        'url': 'https://twitcasting.tv/c:unusedlive',
+        'expected_exception': 'UserNotLive',
     }]
 
     def _real_extract(self, url):
@@ -260,7 +264,7 @@ def _real_extract(self, url):
             r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>',
             webpage, 'current live ID 2', default=None, group='video_id')
         if not current_live:
-            raise ExtractorError('The user is not currently live')
+            raise UserNotLive(video_id=uploader_id)
 
         return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))

From c2da0b5ea215298135f76e3dc14b972a3c4afacb Mon Sep 17 00:00:00 2001
From: bashonly
Date: Sat, 23 Sep 2023 14:54:00 -0500
Subject: [PATCH 185/218] [ie/ArteTV] Fix HLS formats extraction

Closes #8156
Authored by: bashonly
---
 yt_dlp/extractor/arte.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index e3cc5afb05..a19cd2a3ae 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -169,7 +169,7 @@ def _real_extract(self, url):
             )))
 
             short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
-            if stream['protocol'].startswith('HLS'):
+            if 'HLS' in stream['protocol']:
                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
                     stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
                 for fmt in fmts:

From 5ca095cbcde3e32642a4fe5b2d69e8e3c785a021 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Sat, 23 Sep 2023 15:00:31 -0500
Subject: [PATCH 186/218] [cleanup] Misc (#8182)

Closes #7796, Closes #8028
Authored by: barsnick, sqrtNOT, gamer191, coletdjnz, Grub4K, bashonly
---
 CONTRIBUTING.md                   | 8 ++++----
 README.md                         | 2 +-
 devscripts/make_changelog.py      | 2 +-
 test/test_YoutubeDL.py            | 1 -
 test/test_networking_utils.py     | 6 +++---
 yt_dlp/YoutubeDL.py               | 6 +++---
 yt_dlp/compat/urllib/__init__.py  | 2 +-
 yt_dlp/extractor/abc.py           | 1 -
 yt_dlp/extractor/ign.py           | 4 ----
 yt_dlp/extractor/nebula.py        | 1 -
 yt_dlp/extractor/peekvids.py      | 1 -
 yt_dlp/extractor/radiofrance.py   | 2 +-
 yt_dlp/extractor/rcs.py           | 6 +++---
 yt_dlp/extractor/rokfin.py        | 1 -
 yt_dlp/extractor/s4c.py           | 2 --
 yt_dlp/extractor/sovietscloset.py | 1 -
 yt_dlp/extractor/youtube.py       | 2 +-
 yt_dlp/networking/__init__.py     | 2 +-
 yt_dlp/networking/_urllib.py      | 2 +-
 yt_dlp/networking/exceptions.py   | 4 ++--
 20 files changed, 22 insertions(+), 34 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a8587fe92d..90e7faf7c4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -217,7 +217,7 @@ ## Adding support for a new site
 1. 
Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. 1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all` 1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. -1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want. +1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want. 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 yt_dlp/extractor/yourextractor.py @@ -251,7 +251,7 @@ ## yt-dlp coding conventions ### Mandatory and optional metafields -For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: +For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: - `id` (media identifier) - `title` (media title) @@ -696,7 +696,7 @@ #### Examples ### Use convenience conversion and parsing functions -Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Use `url_or_none` for safe URL processing. @@ -704,7 +704,7 @@ ### Use convenience conversion and parsing functions Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. -Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions. +Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions. 
#### Examples

diff --git a/README.md b/README.md
index d94d8ea822..d9b11952de 100644
--- a/README.md
+++ b/README.md
@@ -1800,7 +1800,7 @@ # EXTRACTOR ARGUMENTS
 #### youtube
 * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
 * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
 * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
 * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
 * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)

diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py
index ac68dcd19a..9ff65db146 100644
--- a/devscripts/make_changelog.py
+++ b/devscripts/make_changelog.py
@@ -260,7 +260,7 @@ class CommitRange:
     AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
     MESSAGE_RE = re.compile(r'''
         (?:\[(?P<prefix>[^\]]+)\]\ )?
-        (?:(?P<sub_details>`?[^:`]+`?): )?
+        (?:(?P<sub_details>`?[\w.-]+`?): )?
         (?P<message>.+?)
         (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        
''', re.VERBOSE | re.DOTALL)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 3cfb61fb26..916ee48b97 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -631,7 +631,6 @@ def test_add_extra_info(self):
         self.assertEqual(test_dict['playlist'], 'funny videos')
 
     outtmpl_info = {
-        'id': '1234',
         'id': '1234',
         'ext': 'mp4',
         'width': None,
diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py
index dbf656090d..419aae1e47 100644
--- a/test/test_networking_utils.py
+++ b/test/test_networking_utils.py
@@ -269,14 +269,14 @@ def test_compat_http_error_autoclose(self):
         assert not response.closed
 
     def test_incomplete_read_error(self):
-        error = IncompleteRead(b'test', 3, cause='test')
+        error = IncompleteRead(4, 3, cause='test')
         assert isinstance(error, IncompleteRead)
         assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>'
         assert str(error) == error.msg == '4 bytes read, 3 more expected'
-        assert error.partial == b'test'
+        assert error.partial == 4
         assert error.expected == 3
         assert error.cause == 'test'
 
-        error = IncompleteRead(b'aaa')
+        error = IncompleteRead(3)
         assert repr(error) == '<IncompleteRead: 3 bytes read>'
         assert str(error) == '3 bytes read'
 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 1feed30524..39aaf2c2ed 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -239,9 +239,9 @@ class YoutubeDL:
                            'selected' (check selected formats),
                            or None (check only if requested by extractor)
     paths:             Dictionary of output paths. The allowed keys are 'home'
-                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
+                       'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
     outtmpl:           Dictionary of templates for output names. Allowed keys
-                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
+                       are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
                        For compatibility with youtube-dl, a single string can also be used
     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
     restrictfilenames: Do not allow "&" and spaces in file names
@@ -422,7 +422,7 @@ class YoutubeDL:
                        asked whether to download the video.
                        - Raise utils.DownloadCancelled(msg) to abort remaining
                        downloads when a video is rejected.
-                       match_filter_func in utils.py is one example for this.
+                       match_filter_func in utils/_utils.py is one example for this.
     color:             A Dictionary with output stream names as keys
                        and their respective color policy as values.
                        Can also just be a single color policy,
diff --git a/yt_dlp/compat/urllib/__init__.py b/yt_dlp/compat/urllib/__init__.py
index b27cc6133c..9084b3c2bf 100644
--- a/yt_dlp/compat/urllib/__init__.py
+++ b/yt_dlp/compat/urllib/__init__.py
@@ -1,7 +1,7 @@
 # flake8: noqa: F405
 from urllib import *  # noqa: F403
 
-del request
+del request  # noqa: F821
 
 from . import request  # noqa: F401
 
 from ..compat_utils import passthrough_module
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index f56133eb3e..d2cf5f7c51 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -180,7 +180,6 @@ class ABCIViewIE(InfoExtractor):
     _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
     _GEO_COUNTRIES = ['AU']
 
-    # ABC iview programs are normally available for 14 days only.
    
_TESTS = [{ 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', 'md5': '67715ce3c78426b11ba167d875ac6abf', diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py index 64875f8ceb..1c4f105e9b 100644 --- a/yt_dlp/extractor/ign.py +++ b/yt_dlp/extractor/ign.py @@ -197,10 +197,6 @@ class IGNVideoIE(IGNBaseIE): 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg', 'duration': 298, 'tags': 'count:13', - 'display_id': '112203', - 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg', - 'duration': 298, - 'tags': 'count:13', }, 'expected_warnings': ['HTTP Error 400: Bad Request'], }, { diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 4f3e691b71..8fba2bcf74 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -127,7 +127,6 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'lindsayellis', 'uploader': 'Lindsay Ellis', 'uploader_id': 'lindsayellis', - 'timestamp': 1533009600, 'uploader_url': 'https://nebula.tv/lindsayellis', 'series': 'Lindsay Ellis', 'display_id': 'that-time-disney-remade-beauty-and-the-beast', diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index d1fc058b92..41f591b093 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -146,7 +146,6 @@ class PlayVidsIE(PeekVidsBaseIE): 'uploader': 'Brazzers', 'age_limit': 18, 'view_count': int, - 'age_limit': 18, 'categories': list, 'tags': list, }, diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 35f4b91dd2..ec1b97631e 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -82,7 +82,7 @@ class RadioFranceBaseIE(InfoExtractor): def _extract_data_from_webpage(self, webpage, display_id, key): return traverse_obj(self._search_json( r'\bconst\s+data\s*=', webpage, key, display_id, - contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json), + contains_pattern=r'\[\{(?s:.+)\}\]', transform_source=js_to_json), (..., 'data', key, {dict}), get_all=False) or {} diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py index 028d3d90bb..b865f63fbd 100644 --- a/yt_dlp/extractor/rcs.py +++ b/yt_dlp/extractor/rcs.py @@ -239,10 +239,10 @@ class RCSEmbedsIE(RCSBaseIE): } }, { 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', - 'match_only': True + 'only_matching': True }, { 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', - 'match_only': True + 'only_matching': True }] _WEBPAGE_TESTS = [{ 'url': 'https://www.iodonna.it/video-iodonna/personaggi-video/monica-bellucci-piu-del-lavoro-oggi-per-me-sono-importanti-lamicizia-e-la-famiglia/', @@ -325,7 +325,7 @@ class RCSIE(RCSBaseIE): } }, { 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', - 'match_only': True + 'only_matching': True }] diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 4a4d40befd..cad76f0c99 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -40,7 +40,6 @@ class RokfinIE(InfoExtractor): 'channel': 'Jimmy Dore', 'channel_id': 65429, 'channel_url': 'https://rokfin.com/TheJimmyDoreShow', - 'duration': 213.0, 'availability': 'public', 'live_status': 'not_live', 'dislike_count': int, diff --git a/yt_dlp/extractor/s4c.py b/yt_dlp/extractor/s4c.py index 990ea2b447..67eff723b1 100644 --- 
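Two of the hunks above deserve a note. The `rcs.py` change fixes a silently-ignored test key (`match_only` was never read; the framework expects `only_matching`). And the `radiofrance` fix replaces a lazy `(\[\{.*?\}\]);` with a greedy match whose dot is allowed to cross newlines via an inline-scoped `(?s:...)` group, rather than flagging the whole pattern. A standalone sketch of that idea:

```python
import re

# Standalone sketch of the new contains_pattern: (?s:...) scopes DOTALL to one
# group, letting the match span newlines without flagging the whole regex.
page = 'const data = [{"data": {"live": 1}},\n{"data": {"podcast": 2}}];'
m = re.search(r'\bconst\s+data\s*=\s*(\[\{(?s:.+)\}\])', page)
print(m.group(1))  # the full [{...}] literal, including the newline
```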
a/yt_dlp/extractor/s4c.py +++ b/yt_dlp/extractor/s4c.py @@ -78,7 +78,6 @@ class S4CSeriesIE(InfoExtractor): 'info_dict': { 'id': '864982911', 'title': 'Iaith ar Daith', - 'description': 'md5:e878ebf660dce89bd2ef521d7ce06397' }, }, { 'url': 'https://www.s4c.cymru/clic/series/866852587', @@ -86,7 +85,6 @@ class S4CSeriesIE(InfoExtractor): 'info_dict': { 'id': '866852587', 'title': 'FFIT Cymru', - 'description': 'md5:abcb3c129cb68dbb6cd304fd33b07e96' }, }] diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 453016ccb3..493eea2a69 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -76,7 +76,6 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'title': 'Arma 3 - Zeus Games #5', 'uploader': 'SovietWomble', 'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$', - 'uploader': 'SovietWomble', 'creator': 'SovietWomble', 'release_timestamp': 1461157200, 'release_date': '20160420', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 023d8fd8c1..a39d17cf11 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -902,7 +902,7 @@ def extract_relative_time(relative_time_text): e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago' """ - # XXX: this could be moved to a general function in utils.py + # XXX: this could be moved to a general function in utils/_utils.py # The relative time text strings are roughly the same as what # Javascript's Intl.RelativeTimeFormat function generates. # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py index 5e88764844..5b1599a6dc 100644 --- a/yt_dlp/networking/__init__.py +++ b/yt_dlp/networking/__init__.py @@ -1,4 +1,4 @@ -# flake8: noqa: 401 +# flake8: noqa: F401 from .common import ( HEADRequest, PUTRequest, diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py index c327f7744e..9e2bf33e45 100644 --- a/yt_dlp/networking/_urllib.py +++ b/yt_dlp/networking/_urllib.py @@ -337,7 +337,7 @@ def handle_sslerror(e: ssl.SSLError): def handle_response_read_exceptions(e): if isinstance(e, http.client.IncompleteRead): - raise IncompleteRead(partial=e.partial, cause=e, expected=e.expected) from e + raise IncompleteRead(partial=len(e.partial), cause=e, expected=e.expected) from e elif isinstance(e, ssl.SSLError): handle_sslerror(e) elif isinstance(e, (OSError, EOFError, http.client.HTTPException, *CONTENT_DECODE_ERRORS)): diff --git a/yt_dlp/networking/exceptions.py b/yt_dlp/networking/exceptions.py index 465b18ba94..f58dc246e6 100644 --- a/yt_dlp/networking/exceptions.py +++ b/yt_dlp/networking/exceptions.py @@ -75,10 +75,10 @@ def __repr__(self): class IncompleteRead(TransportError): - def __init__(self, partial, expected=None, **kwargs): + def __init__(self, partial: int, expected: int = None, **kwargs): self.partial = partial self.expected = expected - msg = f'{len(partial)} bytes read' + msg = f'{partial} bytes read' if expected is not None: msg += f', {expected} more expected' From eaee21bf71889d495076037cbe590c8c0b21ef3a Mon Sep 17 00:00:00 2001 From: garret Date: Sat, 23 Sep 2023 23:13:48 +0100 Subject: [PATCH 187/218] [ie/Monstercat] Add extractor (#8133) Closes #8067 Authored by: garret1317 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/monstercat.py | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) 
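The `IncompleteRead` rework spread across the networking hunks above is a small API hardening: the exception now stores how many bytes were read (an `int`, produced via `len(e.partial)` at the raise site in `_urllib.py`) instead of carrying the partial body itself, so the error stays cheap regardless of response size. A reduced sketch of the new shape:

```python
# Reduced sketch of yt_dlp/networking/exceptions.py after the change above:
# `partial` is a byte count, not the bytes themselves.
class TransportError(Exception):
    def __init__(self, msg=None, cause=None):
        self.cause = cause
        self.msg = msg or ''
        super().__init__(self.msg)


class IncompleteRead(TransportError):
    def __init__(self, partial: int, expected: int = None, **kwargs):
        self.partial = partial
        self.expected = expected
        msg = f'{partial} bytes read'
        if expected is not None:
            msg += f', {expected} more expected'
        super().__init__(msg=msg, **kwargs)


assert str(IncompleteRead(4, 3)) == '4 bytes read, 3 more expected'
print(IncompleteRead(3).msg)  # 3 bytes read
```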
create mode 100644 yt_dlp/extractor/monstercat.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9cda06d8fa..691cac339f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1126,6 +1126,7 @@ MofosexEmbedIE, ) from .mojvideo import MojvideoIE +from .monstercat import MonstercatIE from .morningstar import MorningstarIE from .motherless import ( MotherlessIE, diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py new file mode 100644 index 0000000000..7f04825fcd --- /dev/null +++ b/yt_dlp/extractor/monstercat.py @@ -0,0 +1,79 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_class, + get_element_text_and_html_by_tag, + int_or_none, + unified_strdate, + strip_or_none, + traverse_obj, + try_call, +) + + +class MonstercatIE(InfoExtractor): + _VALID_URL = r'https://www\.monstercat\.com/release/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.monstercat.com/release/742779548009', + 'playlist_count': 20, + 'info_dict': { + 'title': 'The Secret Language of Trees', + 'id': '742779548009', + 'thumbnail': 'https://www.monstercat.com/release/742779548009/cover', + 'release_year': 2023, + 'release_date': '20230711', + 'album': 'The Secret Language of Trees', + 'album_artist': 'BT', + } + }] + + def _extract_tracks(self, table, album_meta): + for td in re.findall(r'((?:(?!)[\w\W])+)', table): # regex by chatgpt due to lack of get_elements_by_tag + title = clean_html(try_call( + lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' Date: Sun, 24 Sep 2023 06:15:01 +0800 Subject: [PATCH 188/218] [ie/PIAULIZAPortal] Add extractor (#7903) Authored by: pzhlkj6612 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/piaulizaportal.py | 70 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 yt_dlp/extractor/piaulizaportal.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 691cac339f..49c35cf713 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1452,6 +1452,7 @@ from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .piapro import PiaproIE +from .piaulizaportal import PIAULIZAPortalIE from .picarto import ( PicartoIE, PicartoVodIE, diff --git a/yt_dlp/extractor/piaulizaportal.py b/yt_dlp/extractor/piaulizaportal.py new file mode 100644 index 0000000000..1eb6d92b72 --- /dev/null +++ b/yt_dlp/extractor/piaulizaportal.py @@ -0,0 +1,70 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + parse_qs, + time_seconds, + traverse_obj, +) + + +class PIAULIZAPortalIE(InfoExtractor): + IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM' + _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' + _TESTS = [{ + 'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44', + 'info_dict': { + 'id': '005f18b7-e810-5618-cb82-0987c5755d44', + 'title': 'プレゼンテーションプレイヤーのサンプル', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }, { + 'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1', + 'info_dict': { + 'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d', + 'title': '【確認用】視聴サンプルページ(ULIZA)', + 'live_status': 'not_live', 
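The Monstercat extractor above walks the release-page table cell by cell; its inline comment notes the regex exists only because the utils lacked a `get_elements_by_tag` helper at the time. The following is a simplified, self-contained stand-in for that approach, not the exact expression used in the extractor:

```python
import re

# Simplified stand-in for the cell-scraping approach above: pull every <td>
# body out of a table with a tempered regex, then post-process each cell.
table = '<table><tr><td>1. Opening Track</td><td>3:41</td></tr></table>'
cells = re.findall(r'<td[^>]*>((?:(?!</td>)[\w\W])+)</td>', table)
print(cells)  # ['1. Opening Track', '3:41']
```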
+ }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0))) + if expires and expires <= time_seconds(): + raise ExtractorError('The link is expired.', video_id=video_id, expected=True) + + webpage = self._download_webpage(url, video_id) + + player_data = self._download_webpage( + self._search_regex( + r'' _ANVATO_PREFIX = 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + _CLIENT_DATA = { + 'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g', + 'clientSecret': 'CZuvCL49d9OwfGsR', + 'deviceId': str(uuid.uuid4()), + 'deviceInfo': base64.b64encode(json.dumps({ + 'model': 'desktop', + 'version': 'Chrome', + 'osName': 'Windows', + 'osVersion': '10.0', + }, separators=(',', ':')).encode()).decode(), + 'networkType': 'other', + 'nflClaimGroupsToAdd': [], + 'nflClaimGroupsToRemove': [], + } + _ACCOUNT_INFO = {} + _API_KEY = None + + _TOKEN = None + _TOKEN_EXPIRY = 0 + + def _get_account_info(self, url, slug): + if not self._API_KEY: + webpage = self._download_webpage(url, slug, fatal=False) or '' + self._API_KEY = self._search_regex( + r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key', + fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f' + + cookies = self._get_cookies('https://auth-id.nfl.com/') + login_token = traverse_obj(cookies, ( + (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False) + if not login_token: + self.raise_login_required() + if 'ucid' not in cookies: + raise ExtractorError( + 'Required cookies for the auth-id.nfl.com domain were not found among passed cookies. ' + 'If using --cookies, these cookies must be exported along with .nfl.com cookies, ' + 'or else try using --cookies-from-browser instead', expected=True) + + account = self._download_json( + 'https://auth-id.nfl.com/accounts.getAccountInfo', slug, + note='Downloading account info', data=urlencode_postdata({ + 'include': 'profile,data', + 'lang': 'en', + 'APIKey': self._API_KEY, + 'sdk': 'js_latest', + 'login_token': login_token, + 'authMode': 'cookie', + 'pageURL': url, + 'sdkBuild': traverse_obj(cookies, ( + 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'), + 'format': 'json', + }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + self._ACCOUNT_INFO = traverse_obj(account, { + 'signatureTimestamp': 'signatureTimestamp', + 'uid': 'UID', + 'uidSignature': 'UIDSignature', + }) + + if len(self._ACCOUNT_INFO) != 3: + raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) + + def _get_auth_token(self, url, slug): + if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30): + return + + if not self._ACCOUNT_INFO: + self._get_account_info(url, slug) + + token = self._download_json( + 'https://api.nfl.com/identity/v3/token%s' % ( + '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), + slug, headers={'Content-Type': 'application/json'}, note='Downloading access token', + data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) + + self._TOKEN = token['accessToken'] + self._TOKEN_EXPIRY = token['expiresIn'] + self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] + def _parse_video_config(self, video_config, display_id): video_config = self._parse_json(video_config, display_id) item = video_config['playlist'][0] @@ -168,7 +247,7 @@ def 
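The `PIAULIZAPortal` extractor above short-circuits on signed links whose `expires` query parameter is already in the past, failing fast with a clear message instead of fetching a dead page. The check reduces to a few lines of stdlib:

```python
import time
from urllib.parse import parse_qs, urlparse

# Reduced form of the expiry gate above: signed ULIZA links carry an `expires`
# UNIX timestamp in the query string.
def link_expired(url: str) -> bool:
    expires = parse_qs(urlparse(url).query).get('expires')
    return bool(expires) and int(expires[0]) <= time.time()

print(link_expired('https://ulizaportal.jp/pages/x?expires=4102412400'))  # False until ~2100
```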
_real_extract(self, url): class NFLPlusReplayIE(NFLBaseIE): IE_NAME = 'nfl.com:plus:replay' - _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/[\w-]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/(?P[\w-]+)(?:/(?P\d+))?' _TESTS = [{ 'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1/1572108', 'info_dict': { @@ -185,23 +264,92 @@ class NFLPlusReplayIE(NFLBaseIE): 'thumbnail': r're:^https?://.*\.jpg', }, 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1', + 'playlist_count': 4, + 'info_dict': { + 'id': 'giants-at-vikings-2022-post-1', + }, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4', + 'playlist_count': 2, + 'info_dict': { + 'id': 'giants-at-patriots-2011-pre-4', + }, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4', + 'info_dict': { + 'id': '950701', + 'ext': 'mp4', + 'title': 'Giants @ Patriots', + 'description': 'Giants at Patriots on September 01, 2011', + 'uploader': 'NFL', + 'upload_date': '20210724', + 'timestamp': 1627085874, + 'duration': 1532, + 'categories': ['Game Highlights'], + 'tags': ['play-by-play'], + 'thumbnail': r're:^https?://.*\.jpg', + }, + 'params': { + 'skip_download': 'm3u8', + 'extractor_args': {'nflplusreplay': {'type': ['condensed_game']}}, + }, }] + _REPLAY_TYPES = { + 'full_game': 'Full Game', + 'full_game_spanish': 'Full Game - Spanish', + 'condensed_game': 'Condensed Game', + 'all_22': 'All-22', + } + def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + slug, video_id = self._match_valid_url(url).group('slug', 'id') + requested_types = self._configuration_arg('type', ['all']) + if 'all' in requested_types: + requested_types = list(self._REPLAY_TYPES.keys()) + requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types)) + + if not video_id: + self._get_auth_token(url, slug) + headers = {'Authorization': f'Bearer {self._TOKEN}'} + game_id = self._download_json( + f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug, + 'Downloading game ID', query={'withExternalIds': 'true'}, headers=headers)['id'] + replays = self._download_json( + 'https://api.nfl.com/content/v1/videos/replays', slug, 'Downloading replays JSON', + query={'gameId': game_id}, headers=headers) + if len(requested_types) == 1: + video_id = traverse_obj(replays, ( + 'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False) + + if video_id: + return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + + def entries(): + for replay in traverse_obj( + replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types) + ): + video_id = replay['mcpPlaybackId'] + yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + + return self.playlist_result(entries(), slug) class NFLPlusEpisodeIE(NFLBaseIE): IE_NAME = 'nfl.com:plus:episode' _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/episodes/(?P[\w-]+)' _TESTS = [{ - 'note': 'premium content', + 'note': 'Subscription required', 'url': 'https://www.nfl.com/plus/episodes/kurt-s-qb-insider-conference-championships', 'info_dict': { 'id': '1576832', 'ext': 'mp4', - 'title': 'Kurt\'s QB Insider: Conference Championships', + 'title': 'Conference Championships', 'description': 
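The replay-selection logic added above maps the user's `type` extractor-args onto the API's `subType` labels and keeps only the replays that match (everything, when `all` is requested). A distilled, standalone version of that filter:

```python
# Distilled version of the replay-type selection above: map requested keys to
# the API's subType labels, then keep only matching items with a playback id.
REPLAY_TYPES = {
    'full_game': 'Full Game',
    'full_game_spanish': 'Full Game - Spanish',
    'condensed_game': 'Condensed Game',
    'all_22': 'All-22',
}

def select_replays(items, requested=('all',)):
    wanted = list(REPLAY_TYPES.values()) if 'all' in requested else [
        REPLAY_TYPES[key] for key in requested if key in REPLAY_TYPES]
    return [item['mcpPlaybackId'] for item in items
            if item.get('mcpPlaybackId') and item.get('subType') in wanted]

items = [{'subType': 'Condensed Game', 'mcpPlaybackId': '950701'},
         {'subType': 'All-22', 'mcpPlaybackId': None}]
print(select_replays(items, requested=['condensed_game']))  # ['950701']
```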
'md5:944f7fab56f7a37430bf8473f5473857', 'uploader': 'NFL', 'upload_date': '20230127', @@ -214,85 +362,9 @@ class NFLPlusEpisodeIE(NFLBaseIE): 'params': {'skip_download': 'm3u8'}, }] - _CLIENT_DATA = { - 'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g', - 'clientSecret': 'CZuvCL49d9OwfGsR', - 'deviceId': str(uuid.uuid4()), - 'deviceInfo': base64.b64encode(json.dumps({ - 'model': 'desktop', - 'version': 'Chrome', - 'osName': 'Windows', - 'osVersion': '10.0', - }, separators=(',', ':')).encode()).decode(), - 'networkType': 'other', - 'nflClaimGroupsToAdd': [], - 'nflClaimGroupsToRemove': [], - } - _ACCOUNT_INFO = {} - _API_KEY = None - - _TOKEN = None - _TOKEN_EXPIRY = 0 - - def _get_account_info(self, url, video_id): - cookies = self._get_cookies('https://www.nfl.com/') - login_token = traverse_obj(cookies, ( - (f'glt_{self._API_KEY}', f'gig_loginToken_{self._API_KEY}', - lambda k, _: k.startswith('glt_') or k.startswith('gig_loginToken_')), - {lambda x: x.value}), get_all=False) - if not login_token: - self.raise_login_required() - - account = self._download_json( - 'https://auth-id.nfl.com/accounts.getAccountInfo', video_id, - note='Downloading account info', data=urlencode_postdata({ - 'include': 'profile,data', - 'lang': 'en', - 'APIKey': self._API_KEY, - 'sdk': 'js_latest', - 'login_token': login_token, - 'authMode': 'cookie', - 'pageURL': url, - 'sdkBuild': traverse_obj(cookies, ( - 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='13642'), - 'format': 'json', - }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) - - self._ACCOUNT_INFO = traverse_obj(account, { - 'signatureTimestamp': 'signatureTimestamp', - 'uid': 'UID', - 'uidSignature': 'UIDSignature', - }) - - if len(self._ACCOUNT_INFO) != 3: - raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) - - def _get_auth_token(self, url, video_id): - if not self._ACCOUNT_INFO: - self._get_account_info(url, video_id) - - token = self._download_json( - 'https://api.nfl.com/identity/v3/token%s' % ( - '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), - video_id, headers={'Content-Type': 'application/json'}, note='Downloading access token', - data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) - - self._TOKEN = token['accessToken'] - self._TOKEN_EXPIRY = token['expiresIn'] - self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] - def _real_extract(self, url): slug = self._match_id(url) - - if not self._API_KEY: - webpage = self._download_webpage(url, slug, fatal=False) or '' - self._API_KEY = self._search_regex( - r'window\.gigyaApiKey=["\'](\w+)["\'];', webpage, 'API key', - default='3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f') - - if not self._TOKEN or self._TOKEN_EXPIRY <= int(time.time()): - self._get_auth_token(url, slug) - + self._get_auth_token(url, slug) video_id = self._download_json( f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={ 'Authorization': f'Bearer {self._TOKEN}', From 61bdf15fc7400601c3da1aa7a43917310a5bf391 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 24 Sep 2023 02:24:47 +0200 Subject: [PATCH 193/218] [core] Raise minimum recommended Python version to 3.8 (#8183) Authored by: Grub4K --- devscripts/changelog_override.json | 5 +++++ test/test_execution.py | 3 +++ yt_dlp/YoutubeDL.py | 16 ++++------------ yt_dlp/update.py | 25 +++++++++++++++++++++++++ 4 files changed, 37 insertions(+), 12 deletions(-) diff --git 
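The removals above complete the refactor: the auth plumbing that used to live in `NFLPlusEpisodeIE` is now inherited from the shared base class, and the hoisted `_get_auth_token` adds a 30-second freshness margin so a token is refreshed slightly before it actually expires. The caching pattern in isolation:

```python
import time

# The token-reuse pattern from the NFL base class above, reduced to its core:
# refresh only when the cached token is missing or within ~30s of expiring.
class TokenCache:
    def __init__(self, fetch):
        self._fetch = fetch  # callable returning (token, expiry_epoch)
        self._token, self._expiry = None, 0

    def get(self):
        if not self._token or self._expiry <= time.time() + 30:
            self._token, self._expiry = self._fetch()
        return self._token

cache = TokenCache(lambda: ('token123', time.time() + 3600))
assert cache.get() == cache.get()  # second call reuses the cached token
```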
a/devscripts/changelog_override.json b/devscripts/changelog_override.json index e7f453acf8..9dfbf510f7 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -88,5 +88,10 @@ "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85", "short": "[rh:urllib] Simplify gzip decoding (#7611)", "authors": ["Grub4K"] + }, + { + "action": "add", + "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b", + "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)" } ] diff --git a/test/test_execution.py b/test/test_execution.py index 7a9e800b66..fb2f6e2e9c 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -45,6 +45,9 @@ def test_lazy_extractors(self): self.assertTrue(os.path.exists(LAZY_EXTRACTORS)) _, stderr = self.run_yt_dlp(opts=('-s', 'test:')) + # `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated python versions + if stderr and stderr.startswith('Deprecated Feature: Support for Python'): + stderr = '' self.assertFalse(stderr) subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 39aaf2c2ed..f322b12a22 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -60,7 +60,7 @@ get_postprocessor, ) from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping -from .update import REPOSITORY, current_git_head, detect_variant +from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant from .utils import ( DEFAULT_OUTTMPL, IDENTITY, @@ -640,17 +640,9 @@ def process_color_policy(stream): for name, stream in self._out_files.items_ if name != 'console' }) - # The code is left like this to be reused for future deprecations - MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7) - current_version = sys.version_info[:2] - if current_version < MIN_RECOMMENDED: - msg = ('Support for Python version %d.%d has been deprecated. ' - 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.' - '\n You will no longer receive updates on this version') - if current_version < MIN_SUPPORTED: - msg = 'Python version %d.%d is no longer supported' - self.deprecated_feature( - f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED)) + system_deprecation = _get_system_deprecation() + if system_deprecation: + self.deprecated_feature(system_deprecation.replace('\n', '\n ')) if self.params.get('allow_unplayable_formats'): self.report_warning( diff --git a/yt_dlp/update.py b/yt_dlp/update.py index d708b09e35..db79df1271 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -112,6 +112,31 @@ def is_non_updateable(): detect_variant(), _NON_UPDATEABLE_REASONS['unknown' if VARIANT else 'other']) +def _get_system_deprecation(): + MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 8) + + if sys.version_info > MIN_RECOMMENDED: + return None + + major, minor = sys.version_info[:2] + if sys.version_info < MIN_SUPPORTED: + msg = f'Python version {major}.{minor} is no longer supported' + else: + msg = f'Support for Python version {major}.{minor} has been deprecated. 
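`_get_system_deprecation` above leans on the fact that Python compares version tuples lexicographically, so two small tuples are enough to express the supported/recommended split. A trimmed sketch with the message text shortened:

```python
import sys

# Trimmed version of the gate added in yt_dlp/update.py above: tuple
# comparison against sys.version_info decides between "unsupported",
# "deprecated" and "fine".
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 8)

def deprecation_message():
    if sys.version_info[:2] >= MIN_RECOMMENDED:
        return None
    major, minor = sys.version_info[:2]
    if sys.version_info[:2] < MIN_SUPPORTED:
        return f'Python version {major}.{minor} is no longer supported'
    return f'Support for Python version {major}.{minor} has been deprecated'

print(deprecation_message())  # None on any currently recommended interpreter
```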
' + # Temporary until `win_x86_exe` uses 3.8, which will deprecate Vista and Server 2008 + if detect_variant() == 'win_x86_exe': + platform_name = platform.platform() + if any(platform_name.startswith(f'Windows-{name}') for name in ('Vista', '2008Server')): + msg = 'Support for Windows Vista/Server 2008 has been deprecated. ' + else: + return None + msg += ('See https://github.com/yt-dlp/yt-dlp/issues/7803 for details.' + '\nYou may stop receiving updates on this version at any time') + + major, minor = MIN_RECOMMENDED + return f'{msg}! Please update to Python {major}.{minor} or above' + + def _sha256_file(path): h = hashlib.sha256() mv = memoryview(bytearray(128 * 1024)) From de015e930747165dbb8fcd360f8775fd973b7d6e Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 24 Sep 2023 02:29:01 +0200 Subject: [PATCH 194/218] [core] Prevent RCE when using `--exec` with `%q` (CVE-2023-40581) The shell escape function is now using `""` instead of `\"`. `utils.Popen` has been patched to properly quote commands. Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg for reference. Authored by: Grub4K --- devscripts/changelog_override.json | 5 +++++ test/test_YoutubeDL.py | 6 +++--- test/test_utils.py | 16 ++++++++++++++++ yt_dlp/compat/__init__.py | 2 +- yt_dlp/postprocessor/exec.py | 12 +++++------- yt_dlp/utils/_utils.py | 18 ++++++++++++++++-- 6 files changed, 46 insertions(+), 13 deletions(-) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 9dfbf510f7..fe0c82c66b 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -93,5 +93,10 @@ "action": "add", "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b", "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)" + }, + { + "action": "add", + "when": "61bdf15fc7400601c3da1aa7a43917310a5bf391", + "short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands." 
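The Vista/Server 2008 carve-out above keys off `platform.platform()` strings such as `Windows-Vista-...`. A reduced check, assuming only the two prefixes named in the patch:

```python
import platform

# Reduced form of the legacy-Windows check above; on non-Windows systems this
# simply returns False.
def is_legacy_windows() -> bool:
    name = platform.platform()
    return any(name.startswith(f'Windows-{v}') for v in ('Vista', '2008Server'))

print(is_legacy_windows())
```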
} ] diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 916ee48b97..0cf130db03 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -784,9 +784,9 @@ def expect_same_infodict(out): test('%(title4)#S', 'foo_bar_test') test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' '))) if compat_os_name == 'nt': - test('%(title4)q', ('"foo \\"bar\\" test"', ""foo ⧹"bar⧹" test"")) - test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', '"id 1" "id 2" "id 3"')) - test('%(formats.0.id)#q', ('"id 1"', '"id 1"')) + test('%(title4)q', ('"foo ""bar"" test"', None)) + test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) + test('%(formats.0.id)#q', ('"id 1"', None)) else: test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\'')) test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'") diff --git a/test/test_utils.py b/test/test_utils.py index 47d1f71bfe..dc2d8ce12b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -14,6 +14,7 @@ import io import itertools import json +import subprocess import xml.etree.ElementTree from yt_dlp.compat import ( @@ -28,6 +29,7 @@ InAdvancePagedList, LazyList, OnDemandPagedList, + Popen, age_restricted, args_to_str, base_url, @@ -2388,6 +2390,20 @@ def test_extract_basic_auth(self): assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') + @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') + def test_Popen_windows_escaping(self): + def run_shell(args): + stdout, stderr, error = Popen.run( + args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + assert not stderr + assert not error + return stdout + + # Test escaping + assert run_shell(['echo', 'test"&']) == '"test""&"\n' + # Test if delayed expansion is disabled + assert run_shell(['echo', '^!']) == '"^!"\n' + assert run_shell('echo "^!"') == '"^!"\n' if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 832a9138d3..5ad5c70ecf 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -30,7 +30,7 @@ def compat_etree_fromstring(text): if compat_os_name == 'nt': def compat_shlex_quote(s): import re - return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') + return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""') else: from shlex import quote as compat_shlex_quote # noqa: F401 diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index cfc83167ce..c2e73fbabd 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -1,8 +1,6 @@ -import subprocess - from .common import PostProcessor from ..compat import compat_shlex_quote -from ..utils import PostProcessingError, encodeArgument, variadic +from ..utils import Popen, PostProcessingError, variadic class ExecPP(PostProcessor): @@ -27,10 +25,10 @@ def parse_cmd(self, cmd, info): def run(self, info): for tmpl in self.exec_cmd: cmd = self.parse_cmd(tmpl, info) - self.to_screen('Executing command: %s' % cmd) - retCode = subprocess.call(encodeArgument(cmd), shell=True) - if retCode != 0: - raise PostProcessingError('Command returned error code %d' % retCode) + self.to_screen(f'Executing command: {cmd}') + _, _, return_code = Popen.run(cmd, shell=True) + if return_code != 0: + raise PostProcessingError(f'Command returned error code {return_code}') return [], info diff --git 
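The new `compat_shlex_quote` above is the heart of the CVE-2023-40581 fix: on Windows, a double quote inside an argument is now escaped by doubling it (`""`), the convention `cmd.exe` actually honours, rather than with a backslash the shell can reinterpret. The function in isolation:

```python
import re

# The Windows quoting rule from yt_dlp/compat/__init__.py above, standalone:
# safe names pass through, everything else is wrapped with doubled quotes.
def cmd_quote(s: str) -> str:
    return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')

print(cmd_quote('foo "bar" test'))  # "foo ""bar"" test"
print(cmd_quote('plain_name.txt'))  # unchanged: nothing to quote
```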
a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 213ccc6363..ba62423806 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -825,7 +825,7 @@ def _fix(key): _fix('LD_LIBRARY_PATH') # Linux _fix('DYLD_LIBRARY_PATH') # macOS - def __init__(self, *args, env=None, text=False, **kwargs): + def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs): if env is None: env = os.environ.copy() self._fix_pyinstaller_ld_path(env) @@ -835,7 +835,21 @@ def __init__(self, *args, env=None, text=False, **kwargs): kwargs['universal_newlines'] = True # For 3.6 compatibility kwargs.setdefault('encoding', 'utf-8') kwargs.setdefault('errors', 'replace') - super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo) + + if shell and compat_os_name == 'nt' and kwargs.get('executable') is None: + if not isinstance(args, str): + args = ' '.join(compat_shlex_quote(a) for a in args) + shell = False + args = f'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"' + + super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo) + + def __comspec(self): + comspec = os.environ.get('ComSpec') or os.path.join( + os.environ.get('SystemRoot', ''), 'System32', 'cmd.exe') + if os.path.isabs(comspec): + return comspec + raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set') def communicate_or_kill(self, *args, **kwargs): try: From 088add9567d39b758737e4299a0e619fd89d2e8f Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 24 Sep 2023 02:35:23 +0200 Subject: [PATCH 195/218] [cleanup] Misc Authored by: Grub4K --- test/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_utils.py b/test/test_utils.py index dc2d8ce12b..fd612ff86f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -2405,5 +2405,6 @@ def run_shell(args): assert run_shell(['echo', '^!']) == '"^!"\n' assert run_shell('echo "^!"') == '"^!"\n' + if __name__ == '__main__': unittest.main() From c54ddfba0f7d68034339426223d75373c5fc86df Mon Sep 17 00:00:00 2001 From: github-actions Date: Sun, 24 Sep 2023 00:38:42 +0000 Subject: [PATCH 196/218] Release 2023.09.24 Created by: Grub4K :ci skip all :ci run dl --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 8 +- .../ISSUE_TEMPLATE/2_site_support_request.yml | 8 +- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 8 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 8 +- .github/ISSUE_TEMPLATE/5_feature_request.yml | 8 +- .github/ISSUE_TEMPLATE/6_question.yml | 8 +- CONTRIBUTORS | 36 ++++ Changelog.md | 196 ++++++++++++++++++ supportedsites.md | 49 ++++- yt_dlp/version.py | 4 +- 10 files changed, 298 insertions(+), 35 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index dd1b33dde2..f0fc71d575 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting that yt-dlp is broken on a **supported** site required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -64,7 +64,7 @@ body: [debug] Command-line config: ['-vU', 
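The `Popen` change above completes the fix: with `shell=True` on Windows it now quotes each argument itself and invokes `cmd.exe` explicitly with `/V:OFF`, so delayed variable expansion (`!VAR!`) cannot smuggle code in. A sketch of just the command assembly, with the interpreter path parameterised here so the example runs anywhere, whereas the real code resolves `%ComSpec%`/`%SystemRoot%` and requires an absolute path:

```python
# Sketch of the command assembly added to utils.Popen above; `comspec` is a
# parameter in this demo only.
def build_shell_command(args: str, comspec: str = r'C:\Windows\System32\cmd.exe') -> str:
    # /Q: echo off, /S: strict quote handling after /C, /D: skip AutoRun,
    # /V:OFF: no delayed environment variable expansion
    return f'{comspec} /Q /S /D /V:OFF /C "{args}"'

print(build_shell_command('echo "^!"'))
```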
'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -72,8 +72,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 4f4378924d..ac9a72a1c1 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -76,7 +76,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -84,8 +84,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 05b4dd23b3..577e4d4910 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -72,7 +72,7 @@ body: 
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -80,8 +80,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 880f1014c2..9529c1bd6c 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -57,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,8 +65,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index acb11795f6..b17a6e046c 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -20,7 +20,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the 
[bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true @@ -53,7 +53,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -61,7 +61,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index a2563e975b..5345e8917c 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -26,7 +26,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates required: true @@ -59,7 +59,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -67,7 +67,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 6b9b9f4701..72b9584ecf 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -467,3 +467,39 @@ rdamas RfadnjdExt urectanc nao20010128nao/Lesmiscore +04-pasha-04 +aaruni96 +aky-01 +AmirAflak +ApoorvShah111 +at-wat +davinkevin +demon071 +denhotte +FinnRG +fireattack +Frankgoji +GD-Slime +hatsomatt +ifan-t +kshitiz305 +kylegustavo +mabdelfattah +nathantouze +niemands +Rajeshwaran2001 +RedDeffender +Rohxn16 +sb0stn +SevenLives +simon300000 +snixon +soundchaser128 +szabyg +trainman261 +trislee +wader +Yalab7 +zhallgato +zhong-yiyu +Zprokkel diff --git a/Changelog.md b/Changelog.md index 32cdaca2ab..04511927fa 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,202 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2023.09.24 + +#### Important changes +- **The minimum *recommended* Python version has been raised to 3.8** +Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803) +- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg) + - The shell escape function is now using `""` instead of `\"`. + - `utils.Popen` has been patched to properly quote commands. 
+ +#### Core changes +- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan) +- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K) +- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz) +- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K) +- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K) +- [`FFmpegFixupM3u8PP` may need to run with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan) +- **compat** + - [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd)) + - [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) + - [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan) +- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly) +- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly) +- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan) +- **utils** + - [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan) + - [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly) + - HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by [pukkandan](https://github.com/pukkandan) + - `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah) + - `js_to_json`: [Handle `Array` 
objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move) + +#### Extractor changes +- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan) +- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move) +- **abematv** + - [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives) + - [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan) +- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg) +- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos) +- **artetv**: [Fix HLS formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly) +- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick) +- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305) +- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317) +- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick) +- **bilibili** + - [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt) + - [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22) + - [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by 
[GD-Slime](https://github.com/GD-Slime) +- **bpb**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K) +- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0) +- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld) +- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261) +- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261) +- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317) +- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K) +- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt) +- **dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly)) +- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo) +- **facebook** + - [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1) + - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1) + - [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) 
([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack) + - reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071) +- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by [ischmidt20](https://github.com/ischmidt20) +- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04) +- **generic** + - [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly) + - [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan) +- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001) + - [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly) + - [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly) +- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7) +- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01) +- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) ([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move) +- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly) +- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000) +- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly) +- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn) +- **media.ccc.de**: lists: [Fix 
extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16) +- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien) +- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato) +- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly) +- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) ([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317) +- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317) +- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak) +- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly) +- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly) +- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png) +- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly) +- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111) +- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) ([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly)) +- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move) +- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) 
([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly) +- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon) +- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG) +- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji) +- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands) +- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by [zhong-yiyu](https://github.com/zhong-yiyu) +- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K) +- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0) +- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000) +- **reddit** + - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly) + - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly) +- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly) +- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128) +- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee) +- **s4c** + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t) + - [Add series support and extract subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t) +- 
**sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt) +- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly) +- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera) +- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader) +- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317) +- **tiktok** + - [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly) + - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly) +- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly) +- **tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli) +- **twitcasting** + - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt) + - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat) +- **twitter** + - [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly) + - [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly) + - [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly) + - [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan) + - spaces + - [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by 
[bashonly](https://github.com/bashonly) + - [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly) +- **unsupported**: [List more sites with DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan) +- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt) +- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel) +- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin) +- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg) +- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly) +- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt) +- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev) +- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa) +- **wrestleuniverseppv**: [Fix HLS AES key extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz) + - [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz) + - tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz) +- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **zee5**: [Update access token 
endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly) +- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **external** + - [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly) + - [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by [bashonly](https://github.com/bashonly) + +#### Postprocessor changes +- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic) + +#### Networking changes +- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan) +- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz) +- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz) +- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly) +- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly) +- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz) +- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz) +- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz) +- **Request Handler** + - urllib + - [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz) + - [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a)) + +#### Misc. 
changes +- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan) +- **cleanup** + - [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) + - Miscellaneous + - [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan) + - [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT) + - [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K) +- **devscripts**: `make_changelog`: [Fix changelog grouping and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K) +- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K) +- **test** + - [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan) + - download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat) + ### 2023.07.06 #### Important changes diff --git a/supportedsites.md b/supportedsites.md index 379d28ef38..620e0f3058 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -77,7 +77,7 @@ # Supported sites - **AnimalPlanet** - **ant1newsgr:article**: ant1news.gr articles - **ant1newsgr:embed**: ant1news.gr embedded videos - - **ant1newsgr:watch**: ant1news.gr videos + - **antenna:watch**: antenna.gr and ant1news.gr videos - **Anvato** - **aol.com**: Yahoo screen and movies - **APA** @@ -98,8 +98,6 @@ # Supported sites - **ArteTVCategory** - **ArteTVEmbed** - **ArteTVPlaylist** - - **AsianCrush** - - **AsianCrushPlaylist** - **AtresPlayer**: [*atresplayer*](## "netrc machine") - **AtScaleConfEvent** - **ATTTechChannel** @@ -118,6 +116,7 @@ # Supported sites - **awaan:live** - **awaan:season** - **awaan:video** + - **axs.tv** - **AZMedien**: AZ Medien videos - **BaiduVideo**: 百度视频 - **BanBye** @@ -162,11 +161,16 @@ # Supported sites - **BilibiliAudioAlbum** - **BiliBiliBangumi** - **BiliBiliBangumiMedia** + - **BiliBiliBangumiSeason** + - **BilibiliCollectionList** + - 
**BilibiliFavoritesList** - **BiliBiliPlayer** + - **BilibiliPlaylist** - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix + - **BilibiliSeriesList** - **BilibiliSpaceAudio** - - **BilibiliSpacePlaylist** - **BilibiliSpaceVideo** + - **BilibiliWatchlater** - **BiliIntl**: [*biliintl*](## "netrc machine") - **biliIntl:series**: [*biliintl*](## "netrc machine") - **BiliLive** @@ -201,6 +205,8 @@ # Supported sites - **BreitBart** - **brightcove:legacy** - **brightcove:new** + - **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org + - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **BRMediathek**: Bayerischer Rundfunk Mediathek - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen @@ -220,14 +226,17 @@ # Supported sites - **Camsoda** - **CamtasiaEmbed** - **CamWithHer** + - **Canal1** - **CanalAlpha** - **canalc2.tv** - **Canalplus**: mycanal.fr and piwiplus.fr + - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine") - **CarambaTV** - **CarambaTVPage** - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** + - **cbc.ca:​player:playlist** - **CBS** - **CBSInteractive** - **CBSLocal** @@ -257,6 +266,8 @@ # Supported sites - **Cinchcast** - **Cinemax** - **CinetecaMilano** + - **Cineverse** + - **CineverseDetails** - **CiscoLiveSearch** - **CiscoLiveSession** - **ciscowebex**: Cisco Webex @@ -365,7 +376,7 @@ # Supported sites - **Dotsub** - **Douyin** - **DouyuShow** - - **DouyuTV**: 斗鱼 + - **DouyuTV**: 斗鱼直播 - **DPlay** - **DRBonanza** - **Drooble** @@ -408,6 +419,7 @@ # Supported sites - **Engadget** - **Epicon** - **EpiconSeries** + - **eplus:inbound**: e+ (イープラス) overseas - **Epoch** - **Eporner** - **EroProfile**: [*eroprofile*](## "netrc machine") @@ -732,6 +744,7 @@ # Supported sites - **lynda**: [*lynda*](## "netrc machine") lynda.com videos - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses - **m6** + - **MagellanTV** - **MagentaMusik360** - **mailru**: Видео@Mail.Ru - **mailru:music**: Музыка@Mail.Ru @@ -812,6 +825,7 @@ # Supported sites - **Mofosex** - **MofosexEmbed** - **Mojvideo** + - **Monstercat** - **MonsterSirenHypergryphMusic** - **Morningstar**: morningstar.com - **Motherless** @@ -840,6 +854,7 @@ # Supported sites - **MujRozhlas** - **Murrtube** - **MurrtubeUser**: Murrtube user profile + - **MuseAI** - **MuseScore** - **MusicdexAlbum** - **MusicdexArtist** @@ -944,6 +959,9 @@ # Supported sites - **niconico:playlist** - **niconico:series** - **niconico:tag**: NicoNico video tag URLs + - **NiconicoChannelPlus**: ニコニコチャンネルプラス + - **NiconicoChannelPlus:​channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives + - **NiconicoChannelPlus:​channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. 
nicochannel.jp/channel/videos - **NiconicoUser** - **nicovideo:search**: Nico video search; "nicosearch:" prefix - **nicovideo:​search:date**: Nico video search, newest first; "nicosearchdate:" prefix @@ -1046,6 +1064,7 @@ # Supported sites - **Patreon** - **PatreonCampaign** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) + - **PBSKids** - **PearVideo** - **PeekVids** - **peer.tv** @@ -1062,6 +1081,7 @@ 
# Supported sites - **phoenix.de** - **Photobucket** - **Piapro**: [*piapro*](## "netrc machine") + - **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM - **Picarto** - **PicartoVod** - **Piksel** @@ -1105,6 +1125,7 @@ # Supported sites - **polskieradio:​podcast:list** - **Popcorntimes** - **PopcornTV** + - **Pornbox** - **PornCom** - **PornerBros** - **Pornez** @@ -1121,7 +1142,6 @@ # Supported sites - **PornTop** - **PornTube** - **Pr0gramm** - - **Pr0grammStatic** - **PrankCast** - **PremiershipRugby** - **PressTV** @@ -1156,6 +1176,10 @@ # Supported sites - **radiocanada** - **radiocanada:audiovideo** - **radiofrance** + - **RadioFranceLive** + - **RadioFrancePodcast** + - **RadioFranceProfile** + - **RadioFranceProgramSchedule** - **RadioJavan** - **radiokapital** - **radiokapital:show** @@ -1177,6 +1201,7 @@ # Supported sites - **RayWenderlichCourse** - **RbgTum** - **RbgTumCourse** + - **RbgTumNewCourse** - **RBMARadio** - **RCS** - **RCSEmbeds** @@ -1259,6 +1284,8 @@ # Supported sites - **Ruutu** - **Ruv** - **ruv.is:spila** + - **S4C** + - **S4CSeries** - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video - **safari:api**: [*safari*](## "netrc machine") - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses @@ -1325,6 +1352,7 @@ # Supported sites - **Smotrim** - **Snotr** - **Sohu** + - **SohuV** - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** - **soundcloud**: [*soundcloud*](## "netrc machine") @@ -1378,7 +1406,6 @@ # Supported sites - **StoryFireSeries** - **StoryFireUser** - **Streamable** - - **Streamanity** - **streamcloud.eu** - **StreamCZ** - **StreamFF** @@ -1403,6 +1430,9 @@ # Supported sites - **Tagesschau** - **Tass** - **TBS** + - **TBSJPEpisode** + - **TBSJPPlaylist** + - **TBSJPProgram** - **TDSLifeway** - **Teachable**: [*teachable*](## "netrc machine") - **TeachableCourse**: [*teachable*](## "netrc machine") @@ -1702,7 +1732,6 @@ # Supported sites - **wdr:mobile**: (**Currently broken**) - **WDRElefant** - **WDRPage** - - **web.archive:vlive**: web.archive.org saved vlive videos - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix - **Webcamerapl** - **Webcaster** @@ -1710,7 +1739,8 @@ # Supported sites - **WebOfStories** - **WebOfStoriesPlaylist** - **Weibo** - - **WeiboMobile** + - **WeiboUser** + - **WeiboVideo** - **WeiqiTV**: WQTV - **wetv:episode** - **WeTvSeries** @@ -1726,6 +1756,7 @@ # Supported sites - **Whyp** - **wikimedia.org** - **Willow** + - **Wimbledon** - **WimTV** - **WinSportsVideo** - **Wistia** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 67cfe44efd..2a7c84b93f 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2023.07.06' +__version__ = '2023.09.24' -RELEASE_GIT_HEAD = 'b532a3481046e1eabb6232ee8196fb696c356ff6' +RELEASE_GIT_HEAD = '088add9567d39b758737e4299a0e619fd89d2e8f' VARIANT = None From eb5bdbfa70126c7d5355cc0954b63720522e462c Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 3 Oct 2023 19:42:30 +1300 Subject: [PATCH 197/218] [ie/youtube] Raise a warning for `Incomplete Data` instead of an error (#8238) Closes https://github.com/yt-dlp/yt-dlp/issues/8206 Adds `raise_incomplete_data` extractor arg to revert this behaviour and raise an error. 
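For embedders, a minimal usage sketch of opting back into the old fatal behaviour (illustrative only; this assumes the standard `extractor_args` option plumbing, and the video URL is a placeholder):

    import yt_dlp

    # Extractor args are passed as {ie_key: {arg_name: [values]}};
    # any non-empty value list enables the flag.
    ydl_opts = {'extractor_args': {'youtube': {'raise_incomplete_data': ['1']}}}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=xxxxxxxxxxx'])

On the command line the same switch should be expressible as `--extractor-args "youtube:raise_incomplete_data=1"`.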
Authored by: coletdjnz Co-authored-by: Simon Sawicki --- README.md | 1 + yt_dlp/extractor/youtube.py | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7bf4465721..a0b69c9a1a 100644 --- a/README.md +++ b/README.md @@ -1809,6 +1809,7 @@ #### youtube * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests +* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a39d17cf11..7e13aa7797 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -941,7 +941,13 @@ def _parse_time_text(self, text): def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, default_client='web'): - for retry in self.RetryManager(): + raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE)) + # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal. + icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete)) + icd_rm = next(icd_retries) + main_retries = iter(self.RetryManager()) + main_rm = next(main_retries) + for _ in range(main_rm.retries + icd_rm.retries + 1): try: response = self._call_api( ep=ep, fatal=True, headers=headers, @@ -953,7 +959,8 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers if not isinstance(e.cause, network_exceptions): return self._error_or_warning(e, fatal=fatal) elif not isinstance(e.cause, HTTPError): - retry.error = e + main_rm.error = e + next(main_retries) continue first_bytes = e.cause.response.read(512) @@ -965,27 +972,32 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers if yt_error: self._report_alerts([('ERROR', yt_error)], fatal=False) # Downloading page may result in intermittent 5xx HTTP error - # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 + # Sometimes a 404 is also received. 
See: https://github.com/ytdl-org/youtube-dl/issues/28289 # We also want to catch all other network exceptions since errors in later pages can be troublesome # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 if e.cause.status not in (403, 429): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) try: self._extract_and_report_alerts(response, only_once=True) except ExtractorError as e: - # YouTube servers may return errors we want to retry on in a 200 OK response + # YouTube's servers may return errors we want to retry on in a 200 OK response # See: https://github.com/yt-dlp/yt-dlp/issues/839 if 'unknown error' in e.msg.lower(): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) # Youtube sometimes sends incomplete data # See: https://github.com/ytdl-org/youtube-dl/issues/28194 if not traverse_obj(response, *variadic(check_get_keys)): - retry.error = ExtractorError('Incomplete data received', expected=True) + icd_rm.error = ExtractorError('Incomplete data received', expected=True) + should_retry = next(icd_retries, None) + if not should_retry: + return None continue return response From cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 3 Oct 2023 11:33:40 +0200 Subject: [PATCH 198/218] [ie/xhamster:user] Support creator urls (#8232) Authored by: Grub4K --- yt_dlp/extractor/xhamster.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 37224799bf..aec1f20bb8 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -407,7 +407,7 @@ def _real_extract(self, url): class XHamsterUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS + _VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?P<user>users)|creators)/(?P<id>[^/?#&]+)' _TESTS = [{ # Paginated user profile 'url': 'https://xhamster.com/users/netvideogirls/videos', @@ -422,6 +422,12 @@ class XHamsterUserIE(InfoExtractor): 'id': 'firatkaan', }, 'playlist_mincount': 1, + }, { + 'url': 'https://xhamster.com/creators/squirt-orgasm-69', + 'info_dict': { + 'id': 'squirt-orgasm-69', + }, + 'playlist_mincount': 150, }, { 'url': 'https://xhday.com/users/mobhunter', 'only_matching': True, @@ -430,8 +436,9 @@ class XHamsterUserIE(InfoExtractor): 'only_matching': True, }] - def _entries(self, user_id): - next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id + def _entries(self, user_id, is_user): + prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive') + next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1' for pagenum in itertools.count(1): page = self._download_webpage( next_page_url, user_id, 'Downloading page %s' % pagenum) @@ -454,5 +461,5 @@ def _entries(self, user_id): break def _real_extract(self, url): - user_id = self._match_id(url) - return self.playlist_result(self._entries(user_id), user_id) + user, user_id = self._match_valid_url(url).group('user', 'id') + return self.playlist_result(self._entries(user_id, bool(user)), user_id) From 0730d5a966fa8a937d84bfb7f68be5198acb039b Mon Sep 17 00:00:00 2001 From: bashonly Date: Wed, 4 Oct 2023 12:44:13 -0500 Subject: [PATCH 199/218] [ie/gofile] Fix token cookie bug Authored by: bashonly --- yt_dlp/extractor/gofile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index 8983905839..ef14b57d08 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -60,7 +60,7 @@ def _real_initialize(self): account_data = self._download_json( 'https://api.gofile.io/createAccount', None, note='Getting a new guest account') self._TOKEN = account_data['data']['token'] - self._set_cookie('gofile.io', 'accountToken', self._TOKEN) + self._set_cookie('.gofile.io', 'accountToken', self._TOKEN) def _entries(self, file_id): query_params = { From b095fd3fa9d58a65dc9b830bd63b9d909422aa86 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:01:52 -0500 Subject: [PATCH 200/218] [ie/WrestleUniverseVOD] Call API with device ID (#8272) Closes #8271 Authored by: bashonly --- yt_dlp/extractor/wrestleuniverse.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/extractor/wrestleuniverse.py b/yt_dlp/extractor/wrestleuniverse.py index dd12804db3..145246a148 100644 --- a/yt_dlp/extractor/wrestleuniverse.py +++ b/yt_dlp/extractor/wrestleuniverse.py @@ -190,10 +190,7 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE): def _real_extract(self, url): lang, video_id = self._match_valid_url(url).group('lang', 'id') metadata = self._download_metadata(url, video_id, lang, 'videoEpisodeFallbackData') - video_data = self._call_api(video_id, ':watch', 'watch', data={ - # 'deviceId' is required if ignoreDeviceRestriction is False - 'ignoreDeviceRestriction': True, - }) + video_data = self._call_api(video_id, ':watch', 'watch', data={'deviceId': self._DEVICE_ID}) return { 'id': video_id, From 91a670a4f7babe9c8aa2018f57d8c8952a6f49d8 Mon Sep 17 00:00:00 2001 From: gillux Date: Sat, 7 Oct 2023 06:27:54 +0800 Subject: [PATCH 201/218] [ie/LiTV] Fix extractor (#7785) Closes #5456 Authored by: jiru --- yt_dlp/extractor/litv.py | 48 ++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/yt_dlp/extractor/litv.py b/yt_dlp/extractor/litv.py index 19b298ec6c..2c7c7175ea 100644 --- a/yt_dlp/extractor/litv.py +++ b/yt_dlp/extractor/litv.py @@ -13,7 +13,7 @@ class LiTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' - _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' + _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s' _TESTS = [{ 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', 'info_dict': { 'id': 'VOD00041606', 'title': '花千骨', }, - 'playlist_count': 50, + 'playlist_count': 51, # 50 episodes + 1 trailer }, { 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', - 'md5': '969e343d9244778cb29acec608e53640', + 'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a', 'info_dict': { 'id': 'VOD00041610', 'ext': 'mp4', 'title': '花千骨第1集', 'thumbnail': r're:https?://.*\.jpg$', - 'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f', + 'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。', + 'categories': ['奇幻', '愛情', '中國', '仙俠'], + 'episode': 'Episode 1', 'episode_number': 1, }, 'params': { 'noplaylist': True, }, 'skip': 'Georestricted to Taiwan', }, { 'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&', 'md5': '88322ea132f848d6e3e18b32a832b918', 'info_dict': { 'id': 'VOD00044841', 'ext': 'mp4', 'title': '芈月傳第1集 霸星芈月降世楚國', 'description':
'楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。', }, - 'skip': 'Georestricted to Taiwan', + 'skip': 'No longer exists', }] - def _extract_playlist(self, season_list, video_id, program_info, prompt=True): - episode_title = program_info['title'] - content_id = season_list['contentId'] - + def _extract_playlist(self, playlist_data, content_type): all_episodes = [ self.url_result(smuggle_url( - self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']), + self._URL_TEMPLATE % (content_type, episode['contentId']), {'force_noplaylist': True})) # To prevent infinite recursion - for episode in season_list['episode']] + for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))] - return self.playlist_result(all_episodes, content_id, episode_title) + return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title')) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if self._search_regex( + r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"', + webpage, 'meta refresh redirect', default=False, group=0): + raise ExtractorError('No such content found', expected=True) + program_info = self._parse_json(self._search_regex( r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), video_id) - season_list = list(program_info.get('seasonList', {}).values()) - playlist_id = traverse_obj(season_list, 0, 'contentId') - if self._yes_playlist(playlist_id, video_id, smuggled_data): - return self._extract_playlist(season_list[0], video_id, program_info) - - # In browsers `getMainUrl` request is always issued. Usually this + # In browsers `getProgramInfo` request is always issued. Usually this # endpoint gives the same result as the data embedded in the webpage. - # If georestricted, there are no embedded data, so an extra request is - # necessary to get the error code + # If, for some reason, there are no embedded data, we do an extra request.
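# (the 'assetId' membership check below is what triggers that extra request)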
if 'assetId' not in program_info: program_info = self._download_json( 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, query={'contentId': video_id}, headers={'Accept': 'application/json'}) + + series_id = program_info['seriesId'] + if self._yes_playlist(series_id, video_id, smuggled_data): + playlist_data = self._download_json( + 'https://www.litv.tv/vod/ajax/getSeriesTree', video_id, + query={'seriesId': series_id}, headers={'Accept': 'application/json'}) + return self._extract_playlist(playlist_data, program_info['contentType']) + video_data = self._parse_json(self._search_regex( r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', webpage, 'video data', default='{}'), video_id) @@ -96,7 +102,7 @@ def _real_extract(self, url): 'contentType': program_info['contentType'], } video_data = self._download_json( - 'https://www.litv.tv/vod/getMainUrl', video_id, + 'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id, data=json.dumps(payload).encode('utf-8'), headers={'Content-Type': 'application/json'}) From f980df734cf5c0eaded2f7b38c6c60bccfeebb48 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Fri, 6 Oct 2023 18:31:33 -0400 Subject: [PATCH 202/218] [ie/neteasemusic] Fix extractors (#8181) Closes #4388 Authored by: c-basalt --- yt_dlp/extractor/neteasemusic.py | 575 +++++++++++++++++-------------- 1 file changed, 312 insertions(+), 263 deletions(-) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 5b7307bc8f..68bfcb6ba7 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -2,105 +2,74 @@ import json import re import time -from base64 import b64encode -from binascii import hexlify -from datetime import datetime from hashlib import md5 from random import randint from .common import InfoExtractor from ..aes import aes_ecb_encrypt, pkcs7_padding -from ..compat import compat_urllib_parse_urlencode -from ..networking import Request from ..utils import ( ExtractorError, - bytes_to_intlist, - error_to_compat_str, - float_or_none, int_or_none, - intlist_to_bytes, - try_get, + join_nonempty, + str_or_none, + strftime_or_none, + traverse_obj, + unified_strdate, + url_or_none, + urljoin, + variadic, ) class NetEaseMusicBaseIE(InfoExtractor): _FORMATS = ['bMusic', 'mMusic', 'hMusic'] - _NETEASE_SALT = '3go8&$8*3*3h0k(2)2' _API_BASE = 'http://music.163.com/api/' + _GEO_BYPASS = False - @classmethod - def _encrypt(cls, dfsid): - salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8')) - string_bytes = bytearray(str(dfsid).encode('ascii')) - salt_len = len(salt_bytes) - for i in range(len(string_bytes)): - string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len] - m = md5() - m.update(bytes(string_bytes)) - result = b64encode(m.digest()).decode('ascii') - return result.replace('/', '_').replace('+', '-') + @staticmethod + def kilo_or_none(value): + return int_or_none(value, scale=1000) - def make_player_api_request_data_and_headers(self, song_id, bitrate): - KEY = b'e82ckenh8dichen8' - URL = '/api/song/enhance/player/url' - now = int(time.time() * 1000) - rand = randint(0, 1000) - cookie = { - 'osver': None, - 'deviceId': None, + def _create_eapi_cipher(self, api_path, query_body, cookies): + request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) + + message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1') + msg_digest = md5(message).hexdigest() + + data = pkcs7_padding(list(str.encode( + 
f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}'))) + encrypted = bytes(aes_ecb_encrypt(data, list(b'e82ckenh8dichen8'))) + return f'params={encrypted.hex().upper()}'.encode() + + def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs): + cookies = { + 'osver': 'undefined', + 'deviceId': 'undefined', 'appver': '8.0.0', 'versioncode': '140', - 'mobilename': None, + 'mobilename': 'undefined', 'buildver': '1623435496', 'resolution': '1920x1080', '__csrf': '', 'os': 'pc', - 'channel': None, - 'requestId': '{0}_{1:04}'.format(now, rand), + 'channel': 'undefined', + 'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}', + **traverse_obj(self._get_cookies(self._API_BASE), { + 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}), + }) } - request_text = json.dumps( - {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie}, - separators=(',', ':')) - message = 'nobody{0}use{1}md5forencrypt'.format( - URL, request_text).encode('latin1') - msg_digest = md5(message).hexdigest() - - data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format( - URL, request_text, msg_digest) - data = pkcs7_padding(bytes_to_intlist(data)) - encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY))) - encrypted_params = hexlify(encrypted).decode('ascii').upper() - - cookie = '; '.join( - ['{0}={1}'.format(k, v if v is not None else 'undefined') - for [k, v] in cookie.items()]) - - headers = { - 'User-Agent': self.extractor.get_param('http_headers')['User-Agent'], - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': 'https://music.163.com', - 'Cookie': cookie, - } - return ('params={0}'.format(encrypted_params), headers) + return self._download_json( + urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id, + data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={ + 'Referer': 'https://music.163.com', + 'Cookie': '; '.join([f'{k}={v}' for k, v in cookies.items()]), + **headers, + }, **kwargs) def _call_player_api(self, song_id, bitrate): - url = 'https://interface3.music.163.com/eapi/song/enhance/player/url' - data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate) - try: - msg = 'empty result' - result = self._download_json( - url, song_id, data=data.encode('ascii'), headers=headers) - if result: - return result - except ExtractorError as e: - if type(e.cause) in (ValueError, TypeError): - # JSON load failure - raise - except Exception as e: - msg = error_to_compat_str(e) - self.report_warning('%s API call (%s) failed: %s' % ( - song_id, bitrate, msg)) - return {} + return self._download_eapi_json( + '/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate}, + note=f'Downloading song URL info: bitrate {bitrate}') def extract_formats(self, info): err = 0 @@ -110,45 +79,50 @@ def extract_formats(self, info): details = info.get(song_format) if not details: continue - bitrate = int_or_none(details.get('bitrate')) or 999000 - data = self._call_player_api(song_id, bitrate) - for song in try_get(data, lambda x: x['data'], list) or []: - song_url = try_get(song, lambda x: x['url']) - if not song_url: - continue + for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))): + song_url = song['url'] if self._is_valid_url(song_url, info['id'], 'song'): formats.append({ 'url': song_url, - 'ext': details.get('extension'), - 'abr': float_or_none(song.get('br'), scale=1000), 'format_id': song_format, - 'filesize': 
int_or_none(song.get('size')), - 'asr': int_or_none(details.get('sr')), + 'asr': traverse_obj(details, ('sr', {int_or_none})), + **traverse_obj(song, { + 'ext': ('type', {str}), + 'abr': ('br', {self.kilo_or_none}), + 'filesize': ('size', {int_or_none}), + }), }) elif err == 0: - err = try_get(song, lambda x: x['code'], int) + err = traverse_obj(song, ('code', {int})) or 0 if not formats: - msg = 'No media links found' if err != 0 and (err < 200 or err >= 400): - raise ExtractorError( - '%s (site code %d)' % (msg, err, ), expected=True) + raise ExtractorError(f'No media links found (site code {err})', expected=True) else: self.raise_geo_restricted( - msg + ': probably this video is not available from your location due to geo restriction.', - countries=['CN']) - + 'No media links found: probably due to geo restriction.', countries=['CN']) return formats - @classmethod - def convert_milliseconds(cls, ms): - return int(round(ms / 1000.0)) - def query_api(self, endpoint, video_id, note): - req = Request('%s%s' % (self._API_BASE, endpoint)) - req.headers['Referer'] = self._API_BASE - return self._download_json(req, video_id, note) + result = self._download_json( + f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE}) + code = traverse_obj(result, ('code', {int})) + message = traverse_obj(result, ('message', {str})) or '' + if code == -462: + self.raise_login_required(f'Login required to download: {message}') + elif code != 200: + raise ExtractorError(f'Failed to get meta info: {code} {message}') + return result + + def _get_entries(self, songs_data, entry_keys=None, id_key='id', name_key='name'): + for song in traverse_obj(songs_data, ( + *variadic(entry_keys, (str, bytes, dict, set)), + lambda _, v: int_or_none(v[id_key]) is not None)): + song_id = str(song[id_key]) + yield self.url_result( + f'http://music.163.com/#/song?id={song_id}', NetEaseMusicIE, + song_id, traverse_obj(song, (name_key, {str}))) class NetEaseMusicIE(NetEaseMusicBaseIE): @@ -156,16 +130,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): IE_DESC = '网易云音乐' _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' _TESTS = [{ - 'url': 'http://music.163.com/#/song?id=32102397', - 'md5': '3e909614ce09b1ccef4a3eb205441190', + 'url': 'https://music.163.com/#/song?id=548648087', 'info_dict': { - 'id': '32102397', + 'id': '548648087', 'ext': 'mp3', - 'title': 'Bad Blood', - 'creator': 'Taylor Swift / Kendrick Lamar', - 'upload_date': '20150516', - 'timestamp': 1431792000, - 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c', + 'title': '戒烟 (Live)', + 'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊', + 'timestamp': 1522944000, + 'upload_date': '20180405', + 'description': 'md5:3650af9ee22c87e8637cb2dde22a765c', + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, + "duration": 256, + 'thumbnail': r're:^http.*\.jpg', }, }, { 'note': 'No lyrics.', 'url': 'http://music.163.com/song?id=17241424', 'info_dict': { 'id': '17241424', 'ext': 'mp3', 'title': 'Opus 28', 'creator': 'Dustin O\'Halloran', 'upload_date': '20080211', - 'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4', 'timestamp': 1202745600, + 'duration': 263, + 'thumbnail': r're:^http.*\.jpg', }, }, { 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
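# Mobile share link: covered by the optional `y.` subdomain and `m/` path segment in _VALID_URL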
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', @@ -203,59 +167,99 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'upload_date': '19911130', 'timestamp': 691516800, 'description': 'md5:1ba2f911a2b0aa398479f595224f2141', + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, + 'duration': 268, + 'alt_title': '伴唱:现代人乐队 合唱:总政歌舞团', + 'thumbnail': r're:^http.*\.jpg', }, + }, { + 'url': 'http://music.163.com/#/song?id=32102397', + 'md5': '3e909614ce09b1ccef4a3eb205441190', + 'info_dict': { + 'id': '32102397', + 'ext': 'mp3', + 'title': 'Bad Blood', + 'creator': 'Taylor Swift / Kendrick Lamar', + 'upload_date': '20150516', + 'timestamp': 1431792000, + 'description': 'md5:21535156efb73d6d1c355f95616e285a', + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, + 'duration': 199, + 'thumbnail': r're:^http.*\.jpg', + }, + 'skip': 'Blocked outside Mainland China', + }, { + 'note': 'Has translated name.', + 'url': 'http://music.163.com/#/song?id=22735043', + 'info_dict': { + 'id': '22735043', + 'ext': 'mp3', + 'title': '소원을 말해봐 (Genie)', + 'creator': '少女时代', + 'upload_date': '20100127', + 'timestamp': 1264608000, + 'description': 'md5:03d1ffebec3139aa4bafe302369269c5', + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, + 'duration': 229, + 'alt_title': '说出愿望吧(Genie)', + 'thumbnail': r're:^http.*\.jpg', + }, + 'skip': 'Blocked outside Mainland China', }] def _process_lyrics(self, lyrics_info): - original = lyrics_info.get('lrc', {}).get('lyric') - translated = lyrics_info.get('tlyric', {}).get('lyric') + original = traverse_obj(lyrics_info, ('lrc', 'lyric', {str})) + translated = traverse_obj(lyrics_info, ('tlyric', 'lyric', {str})) + + if not original or original == '[99:00.00]纯音乐,请欣赏\n': + return None if not translated: - return original + return { + 'lyrics': [{'data': original, 'ext': 'lrc'}], + } lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)' original_ts_texts = re.findall(lyrics_expr, original) - translation_ts_dict = dict( - (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated) - ) - lyrics = '\n'.join([ - '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, '')) - for time_stamp, text in original_ts_texts - ]) - return lyrics + translation_ts_dict = dict(re.findall(lyrics_expr, translated)) + + merged = '\n'.join( + join_nonempty(f'{timestamp}{text}', translation_ts_dict.get(timestamp, ''), delim=' / ') + for timestamp, text in original_ts_texts) + + return { + 'lyrics_merged': [{'data': merged, 'ext': 'lrc'}], + 'lyrics': [{'data': original, 'ext': 'lrc'}], + 'lyrics_translated': [{'data': translated, 'ext': 'lrc'}], + } def _real_extract(self, url): song_id = self._match_id(url) - params = { - 'id': song_id, - 'ids': '[%s]' % song_id - } info = self.query_api( - 'song/detail?' 
+ compat_urllib_parse_urlencode(params), - song_id, 'Downloading song info')['songs'][0] + f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0] formats = self.extract_formats(info) - lyrics_info = self.query_api( - 'song/lyric?id=%s&lv=-1&tv=-1' % song_id, - song_id, 'Downloading lyrics data') - lyrics = self._process_lyrics(lyrics_info) - - alt_title = None - if info.get('transNames'): - alt_title = '/'.join(info.get('transNames')) + lyrics = self._process_lyrics(self.query_api( + f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data')) + lyric_data = { + 'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False), + 'subtitles': lyrics, + } if lyrics else {} return { 'id': song_id, - 'title': info['name'], - 'alt_title': alt_title, - 'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]), - 'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')), - 'thumbnail': info.get('album', {}).get('picUrl'), - 'duration': self.convert_milliseconds(info.get('duration', 0)), - 'description': lyrics, 'formats': formats, + 'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None, + 'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None, + **lyric_data, + **traverse_obj(info, { + 'title': ('name', {str}), + 'timestamp': ('album', 'publishTime', {self.kilo_or_none}), + 'thumbnail': ('album', 'picUrl', {url_or_none}), + 'duration': ('duration', {self.kilo_or_none}), + }), } @@ -263,31 +267,44 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE): IE_NAME = 'netease:album' IE_DESC = '网易云音乐 - 专辑' _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)' - _TEST = { + _TESTS = [{ + 'url': 'https://music.163.com/#/album?id=133153666', + 'info_dict': { + 'id': '133153666', + 'title': '桃几的翻唱', + 'upload_date': '20210913', + 'description': '桃几2021年翻唱合集', + 'thumbnail': r're:^http.*\.jpg', + }, + 'playlist_mincount': 13, + }, { 'url': 'http://music.163.com/#/album?id=220780', 'info_dict': { 'id': '220780', - 'title': 'B\'day', + 'title': 'B\'Day', + 'upload_date': '20060904', + 'description': 'md5:71a74e1d8f392d88cf1bbe48879ad0b0', + 'thumbnail': r're:^http.*\.jpg', }, 'playlist_count': 23, - 'skip': 'Blocked outside Mainland China', - } + }] def _real_extract(self, url): album_id = self._match_id(url) + webpage = self._download_webpage(f'https://music.163.com/album?id={album_id}', album_id) - info = self.query_api( - 'album/%s?id=%s' % (album_id, album_id), - album_id, 'Downloading album data')['album'] - - name = info['name'] - desc = info.get('description') - entries = [ - self.url_result('http://music.163.com/#/song?id=%s' % song['id'], - 'NetEaseMusic', song['id']) - for song in info['songs'] - ] - return self.playlist_result(entries, album_id, name, desc) + songs = self._search_json( + r'<textarea[^>]+\bid="song-list-pre-data"[^>]*>', webpage, 'metainfo', album_id, + end_pattern=r'</textarea>', contains_pattern=r'\[(?s:.+)\]') + metainfo = { + 'title': self._og_search_property('title', webpage, 'title', fatal=False), + 'description': self._html_search_regex( + (rf'<div[^>]+\bid="album-desc-{suffix}"[^>]*>(.*?)</div>' for suffix in ('more', 'dot')), + webpage, 'description', flags=re.S, fatal=False), + 'thumbnail': self._og_search_property('image', webpage, 'thumbnail', fatal=False), + 'upload_date': unified_strdate(self._html_search_meta('music:release_date', webpage, 'date', fatal=False)), + } + return self.playlist_result(self._get_entries(songs),
album_id, **metainfo) class NetEaseMusicSingerIE(NetEaseMusicBaseIE): @@ -299,10 +316,9 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE): 'url': 'http://music.163.com/#/artist?id=10559', 'info_dict': { 'id': '10559', - 'title': '张惠妹 - aMEI;阿密特', + 'title': '张惠妹 - aMEI;阿妹;阿密特', }, 'playlist_count': 50, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'Singer has translated name.', 'url': 'http://music.163.com/#/artist?id=124098', @@ -311,28 +327,28 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE): 'title': '李昇基 - 이승기', }, 'playlist_count': 50, - 'skip': 'Blocked outside Mainland China', + }, { + 'note': 'Singer with both translated and alias', + 'url': 'https://music.163.com/#/artist?id=159692', + 'info_dict': { + 'id': '159692', + 'title': '初音ミク - 初音未来;Hatsune Miku', + }, + 'playlist_count': 50, }] def _real_extract(self, url): singer_id = self._match_id(url) info = self.query_api( - 'artist/%s?id=%s' % (singer_id, singer_id), - singer_id, 'Downloading singer data') + f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data') - name = info['artist']['name'] - if info['artist']['trans']: - name = '%s - %s' % (name, info['artist']['trans']) - if info['artist']['alias']: - name = '%s - %s' % (name, ';'.join(info['artist']['alias'])) + name = join_nonempty( + traverse_obj(info, ('artist', 'name', {str})), + join_nonempty(*traverse_obj(info, ('artist', ('trans', ('alias', ...)), {str})), delim=';'), + delim=' - ') - entries = [ - self.url_result('http://music.163.com/#/song?id=%s' % song['id'], - 'NetEaseMusic', song['id']) - for song in info['hotSongs'] - ] - return self.playlist_result(entries, singer_id, name) + return self.playlist_result(self._get_entries(info, 'hotSongs'), singer_id, name) class NetEaseMusicListIE(NetEaseMusicBaseIE): @@ -344,10 +360,28 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): 'info_dict': { 'id': '79177352', 'title': 'Billboard 2007 Top 100', - 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022' + 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022', + 'tags': ['欧美'], + 'uploader': '浑然破灭', + 'uploader_id': '67549805', + 'timestamp': int, + 'upload_date': r're:\d{8}', }, - 'playlist_count': 99, - 'skip': 'Blocked outside Mainland China', + 'playlist_mincount': 95, + }, { + 'note': 'Toplist/Charts sample', + 'url': 'https://music.163.com/#/discover/toplist?id=60198', + 'info_dict': { + 'id': '60198', + 'title': 're:美国Billboard榜 [0-9]{4}-[0-9]{2}-[0-9]{2}', + 'description': '美国Billboard排行榜', + 'tags': ['流行', '欧美', '榜单'], + 'uploader': 'Billboard公告牌', + 'uploader_id': '48171', + 'timestamp': int, + 'upload_date': r're:\d{8}', + }, + 'playlist_count': 100, }, { 'note': 'Toplist/Charts sample', 'url': 'http://music.163.com/#/discover/toplist?id=3733003', @@ -363,64 +397,86 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - info = self.query_api( - 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id, - list_id, 'Downloading playlist data')['result'] + info = self._download_eapi_json( + '/v3/playlist/detail', list_id, + {'id': list_id, 't': '-1', 'n': '500', 's': '0'}, + note="Downloading playlist info") - name = info['name'] - desc = info.get('description') + metainfo = traverse_obj(info, ('playlist', { + 'title': ('name', {str}), + 'description': ('description', {str}), + 'tags': ('tags', ..., {str}), + 'uploader': ('creator', 'nickname', {str}), + 'uploader_id': ('creator', 'userId', {str_or_none}), + 'timestamp': ('updateTime', {self.kilo_or_none}), + })) + if traverse_obj(info, 
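The singer title assembly above composes "name - trans;alias1;alias2" with nested `join_nonempty` calls. In plain Python (simplified helper; the sample payload mirrors the 初音ミク test case):

    def join_nonempty(*values, delim='-'):
        # trimmed-down stand-in for yt_dlp.utils.join_nonempty
        return delim.join(str(v) for v in values if v)

    artist = {'name': '初音ミク', 'trans': '初音未来', 'alias': ['Hatsune Miku']}
    title = join_nonempty(
        artist.get('name'),
        join_nonempty(artist.get('trans'), *artist.get('alias', []), delim=';'),
        delim=' - ')
    assert title == '初音ミク - 初音未来;Hatsune Miku'
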
('playlist', 'specialType')) == 10: + metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}' - if info.get('specialType') == 10: # is a chart/toplist - datestamp = datetime.fromtimestamp( - self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d') - name = '%s %s' % (name, datestamp) - - entries = [ - self.url_result('http://music.163.com/#/song?id=%s' % song['id'], - 'NetEaseMusic', song['id']) - for song in info['tracks'] - ] - return self.playlist_result(entries, list_id, name, desc) + return self.playlist_result(self._get_entries(info, ('playlist', 'tracks')), list_id, **metainfo) class NetEaseMusicMvIE(NetEaseMusicBaseIE): IE_NAME = 'netease:mv' IE_DESC = '网易云音乐 - MV' _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P[0-9]+)' - _TEST = { + _TESTS = [{ + 'url': 'https://music.163.com/#/mv?id=10958064', + 'info_dict': { + 'id': '10958064', + 'ext': 'mp4', + 'title': '交换余生', + 'description': 'md5:e845872cff28820642a2b02eda428fea', + 'creator': '林俊杰', + 'upload_date': '20200916', + 'thumbnail': r're:http.*\.jpg', + 'duration': 364, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + }, + }, { 'url': 'http://music.163.com/#/mv?id=415350', 'info_dict': { 'id': '415350', 'ext': 'mp4', 'title': '이럴거면 그러지말지', 'description': '白雅言自作曲唱甜蜜爱情', - 'creator': '白雅言', + 'creator': '白娥娟', 'upload_date': '20150520', + 'thumbnail': r're:http.*\.jpg', + 'duration': 216, + 'view_count': int, + 'like_count': int, + 'comment_count': int, }, - 'skip': 'Blocked outside Mainland China', - } + }] def _real_extract(self, url): mv_id = self._match_id(url) info = self.query_api( - 'mv/detail?id=%s&type=mp4' % mv_id, - mv_id, 'Downloading mv info')['data'] + f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data'] formats = [ - {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)} + {'url': mv_url, 'ext': 'mp4', 'format_id': f'{brs}p', 'height': int_or_none(brs)} for brs, mv_url in info['brs'].items() ] return { 'id': mv_id, - 'title': info['name'], - 'description': info.get('desc') or info.get('briefDesc'), - 'creator': info['artistName'], - 'upload_date': info['publishTime'].replace('-', ''), 'formats': formats, - 'thumbnail': info.get('cover'), - 'duration': self.convert_milliseconds(info.get('duration', 0)), + **traverse_obj(info, { + 'title': ('name', {str}), + 'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}), + 'creator': ('artistName', {str}), + 'upload_date': ('publishTime', {unified_strdate}), + 'thumbnail': ('cover', {url_or_none}), + 'duration': ('duration', {self.kilo_or_none}), + 'view_count': ('playCount', {int_or_none}), + 'like_count': ('likeCount', {int_or_none}), + 'comment_count': ('commentCount', {int_or_none}), + }, get_all=False), } @@ -431,75 +487,74 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): _TESTS = [{ 'url': 'http://music.163.com/#/program?id=10109055', 'info_dict': { - 'id': '10109055', + 'id': '32593346', 'ext': 'mp3', 'title': '不丹足球背后的故事', 'description': '喜马拉雅人的足球梦 ...', 'creator': '大话西藏', - 'timestamp': 1434179342, + 'timestamp': 1434179287, 'upload_date': '20150613', + 'thumbnail': r're:http.*\.jpg', 'duration': 900, }, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'This program has accompanying songs.', 'url': 'http://music.163.com/#/program?id=10141022', 'info_dict': { 'id': '10141022', - 'title': '25岁,你是自在如风的少年<27°C>', + 'title': '滚滚电台的有声节目', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', + 'creator': '滚滚电台ORZ', + 
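In the MV change above, `info['brs']` is a resolution-to-URL map, so the format list is one comprehension. A sketch with an invented payload (the extractor uses `int_or_none`, so a non-numeric key degrades to `height: None` instead of raising):

    brs = {'240': 'https://example.com/240.mp4', '720': 'https://example.com/720.mp4'}
    formats = [
        {'url': url, 'ext': 'mp4', 'format_id': f'{res}p', 'height': int(res)}
        for res, url in brs.items()
    ]
    # [{..., 'format_id': '240p', 'height': 240}, {..., 'format_id': '720p', 'height': 720}]
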
'timestamp': 1434450733, + 'upload_date': '20150616', + 'thumbnail': r're:http.*\.jpg', }, 'playlist_count': 4, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'This program has accompanying songs.', 'url': 'http://music.163.com/#/program?id=10141022', 'info_dict': { - 'id': '10141022', + 'id': '32647209', 'ext': 'mp3', - 'title': '25岁,你是自在如风的少年<27°C>', + 'title': '滚滚电台的有声节目', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', - 'timestamp': 1434450841, + 'creator': '滚滚电台ORZ', + 'timestamp': 1434450733, 'upload_date': '20150616', + 'thumbnail': r're:http.*\.jpg', + 'duration': 1104, }, 'params': { 'noplaylist': True }, - 'skip': 'Blocked outside Mainland China', }] def _real_extract(self, url): program_id = self._match_id(url) info = self.query_api( - 'dj/program/detail?id=%s' % program_id, - program_id, 'Downloading program info')['program'] + f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program'] - name = info['name'] - description = info['description'] + metainfo = traverse_obj(info, { + 'title': ('name', {str}), + 'description': ('description', {str}), + 'creator': ('dj', 'brand', {str}), + 'thumbnail': ('coverUrl', {url_or_none}), + 'timestamp': ('createTime', {self.kilo_or_none}), + }) if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']): formats = self.extract_formats(info['mainSong']) return { - 'id': info['mainSong']['id'], - 'title': name, - 'description': description, - 'creator': info['dj']['brand'], - 'timestamp': self.convert_milliseconds(info['createTime']), - 'thumbnail': info['coverUrl'], - 'duration': self.convert_milliseconds(info.get('duration', 0)), + 'id': str(info['mainSong']['id']), 'formats': formats, + 'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})), + **metainfo, } - song_ids = [info['mainSong']['id']] - song_ids.extend([song['id'] for song in info['songs']]) - entries = [ - self.url_result('http://music.163.com/#/song?id=%s' % song_id, - 'NetEaseMusic', song_id) - for song_id in song_ids - ] - return self.playlist_result(entries, program_id, name, description) + songs = traverse_obj(info, (('mainSong', ('songs', ...)),)) + return self.playlist_result(self._get_entries(songs), program_id, **metainfo) class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): @@ -511,38 +566,32 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): 'info_dict': { 'id': '42', 'title': '声音蔓延', - 'description': 'md5:766220985cbd16fdd552f64c578a6b15' + 'description': 'md5:c7381ebd7989f9f367668a5aee7d5f08' }, 'playlist_mincount': 40, - 'skip': 'Blocked outside Mainland China', } _PAGE_SIZE = 1000 def _real_extract(self, url): dj_id = self._match_id(url) - name = None - desc = None + metainfo = {} entries = [] for offset in itertools.count(start=0, step=self._PAGE_SIZE): info = self.query_api( - 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d' - % (self._PAGE_SIZE, dj_id, offset), - dj_id, 'Downloading dj programs - %d' % offset) + f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}', + dj_id, note=f'Downloading dj programs - {offset}') - entries.extend([ - self.url_result( - 'http://music.163.com/#/program?id=%s' % program['id'], - 'NetEaseMusicProgram', program['id']) - for program in info['programs'] - ]) - - if name is None: - radio = info['programs'][0]['radio'] - name = radio['name'] - desc = radio['desc'] + entries.extend(self.url_result( + f'http://music.163.com/#/program?id={program["id"]}', NetEaseMusicProgramIE, + program['id'], 
program.get('name')) for program in info['programs']) + if not metainfo: + metainfo = traverse_obj(info, ('programs', 0, 'radio', { + 'title': ('name', {str}), + 'description': ('desc', {str}), + })) if not info['more']: break - return self.playlist_result(entries, dj_id, name, desc) + return self.playlist_result(entries, dj_id, **metainfo) From a9efb4b8d74f3583450ffda0ee57259a47d39c70 Mon Sep 17 00:00:00 2001 From: xofe <22776566+xofe@users.noreply.github.com> Date: Fri, 6 Oct 2023 22:35:11 +0000 Subject: [PATCH 203/218] [ie/abc.net.au:iview] Improve `episode` extraction (#8201) Authored by: xofe --- yt_dlp/extractor/abc.py | 90 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index d2cf5f7c51..9d527246a1 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -181,18 +181,102 @@ class ABCIViewIE(InfoExtractor): _GEO_COUNTRIES = ['AU'] _TESTS = [{ + 'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00', + 'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed', + 'info_dict': { + 'id': 'CO1211V001S00', + 'ext': 'mp4', + 'title': 'Series 1 Ep 1 Wood For The Trees', + 'series': 'Utopia', + 'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00', + 'upload_date': '20230726', + 'uploader_id': 'abc1', + 'series_id': 'CO1211V', + 'episode_id': 'CO1211V001S00', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Wood For The Trees', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg', + 'timestamp': 1690403700, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode name', 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', 'md5': '67715ce3c78426b11ba167d875ac6abf', 'info_dict': { 'id': 'LE1927H001S00', 'ext': 'mp4', - 'title': "Series 11 Ep 1", - 'series': "Gruen", + 'title': 'Series 11 Ep 1', + 'series': 'Gruen', 'description': 'md5:52cc744ad35045baf6aded2ce7287f67', 'upload_date': '20190925', 'uploader_id': 'abc1', + 'series_id': 'LE1927H', + 'episode_id': 'LE1927H001S00', + 'season_number': 11, + 'season': 'Season 11', + 'episode_number': 1, + 'episode': 'Episode 1', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg', 'timestamp': 1569445289, }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode number', + 'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00', + 'md5': '77cb7d8434440e3b28fbebe331c2456a', + 'info_dict': { + 'id': 'NC2203H039S00', + 'ext': 'mp4', + 'title': 'Series 2022 Locking Up Kids', + 'series': 'Four Corners', + 'description': 'md5:54829ca108846d1a70e1fcce2853e720', + 'upload_date': '20221114', + 'uploader_id': 'abc1', + 'series_id': 'NC2203H', + 'episode_id': 'NC2203H039S00', + 'season_number': 2022, + 'season': 'Season 2022', + 'episode_number': None, + 'episode': 'Locking Up Kids', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg', + 'timestamp': 1668460497, + + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode name or number', + 'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00', + 'md5': '2e17dec06b13cc81dc119d2565289396', + 'info_dict': { + 'id': 'RF2004Q043S00', + 'ext': 'mp4', 
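The dj-radio pagination above walks offsets in fixed steps until the API stops reporting `more`. The general shape, with a toy fetcher standing in for `query_api`:

    import itertools

    PAGE_SIZE = 1000
    TOTAL = 2500  # pretend the radio has 2500 programs

    def fetch_page(offset):  # stand-in for the dj/program/byradio call
        items = list(range(offset, min(offset + PAGE_SIZE, TOTAL)))
        return {'programs': items, 'more': offset + PAGE_SIZE < TOTAL}

    entries = []
    for offset in itertools.count(start=0, step=PAGE_SIZE):
        page = fetch_page(offset)
        entries.extend(page['programs'])
        if not page['more']:
            break
    assert len(entries) == TOTAL
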
+ 'title': 'Series 2021', + 'series': 'Landline', + 'description': 'md5:c9f30d9c0c914a7fd23842f6240be014', + 'upload_date': '20211205', + 'uploader_id': 'abc1', + 'series_id': 'RF2004Q', + 'episode_id': 'RF2004Q043S00', + 'season_number': 2021, + 'season': 'Season 2021', + 'episode_number': None, + 'episode': None, + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg', + 'timestamp': 1638710705, + + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], 'params': { 'skip_download': True, }, @@ -254,6 +338,8 @@ def tokenize_url(url, token): 'episode_number': int_or_none(self._search_regex( r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), 'episode_id': house_number, + 'episode': self._search_regex( + r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None, 'uploader_id': video_params.get('channel'), 'formats': formats, 'subtitles': subtitles, From 48cceec1ddb8649b5e771df8df79eb9c39c82b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Droz?= Date: Fri, 6 Oct 2023 19:38:26 -0300 Subject: [PATCH 204/218] [ie/lbry] Add playlist support (#8213) Closes #5982, Closes #8204 Authored by: drzraf, bashonly, Grub4K --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/lbry.py | 184 ++++++++++++++++++++------------ 2 files changed, 116 insertions(+), 69 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 908abb8ace..ef6123e8a7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -951,6 +951,7 @@ from .lbry import ( LBRYIE, LBRYChannelIE, + LBRYPlaylistIE, ) from .lci import LCIIE from .lcp import ( diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index 9a9f9256fe..ccce300b5b 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -22,10 +22,11 @@ class LBRYBaseIE(InfoExtractor): - _BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)' + _BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)' _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}' - _OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX + _OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' 
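The new `episode` regex above strips the optional "Series N" and "Ep N" prefixes that iview bakes into titles, leaving just the episode name (or nothing). Applied to the titles from the tests:

    import re

    EPISODE_RE = r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$'

    for title in ('Series 1 Ep 1 Wood For The Trees',
                  'Series 2022 Locking Up Kids',
                  'Series 2021'):
        episode = re.search(EPISODE_RE, title).group(1) or None
        print(episode)
    # Wood For The Trees
    # Locking Up Kids
    # None
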
% _CLAIM_ID_REGEX _SUPPORTED_STREAM_TYPES = ['video', 'audio'] + _PAGE_SIZE = 50 def _call_api_proxy(self, method, display_id, params, resource): headers = {'Content-Type': 'application/json-rpc'} @@ -77,10 +78,70 @@ def _parse_stream(self, stream, url): return info + def _fetch_page(self, display_id, url, params, page): + page += 1 + page_params = { + 'no_totals': True, + 'page': page, + 'page_size': self._PAGE_SIZE, + **params, + } + result = self._call_api_proxy( + 'claim_search', display_id, page_params, f'page {page}') + for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])): + yield { + **self._parse_stream(item, url), + '_type': 'url', + 'id': item['claim_id'], + 'url': self._permanent_url(url, item['name'], item['claim_id']), + } + + def _playlist_entries(self, url, display_id, claim_param, metadata): + qs = parse_qs(url) + content = qs.get('content', [None])[0] + params = { + 'fee_amount': qs.get('fee_amount', ['>=0'])[0], + 'order_by': { + 'new': ['release_time'], + 'top': ['effective_amount'], + 'trending': ['trending_group', 'trending_mixed'], + }[qs.get('order', ['new'])[0]], + 'claim_type': 'stream', + 'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES, + **claim_param, + } + duration = qs.get('duration', [None])[0] + if duration: + params['duration'] = { + 'long': '>=1200', + 'short': '<=240', + }[duration] + language = qs.get('language', ['all'])[0] + if language != 'all': + languages = [language] + if language == 'en': + languages.append('none') + params['any_languages'] = languages + + entries = OnDemandPagedList( + functools.partial(self._fetch_page, display_id, url, params), + self._PAGE_SIZE) + + return self.playlist_result( + entries, display_id, **traverse_obj(metadata, ('value', { + 'title': 'title', + 'description': 'description', + }))) + class LBRYIE(LBRYBaseIE): IE_NAME = 'lbry' - _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX) + _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf''' + (?:\$/(?:download|embed)/)? 
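`_playlist_entries` above hands `OnDemandPagedList` a page-fetching callback built with `functools.partial`, so pages are only downloaded when entries are actually consumed. A toy model of that laziness (not the real class, which also caches pages and supports slicing):

    import functools
    import itertools

    def fetch_page(params, page):  # 0-based index, mirroring the callback signature
        print(f'downloading page {page + 1} with {params}')
        return [f'claim-{page * 50 + n}' for n in range(50)]

    pager = functools.partial(fetch_page, {'claim_type': 'stream'})
    entries = itertools.chain.from_iterable(pager(n) for n in itertools.count())
    print(list(itertools.islice(entries, 3)))  # only page 1 is ever downloaded
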
+ (?P + [^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX} + |(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID} + )''' _TESTS = [{ # Video 'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1', @@ -149,7 +210,7 @@ class LBRYIE(LBRYBaseIE): 'channel': 'Gardening In Canada', 'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc', 'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc', - 'formats': 'mincount:3', + 'formats': 'mincount:3', # FIXME 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE', 'license': 'Copyrighted (contact publisher)', } @@ -184,12 +245,12 @@ class LBRYIE(LBRYBaseIE): 'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634', 'ext': 'mp4', 'title': 'Biotechnological Invasion of Skin (April 2023)', - 'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c', + 'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378', 'channel': 'Wicked Truths', 'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', 'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', - 'timestamp': 1685790036, - 'upload_date': '20230603', + 'timestamp': 1695114347, + 'upload_date': '20230919', 'release_timestamp': 1685617473, 'release_date': '20230601', 'duration': 1063, @@ -229,10 +290,10 @@ class LBRYIE(LBRYBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - if display_id.startswith('$/'): - display_id = display_id.split('/', 2)[-1].replace('/', ':') - else: + if display_id.startswith('@'): display_id = display_id.replace(':', '#') + else: + display_id = display_id.replace('/', ':') display_id = urllib.parse.unquote(display_id) uri = 'lbry://' + display_id result = self._resolve_url(uri, display_id, 'stream') @@ -299,7 +360,7 @@ def _real_extract(self, url): class LBRYChannelIE(LBRYBaseIE): IE_NAME = 'lbry:channel' - _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID + _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)' _TESTS = [{ 'url': 'https://lbry.tv/@LBRYFoundation:0', 'info_dict': { @@ -315,65 +376,50 @@ class LBRYChannelIE(LBRYBaseIE): 'url': 'lbry://@lbry#3f', 'only_matching': True, }] - _PAGE_SIZE = 50 - - def _fetch_page(self, claim_id, url, params, page): - page += 1 - page_params = { - 'channel_ids': [claim_id], - 'claim_type': 'stream', - 'no_totals': True, - 'page': page, - 'page_size': self._PAGE_SIZE, - } - page_params.update(params) - result = self._call_api_proxy( - 'claim_search', claim_id, page_params, 'page %d' % page) - for item in (result.get('items') or []): - stream_claim_name = item.get('name') - stream_claim_id = item.get('claim_id') - if not (stream_claim_name and stream_claim_id): - continue - - yield { - **self._parse_stream(item, url), - '_type': 'url', - 'id': stream_claim_id, - 'url': self._permanent_url(url, stream_claim_name, stream_claim_id), - } def _real_extract(self, url): display_id = self._match_id(url).replace(':', '#') - result = self._resolve_url( - 'lbry://' + display_id, display_id, 'channel') + result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel') claim_id = result['claim_id'] - qs = parse_qs(url) - content = qs.get('content', [None])[0] - params = { - 'fee_amount': qs.get('fee_amount', ['>=0'])[0], - 'order_by': { - 'new': ['release_time'], - 'top': ['effective_amount'], - 'trending': ['trending_group', 'trending_mixed'], - }[qs.get('order', ['new'])[0]], - 'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES, - } - duration 
= qs.get('duration', [None])[0] - if duration: - params['duration'] = { - 'long': '>=1200', - 'short': '<=240', - }[duration] - language = qs.get('language', ['all'])[0] - if language != 'all': - languages = [language] - if language == 'en': - languages.append('none') - params['any_languages'] = languages - entries = OnDemandPagedList( - functools.partial(self._fetch_page, claim_id, url, params), - self._PAGE_SIZE) - result_value = result.get('value') or {} - return self.playlist_result( - entries, claim_id, result_value.get('title'), - result_value.get('description')) + + return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result) + + +class LBRYPlaylistIE(LBRYBaseIE): + IE_NAME = 'lbry:playlist' + _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P[0-9a-f-]+)' + _TESTS = [{ + 'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2', + 'info_dict': { + 'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2', + 'title': 'Théâtre Classique', + 'description': 'Théâtre Classique', + }, + 'playlist_mincount': 4, + }, { + 'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770', + 'info_dict': { + 'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770', + 'title': 'Social Media Exposed', + 'description': 'md5:98af97317aacd5b85d595775ea37d80e', + }, + 'playlist_mincount': 34, + }, { + 'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184', + 'info_dict': { + 'id': '938fb11d-215f-4d1c-ad64-723954df2184', + }, + 'playlist_mincount': 1000, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + result = traverse_obj(self._call_api_proxy('claim_search', display_id, { + 'claim_ids': [display_id], + 'no_totals': True, + 'page': 1, + 'page_size': self._PAGE_SIZE, + }, 'playlist'), ('items', 0)) + claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))} + + return self._playlist_entries(url, display_id, claim_param, result) From fbcc299bd8a19cf8b3c8805d6c268a9110230973 Mon Sep 17 00:00:00 2001 From: Umar Getagazov Date: Sat, 7 Oct 2023 01:45:46 +0300 Subject: [PATCH 205/218] [ie/substack] Fix embed extraction (#8218) Authored by: handlerug --- yt_dlp/extractor/substack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 3782ceed1c..5835a5a8d3 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -50,7 +50,7 @@ def _extract_embed_urls(cls, url, webpage): if not re.search(r']+src=["\']https://substackcdn.com/[^"\']+\.js', webpage): return - mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P[^"]+)', webpage) + mobj = re.search(r'{[^}]*\\?["\']subdomain\\?["\']\s*:\s*\\?["\'](?P[^\\"\']+)', webpage) if mobj: parsed = urllib.parse.urlparse(url) yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl() From 2f2dda3a7e85148773da3cdbc03ac9949ec1bc45 Mon Sep 17 00:00:00 2001 From: Umar Getagazov Date: Sat, 7 Oct 2023 01:48:54 +0300 Subject: [PATCH 206/218] [ie/substack] Fix download cookies bug (#8219) Authored by: handlerug --- yt_dlp/extractor/substack.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 5835a5a8d3..6ee3f75e1a 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -56,10 +56,10 @@ def _extract_embed_urls(cls, url, webpage): yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl() raise 
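The Substack embed fix above makes the `subdomain` regex tolerate the JSON being embedded either raw or string-escaped (`\"subdomain\"`). Both invented inputs below now match:

    import re

    SUBDOMAIN_RE = r'{[^}]*\\?["\']subdomain\\?["\']\s*:\s*\\?["\'](?P<subdomain>[^\\"\']+)'

    raw = '{"subdomain":"example"}'
    escaped = '{\\"subdomain\\":\\"example\\"}'
    for page in (raw, escaped):
        assert re.search(SUBDOMAIN_RE, page).group('subdomain') == 'example'
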
cls.StopExtraction() - def _extract_video_formats(self, video_id, username): + def _extract_video_formats(self, video_id, url): formats, subtitles = [], {} for video_format in ('hls', 'mp4'): - video_url = f'https://{username}.substack.com/api/v1/video/upload/{video_id}/src?type={video_format}' + video_url = urllib.parse.urljoin(url, f'/api/v1/video/upload/{video_id}/src?type={video_format}') if video_format == 'hls': fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False) @@ -81,12 +81,17 @@ def _real_extract(self, url): r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string', display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id) + canonical_url = url + domain = traverse_obj(webpage_info, ('domainInfo', 'customDomain', {str})) + if domain: + canonical_url = urllib.parse.urlparse(url)._replace(netloc=domain).geturl() + post_type = webpage_info['post']['type'] formats, subtitles = [], {} if post_type == 'podcast': formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {} elif post_type == 'video': - formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], username) + formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) else: self.raise_no_formats(f'Page type "{post_type}" is not supported') @@ -99,4 +104,5 @@ def _real_extract(self, url): 'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')), 'uploader': traverse_obj(webpage_info, ('pub', 'name')), 'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))), + 'webpage_url': canonical_url, } From 2ad3873f0dfa9285c91d2160e36c039e69d597c7 Mon Sep 17 00:00:00 2001 From: garret Date: Fri, 6 Oct 2023 23:53:11 +0100 Subject: [PATCH 207/218] [ie/radiko] Improve extraction (#8221) Authored by: garret1317 --- yt_dlp/extractor/radiko.py | 67 ++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py index cef68eba08..8c8fb1a8f9 100644 --- a/yt_dlp/extractor/radiko.py +++ b/yt_dlp/extractor/radiko.py @@ -1,4 +1,5 @@ import base64 +import random import urllib.parse from .common import InfoExtractor @@ -13,6 +14,7 @@ class RadikoBaseIE(InfoExtractor): + _GEO_BYPASS = False _FULL_KEY = None _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = ( 'https://c-rpaa.smartstream.ne.jp', @@ -32,7 +34,7 @@ class RadikoBaseIE(InfoExtractor): 'https://c-radiko.smartstream.ne.jp', ) - def _auth_client(self): + def _negotiate_token(self): _, auth1_handle = self._download_webpage_handle( 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page', headers={ @@ -58,10 +60,23 @@ def _auth_client(self): 'x-radiko-partialkey': partial_key, }).split(',')[0] + if area_id == 'OUT': + self.raise_geo_restricted(countries=['JP']) + auth_data = (auth_token, area_id) self.cache.store('radiko', 'auth_data', auth_data) return auth_data + def _auth_client(self): + cachedata = self.cache.load('radiko', 'auth_data') + if cachedata is not None: + response = self._download_webpage( + 'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status=401, + headers={'X-Radiko-AuthToken': cachedata[0], 'X-Radiko-AreaId': cachedata[1]}) + if response == 'OK': + return cachedata + return self._negotiate_token() + def _extract_full_key(self): if self._FULL_KEY: return self._FULL_KEY @@ -75,7 +90,7 @@ def _extract_full_key(self): if full_key: full_key = 
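The cookies fix in the Substack patch above rebuilds URLs against the publication's custom domain rather than always hitting `*.substack.com`. The two urllib pieces it leans on (sample URLs invented):

    import urllib.parse

    url = 'https://newsletter.substack.com/p/some-post'
    custom_domain = 'news.example.com'  # from webpage_info['domainInfo']['customDomain']

    canonical = urllib.parse.urlparse(url)._replace(netloc=custom_domain).geturl()
    api = urllib.parse.urljoin(canonical, '/api/v1/video/upload/abc123/src?type=hls')
    assert canonical == 'https://news.example.com/p/some-post'
    assert api == 'https://news.example.com/api/v1/video/upload/abc123/src?type=hls'
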
full_key.encode() - else: # use full key ever known + else: # use only full key ever known full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa' self._FULL_KEY = full_key @@ -103,24 +118,24 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, m3u8_playlist_data = self._download_xml( f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id, note='Downloading stream information') - m3u8_urls = m3u8_playlist_data.findall('.//url') formats = [] found = set() - for url_tag in m3u8_urls: - pcu = url_tag.find('playlist_create_url').text - url_attrib = url_tag.attrib + + timefree_int = 0 if is_onair else 1 + + for element in m3u8_playlist_data.findall(f'.//url[@timefree="{timefree_int}"]/playlist_create_url'): + pcu = element.text + if pcu in found: + continue + found.add(pcu) playlist_url = update_url_query(pcu, { 'station_id': station, **query, 'l': '15', - 'lsid': '88ecea37e968c1f17d5413312d9f8003', + 'lsid': ''.join(random.choices('0123456789abcdef', k=32)), 'type': 'b', }) - if playlist_url in found: - continue - else: - found.add(playlist_url) time_to_skip = None if is_onair else cursor - ft @@ -138,7 +153,7 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)): sf['preference'] = -100 sf['format_note'] = 'not preferred' - if not is_onair and url_attrib['timefree'] == '1' and time_to_skip: + if not is_onair and timefree_int == 1 and time_to_skip: sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]} formats.extend(subformats) @@ -166,21 +181,7 @@ def _real_extract(self, url): vid_int = unified_timestamp(video_id, False) prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) - auth_cache = self.cache.load('radiko', 'auth_data') - for attempt in range(2): - auth_token, area_id = (not attempt and auth_cache) or self._auth_client() - formats = self._extract_formats( - video_id=video_id, station=station, is_onair=False, - ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, - query={ - 'start_at': radio_begin, - 'ft': radio_begin, - 'end_at': radio_end, - 'to': radio_end, - 'seek': video_id, - }) - if formats: - break + auth_token, area_id = self._auth_client() return { 'id': video_id, @@ -189,8 +190,18 @@ def _real_extract(self, url): 'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader_id': station, 'timestamp': vid_int, - 'formats': formats, 'is_live': True, + 'formats': self._extract_formats( + video_id=video_id, station=station, is_onair=False, + ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, + query={ + 'start_at': radio_begin, + 'ft': radio_begin, + 'end_at': radio_end, + 'to': radio_end, + 'seek': video_id + } + ), } From 35d9cbaf9638ccc9daf8a863063b2e7c135bc664 Mon Sep 17 00:00:00 2001 From: AS6939 <46506352+AS6939@users.noreply.github.com> Date: Sat, 7 Oct 2023 06:56:12 +0800 Subject: [PATCH 208/218] [ie/iq.com] Fix extraction and subtitles (#8260) Closes #7734, Closes #8123 Authored by: AS6939 --- yt_dlp/extractor/iqiyi.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index fa602ba887..3368ab1d93 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -499,9 +499,10 @@ class IqIE(InfoExtractor): 'tm': tm, 'qdy': 'a', 'qds': 0, - 'k_ft1': 141287244169348, - 'k_ft4': 34359746564, - 'k_ft5': 1, + 'k_ft1': '143486267424900', 
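Note the radiko change above also stops pinning `lsid` to one hard-coded value; each playlist request now gets a fresh 32-character hex id. Generating one and folding it into the query (plain urlencode shown here; the extractor itself uses `update_url_query`):

    import random
    import urllib.parse

    query = {
        'station_id': 'XXX',  # placeholder
        'l': '15',
        'lsid': ''.join(random.choices('0123456789abcdef', k=32)),
        'type': 'b',
    }
    playlist_url = 'https://example.invalid/playlist.m3u8?' + urllib.parse.urlencode(query)
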
+ 'k_ft4': '1572868', + 'k_ft7': '4', + 'k_ft5': '1', 'bop': JSON.stringify({ 'version': '10.0', 'dfp': dfp @@ -529,14 +530,22 @@ def _extract_vms_player_js(self, webpage, video_id): webpack_js_url = self._proto_relative_url(self._search_regex( r'') + + return { + 'id': video_id, + 'formats': self._extract_m3u8_formats( + data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'), + 'age_limit': 18, + **traverse_obj(data, { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'release_timestamp': ('created_at', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': ('user', 'id', {str_or_none}), + 'uploader_url': ('user', 'permalink_url', {url_or_none}), + 'thumbnail': ('artwork_url', {url_or_none}), + 'duration': ('duration', {int_or_none}), + 'view_count': ('plays', {int_or_none}), + 'comment_count': ('comment_count', {int_or_none}), + 'webpage_url': ('permalink_url', {url_or_none}), + }), + } From 0e722f2f3ca42e634fd7b06ee70b16bf833ce132 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Droz?= Date: Fri, 6 Oct 2023 19:59:42 -0300 Subject: [PATCH 210/218] [ie/lbry] Extract `uploader_id` (#8244) Closes #123 Authored by: drzraf --- yt_dlp/extractor/lbry.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index ccce300b5b..cc37c41e8c 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -70,11 +70,11 @@ def _parse_stream(self, stream, url): 'duration': ('value', stream_type, 'duration', {int_or_none}), 'channel': ('signing_channel', 'value', 'title', {str}), 'channel_id': ('signing_channel', 'claim_id', {str}), + 'uploader_id': ('signing_channel', 'name', {str}), }) - channel_name = traverse_obj(stream, ('signing_channel', 'name', {str})) - if channel_name and info.get('channel_id'): - info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id']) + if info.get('uploader_id') and info.get('channel_id'): + info['channel_url'] = self._permanent_url(url, info['uploader_id'], info['channel_id']) return info @@ -159,6 +159,7 @@ class LBRYIE(LBRYBaseIE): 'height': 720, 'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png', 'license': 'None', + 'uploader_id': '@Mantega', 'duration': 346, 'channel': 'LBRY/Odysee rats united!!!', 'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627', @@ -192,6 +193,7 @@ class LBRYIE(LBRYBaseIE): 'vcodec': 'none', 'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png', 'license': 'None', + 'uploader_id': '@LBRYFoundation', } }, { 'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e', @@ -210,7 +212,8 @@ class LBRYIE(LBRYBaseIE): 'channel': 'Gardening In Canada', 'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc', 'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc', - 'formats': 'mincount:3', # FIXME + 'uploader_id': '@gardeningincanada', + 'formats': 'mincount:3', 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE', 'license': 'Copyrighted (contact publisher)', } @@ -235,6 +238,7 @@ class LBRYIE(LBRYBaseIE): 'formats': 'mincount:1', 'thumbnail': 'startswith:https://thumb', 'license': 'None', + 'uploader_id': '@RT', }, 'params': {'skip_download': True} }, { @@ -249,6 +253,7 @@ class LBRYIE(LBRYBaseIE): 'channel': 'Wicked Truths', 'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', 'channel_url': 
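The `**traverse_obj(data, {...})` blocks above map a nested JSON payload onto flat info-dict fields, silently dropping whatever is missing. A toy equivalent of that pattern (the real helper also applies the `{str}`/`{int_or_none}`-style coercions):

    def pick(data, *path):
        for key in path:
            if not isinstance(data, dict) or key not in data:
                return None
            data = data[key]
        return data

    data = {'user': {'name': 'someone', 'id': 42}, 'plays': 10}  # invented payload
    info = {field: value for field, value in {
        'uploader': pick(data, 'user', 'name'),
        'uploader_id': pick(data, 'user', 'id'),
        'view_count': pick(data, 'plays'),
        'duration': pick(data, 'duration'),  # absent -> dropped below
    }.items() if value is not None}
    assert info == {'uploader': 'someone', 'uploader_id': 42, 'view_count': 10}
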
'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0', + 'uploader_id': '@wickedtruths', 'timestamp': 1695114347, 'upload_date': '20230919', 'release_timestamp': 1685617473, From e831c80e8b2fc025b3b67d82974cc59e3526fdc8 Mon Sep 17 00:00:00 2001 From: garret Date: Sat, 7 Oct 2023 00:05:48 +0100 Subject: [PATCH 211/218] [ie/nhk] Fix VOD extraction (#8249) Closes #8242 Authored by: garret1317 --- yt_dlp/extractor/nhk.py | 46 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index fbd6a18f6d..bcbc2279f6 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -28,6 +28,44 @@ def _call_api(self, m_id, lang, is_video, is_episode, is_clip): m_id, lang, '/all' if is_video else ''), m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or [] + def _get_api_info(self, refresh=True): + if not refresh: + return self.cache.load('nhk', 'api_info') + + self.cache.store('nhk', 'api_info', {}) + movie_player_js = self._download_webpage( + 'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None, + note='Downloading stream API information') + api_info = { + 'url': self._search_regex( + r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'), + 'token': self._search_regex( + r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'), + } + self.cache.store('nhk', 'api_info', api_info) + return api_info + + def _extract_formats_and_subtitles(self, vod_id): + for refresh in (False, True): + api_info = self._get_api_info(refresh) + if not api_info: + continue + + api_url = api_info.pop('url') + stream_url = traverse_obj( + self._download_json( + api_url, vod_id, 'Downloading stream url info', fatal=False, query={ + **api_info, + 'type': 'json', + 'optional_id': vod_id, + 'active_flg': 1, + }), + ('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False) + if stream_url: + return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id) + + raise ExtractorError('Unable to extract stream url') + def _extract_episode_info(self, url, episode=None): fetch_episode = episode is None lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups() @@ -67,12 +105,14 @@ def get_clean_field(key): } if is_video: vod_id = episode['vod_id'] + formats, subs = self._extract_formats_and_subtitles(vod_id) + info.update({ - '_type': 'url_transparent', - 'ie_key': 'Piksel', - 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, 'id': vod_id, + 'formats': formats, + 'subtitles': subs, }) + else: if fetch_episode: audio_path = episode['audio']['audio'] From 19c90e405b4137c06dfe6f9aaa02396df0da93e5 Mon Sep 17 00:00:00 2001 From: trainman261 Date: Sat, 7 Oct 2023 01:56:19 +0200 Subject: [PATCH 212/218] [cleanup] Update extractor tests (#7718) Authored by: trainman261 --- yt_dlp/extractor/aenetworks.py | 1 + yt_dlp/extractor/amcnetworks.py | 1 + yt_dlp/extractor/cbc.py | 7 ++++++- yt_dlp/extractor/cbs.py | 2 ++ yt_dlp/extractor/cnbc.py | 2 ++ yt_dlp/extractor/corus.py | 3 ++- yt_dlp/extractor/generic.py | 13 ++++++++++--- yt_dlp/extractor/mediaset.py | 3 ++- yt_dlp/extractor/movieclips.py | 1 + yt_dlp/extractor/nationalgeographic.py | 3 +++ yt_dlp/extractor/nbc.py | 22 +++++++++++++++++----- yt_dlp/extractor/scrippsnetworks.py | 4 ++++ yt_dlp/extractor/syfy.py | 1 + yt_dlp/extractor/theplatform.py | 6 +++--- yt_dlp/extractor/theweatherchannel.py | 20 
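The nhk fix above centres on a cache-then-refresh loop: try the cached stream-API credentials first and only re-scrape the player JS when they fail. The control flow, with in-memory stand-ins for the cache and network calls:

    _cache = {}

    def get_api_info(refresh):
        if not refresh:
            return _cache.get('api_info')
        _cache['api_info'] = {'token': 'fresh'}  # stands in for re-scraping movie-player.js
        return _cache['api_info']

    def fetch_stream_url(api_info):  # returns None when the token is stale
        return 'https://example.invalid/master.m3u8' if api_info['token'] == 'fresh' else None

    def stream_url_or_raise():
        for refresh in (False, True):
            api_info = get_api_info(refresh)
            if not api_info:
                continue
            url = fetch_stream_url(api_info)
            if url:
                return url
        raise RuntimeError('Unable to extract stream url')

    assert stream_url_or_raise()  # first pass misses the cache, second refreshes and succeeds
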
+++++++++++--------- 15 files changed, 66 insertions(+), 23 deletions(-) diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index f049a0fb3c..cc26653c1d 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -338,6 +338,7 @@ class BiographyIE(AENetworksBaseIE): 'skip_download': True, }, 'add_ie': ['ThePlatform'], + 'skip': '404 Not Found', }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index c58bc7bfbf..10bd021c55 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -26,6 +26,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }, { 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', 'only_matching': True, diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 2920b9027d..be2d13e442 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -66,6 +66,7 @@ class CBCIE(InfoExtractor): 'uploader': 'CBCC-NEW', 'timestamp': 255977160, }, + 'skip': '404 Not Found', }, { # multiple iframes 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', @@ -97,7 +98,7 @@ class CBCIE(InfoExtractor): # multiple CBC.APP.Caffeine.initInstance(...) 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', 'info_dict': { - 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME 'id': 'dog-indoor-exercise-winter-1.3928238', 'description': 'md5:c18552e41726ee95bd75210d1ca9194c', }, @@ -476,6 +477,10 @@ class CBCGemPlaylistIE(InfoExtractor): 'id': 'schitts-creek/s06', 'title': 'Season 6', 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2', + 'series': 'Schitt\'s Creek', + 'season_number': 6, + 'season': 'Season 6', + 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75', }, }, { 'url': 'https://gem.cbc.ca/schitts-creek/s06', diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index 1c0dbdea94..d97fbd758c 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -101,6 +101,7 @@ class CBSIE(CBSBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': 'Subscription required', }, { 'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/', 'info_dict': { @@ -117,6 +118,7 @@ class CBSIE(CBSBaseIE): }, 'expected_warnings': [ 'This content expired on', 'No video formats found', 'Requested format is not available'], + 'skip': '404 Not Found', }, { 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', 'only_matching': True, diff --git a/yt_dlp/extractor/cnbc.py b/yt_dlp/extractor/cnbc.py index 68fd025b7c..7d209b6d90 100644 --- a/yt_dlp/extractor/cnbc.py +++ b/yt_dlp/extractor/cnbc.py @@ -19,6 +19,7 @@ class CNBCIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'skip': 'Dead link', } def _real_extract(self, url): @@ -49,6 +50,7 @@ class CNBCVideoIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'Dead link', } def _real_extract(self, url): diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py index c03d65310d..bcc34ddd8a 100644 --- 
a/yt_dlp/extractor/corus.py +++ b/yt_dlp/extractor/corus.py @@ -41,7 +41,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE ) ''' _TESTS = [{ - 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/', + 'url': 'https://www.hgtv.ca/video/bryan-inc/movie-night-popcorn-with-bryan/870923331648/', 'info_dict': { 'id': '870923331648', 'ext': 'mp4', @@ -54,6 +54,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE 'skip_download': True, }, 'expected_warnings': ['Failed to parse JSON'], + # FIXME: yt-dlp wrongly raises for geo restriction }, { 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753', 'only_matching': True, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 33e71d1c57..5e1240c13a 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -58,6 +58,8 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'trailer', 'upload_date': '20100513', + 'direct': True, + 'timestamp': 1273772943.0, } }, # Direct link to media delivered compressed (until Accept-Encoding is *) @@ -101,6 +103,8 @@ class GenericIE(InfoExtractor): 'ext': 'webm', 'title': '5_Lennart_Poettering_-_Systemd', 'upload_date': '20141120', + 'direct': True, + 'timestamp': 1416498816.0, }, 'expected_warnings': [ 'URL could be a direct video link, returning it as such.' @@ -133,6 +137,7 @@ class GenericIE(InfoExtractor): 'upload_date': '20201204', }, }], + 'skip': 'Dead link', }, # RSS feed with item with description and thumbnails { @@ -145,12 +150,12 @@ class GenericIE(InfoExtractor): 'playlist': [{ 'info_dict': { 'ext': 'm4a', - 'id': 'c1c879525ce2cb640b344507e682c36d', + 'id': '818a5d38-01cd-152f-2231-ee479677fa82', 'title': 're:Hydrogen!', 'description': 're:.*In this episode we are going.*', 'timestamp': 1567977776, 'upload_date': '20190908', - 'duration': 459, + 'duration': 423, 'thumbnail': r're:^https?://.*\.jpg$', 'episode_number': 1, 'season_number': 1, @@ -267,6 +272,7 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': '404 Not Found', }, # MPD from http://dash-mse-test.appspot.com/media.html { @@ -278,6 +284,7 @@ class GenericIE(InfoExtractor): 'title': 'car-20120827-manifest', 'formats': 'mincount:9', 'upload_date': '20130904', + 'timestamp': 1378272859.0, }, }, # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8 @@ -318,7 +325,7 @@ class GenericIE(InfoExtractor): 'id': 'cmQHVoWB5FY', 'ext': 'mp4', 'upload_date': '20130224', - 'uploader_id': 'TheVerge', + 'uploader_id': '@TheVerge', 'description': r're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index e3b728dcae..2d62042982 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -127,7 +127,8 @@ class MediasetIE(ThePlatformBaseIE): }, 'params': { 'skip_download': True, - } + }, + 'skip': 'Dead link', }, { # WittyTV embed 'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/', diff --git a/yt_dlp/extractor/movieclips.py b/yt_dlp/extractor/movieclips.py index 4777f440e0..f7f2921fdb 100644 --- a/yt_dlp/extractor/movieclips.py +++ b/yt_dlp/extractor/movieclips.py @@ -23,6 +23,7 @@ class MovieClipsIE(InfoExtractor): 'uploader': 'Movieclips', }, 'add_ie': ['ThePlatform'], + 'skip': 'redirects to YouTube', } def _real_extract(self, url): 
diff --git a/yt_dlp/extractor/nationalgeographic.py b/yt_dlp/extractor/nationalgeographic.py index ad525c2589..6f046bc29c 100644 --- a/yt_dlp/extractor/nationalgeographic.py +++ b/yt_dlp/extractor/nationalgeographic.py @@ -24,6 +24,7 @@ class NationalGeographicVideoIE(InfoExtractor): 'uploader': 'NAGS', }, 'add_ie': ['ThePlatform'], + 'skip': 'Redirects to main page', }, { 'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws', @@ -38,6 +39,7 @@ class NationalGeographicVideoIE(InfoExtractor): 'uploader': 'NAGS', }, 'add_ie': ['ThePlatform'], + 'skip': 'Redirects to main page', }, ] @@ -75,6 +77,7 @@ class NationalGeographicTVIE(FOXIE): # XXX: Do not subclass from concrete IE 'params': { 'skip_download': True, }, + 'skip': 'Content not available', }] _HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/' _API_KEY = '238bb0a0c2aba67922c48709ce0c06fd' diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index b3c28ab55d..666550a491 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -284,7 +284,7 @@ class NBCSportsIE(InfoExtractor): _TESTS = [{ # iframe src - 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', + 'url': 'https://www.nbcsports.com/watch/nfl/profootballtalk/pft-pm/unpacking-addisons-reckless-driving-citation', 'info_dict': { 'id': 'PHJSaFWbrTY9', 'ext': 'mp4', @@ -379,7 +379,7 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE _TESTS = [ { 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', - 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf', + 'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610', # md5 tends to fluctuate 'info_dict': { 'id': '269389891880', 'ext': 'mp4', @@ -387,6 +387,8 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', 'timestamp': 1401363060, 'upload_date': '20140529', + 'duration': 46.0, + 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg', }, }, { @@ -402,7 +404,7 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE }, { 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', - 'md5': '8eb831eca25bfa7d25ddd83e85946548', + 'md5': '40d0e48c68896359c80372306ece0fc3', 'info_dict': { 'id': '394064451844', 'ext': 'mp4', @@ -410,11 +412,13 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', 'timestamp': 1423104900, 'upload_date': '20150205', + 'duration': 1236.0, + 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg', }, }, { 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', - 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0', + 'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939', 'info_dict': { 'id': 'n431456', 'ext': 'mp4', @@ -422,11 +426,13 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301', 'upload_date': '20150922', 'timestamp': 1442917800, + 'duration': 37.0, + 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg', }, }, { 'url': 
'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', - 'md5': '118d7ca3f0bea6534f119c68ef539f71', + 'md5': '693d1fa21d23afcc9b04c66b227ed9ff', 'info_dict': { 'id': '669831235788', 'ext': 'mp4', @@ -434,6 +440,8 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', 'upload_date': '20160420', 'timestamp': 1461152093, + 'duration': 69.0, + 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg', }, }, { @@ -447,6 +455,7 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1406937606, 'upload_date': '20140802', + 'duration': 940.0, }, }, { @@ -535,6 +544,7 @@ class NBCOlympicsIE(InfoExtractor): 'upload_date': '20160815', 'uploader': 'NBCU-SPORTS', }, + 'skip': '404 Not Found', } def _real_extract(self, url): @@ -578,6 +588,7 @@ class NBCOlympicsStreamIE(AdobePassIE): 'params': { 'skip_download': 'm3u8', }, + 'skip': 'Livestream', }, { 'note': 'Plain m3u8 source URL', 'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars', @@ -589,6 +600,7 @@ class NBCOlympicsStreamIE(AdobePassIE): 'params': { 'skip_download': 'm3u8', }, + 'skip': 'Livestream', }, ] diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py index adfd7e5f29..7f0bc96456 100644 --- a/yt_dlp/extractor/scrippsnetworks.py +++ b/yt_dlp/extractor/scrippsnetworks.py @@ -39,6 +39,7 @@ class ScrippsNetworksWatchIE(AWSIE): 'skip_download': True, }, 'add_ie': [AnvatoIE.ie_key()], + 'skip': '404 Not Found', }] _SNI_TABLE = { @@ -113,6 +114,9 @@ class ScrippsNetworksIE(InfoExtractor): 'timestamp': 1475678834, 'upload_date': '20161005', 'uploader': 'SCNI-SCND', + 'duration': 29.995, + 'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': ''}], + 'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg', }, 'add_ie': ['ThePlatform'], 'expected_warnings': ['No HLS formats found'], diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py index c79d27a0de..afcdbf7804 100644 --- a/yt_dlp/extractor/syfy.py +++ b/yt_dlp/extractor/syfy.py @@ -23,6 +23,7 @@ class SyfyIE(AdobePassIE): 'skip_download': True, }, 'add_ie': ['ThePlatform'], + 'skip': 'Redirects to main page', }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py index 99caeb5f99..433ce8427c 100644 --- a/yt_dlp/extractor/theplatform.py +++ b/yt_dlp/extractor/theplatform.py @@ -167,7 +167,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): # rtmp download 'skip_download': True, }, - 'skip': '404 Not Found', + 'skip': 'CNet no longer uses ThePlatform', }, { 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD', 'info_dict': { @@ -177,7 +177,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): 'title': 'HIGHLIGHTS: USA bag first ever series Cup win', 'uploader': 'EGSM', }, - 'skip': '404 Not Found', + 'skip': 'Dead link', }, { 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', 'only_matching': True, @@ -195,7 +195,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): 'upload_date': '20150701', 'uploader': 'NBCU-NEWS', }, - 'skip': '404 Not Found', + 'skip': 'Error: Player PID "nbcNewsOffsite" is disabled', }, { # From 
http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1 # geo-restricted (US), HLS encrypted with AES-128 diff --git a/yt_dlp/extractor/theweatherchannel.py b/yt_dlp/extractor/theweatherchannel.py index 682e4335d2..d1921e4f9a 100644 --- a/yt_dlp/extractor/theweatherchannel.py +++ b/yt_dlp/extractor/theweatherchannel.py @@ -11,17 +11,19 @@ class TheWeatherChannelIE(ThePlatformIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'https?://(?:www\.)?weather\.com(?P(?:/(?P[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P[^/?#]+))' _TESTS = [{ - 'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock', - 'md5': 'c4cbe74c9c17c5676b704b950b73dd92', + 'url': 'https://weather.com/storms/hurricane/video/invest-95l-in-atlantic-has-a-medium-chance-of-development', + 'md5': '68f0cf616435683f27ce36bd9c927394', 'info_dict': { - 'id': 'cc82397e-cc3f-4d11-9390-a785add090e8', + 'id': '81acef2d-ee8c-4545-ba83-bff3cc80db97', 'ext': 'mp4', - 'title': 'Ice Climber Is In For A Shock', - 'description': 'md5:55606ce1378d4c72e6545e160c9d9695', - 'uploader': 'TWC - Digital (No Distro)', - 'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c', - 'upload_date': '20160720', - 'timestamp': 1469018835, + 'title': 'Invest 95L In Atlantic Has A Medium Chance Of Development', + 'description': 'md5:0de720fd5f0d0e32207bd4c270fff824', + 'uploader': 'TWC - Digital', + 'uploader_id': 'b5a999e0-9e04-11e1-9ee2-001d092f5a10', + 'upload_date': '20230721', + 'timestamp': 1689967343, + 'display_id': 'invest-95l-in-atlantic-has-a-medium-chance-of-development', + 'duration': 34.0, } }, { 'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india', From 792f1e64f6a2beac51e85408d142b3118115c4fd Mon Sep 17 00:00:00 2001 From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com> Date: Sat, 7 Oct 2023 05:56:47 +0600 Subject: [PATCH 213/218] [ie/theta] Remove extractors (#8251) Authored by: alerikaisattera --- yt_dlp/extractor/_extractors.py | 4 -- yt_dlp/extractor/theta.py | 90 --------------------------------- 2 files changed, 94 deletions(-) delete mode 100644 yt_dlp/extractor/theta.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b10ef2f332..55c3c2f8e8 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2004,10 +2004,6 @@ ) from .thestar import TheStarIE from .thesun import TheSunIE -from .theta import ( - ThetaVideoIE, - ThetaStreamIE, -) from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py deleted file mode 100644 index ecf0ea091d..0000000000 --- a/yt_dlp/extractor/theta.py +++ /dev/null @@ -1,90 +0,0 @@ -from .common import InfoExtractor -from ..utils import try_get - - -class ThetaStreamIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P[a-z0-9-]+)' - _TESTS = [{ - 'url': 'https://www.theta.tv/davirus', - 'skip': 'The live may have ended', - 'info_dict': { - 'id': 'DaVirus', - 'ext': 'mp4', - 'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS', - 'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg', - } - }, { - 'url': 'https://www.theta.tv/mst3k', - 'note': 'This channel is live 24/7', - 'info_dict': { - 'id': 'MST3K', - 'ext': 'mp4', - 'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA 
Network.', - 'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg', - } - }, { - 'url': 'https://www.theta.tv/contv-anime', - 'info_dict': { - 'id': 'ConTVAnime', - 'ext': 'mp4', - 'title': 'CONTV ANIME 24/7. Powered by THETA Network.', - 'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg', - } - }] - - def _real_extract(self, url): - channel_id = self._match_id(url) - info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body'] - - m3u8_playlist = next( - data['url'] for data in info['live_stream']['video_urls'] - if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source')) - - formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True) - - channel = try_get(info, lambda x: x['user']['username']) # using this field instead of channel_id due to capitalization - - return { - 'id': channel, - 'title': try_get(info, lambda x: x['live_stream']['title']), - 'channel': channel, - 'view_count': try_get(info, lambda x: x['live_stream']['view_count']), - 'is_live': True, - 'formats': formats, - 'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']), - } - - -class ThetaVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?Pvid[a-z0-9]+)' - _TEST = { - 'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0', - 'md5': '633d8c29eb276bb38a111dbd591c677f', - 'info_dict': { - 'id': 'vidiq6aaet3kzf799p0', - 'ext': 'mp4', - 'title': 'Theta EdgeCast Tutorial', - 'uploader': 'Pixiekittie', - 'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f', - 'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body'] - - m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url']) - - formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls') - - return { - 'id': video_id, - 'title': info.get('title'), - 'uploader': try_get(info, lambda x: x['user']['username']), - 'description': info.get('description'), - 'view_count': info.get('view_count'), - 'like_count': info.get('like_count'), - 'formats': formats, - 'thumbnail': info.get('thumbnail_url'), - } From 03e85ea99db76a2fddb65bf46f8819bda780aaf3 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 6 Oct 2023 20:00:15 -0500 Subject: [PATCH 214/218] [ie/youtube] Fix `heatmap` extraction (#8299) Closes #8189 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7e13aa7797..b7ac3e9cc1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3292,16 +3292,15 @@ def _extract_chapters_from_engagement_panel(self, data, duration): chapter_time, chapter_title, duration) for contents in content_list)), []) - def _extract_heatmap_from_player_overlay(self, data): - content_list = traverse_obj(data, ( - 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar', - 'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list})) - return next(filter(None, ( - traverse_obj(contents, (..., 'heatMarkerRenderer', { - 'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, 
scale=1000)}), - 'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000}, - 'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}), - })) for contents in content_list)), None) + def _extract_heatmap(self, data): + return traverse_obj(data, ( + 'frameworkUpdates', 'entityBatchUpdate', 'mutations', + lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP', + 'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., { + 'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}), + 'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000}, + 'value': ('intensityScoreNormalized', {float_or_none}), + })) or None def _extract_comment(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') @@ -4435,7 +4434,7 @@ def process_language(container, base_url, lang_code, sub_name, query): or self._extract_chapters_from_description(video_description, duration) or None) - info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data) + info['heatmap'] = self._extract_heatmap(initial_data) contents = traverse_obj( initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'), From 377e85a1797db9e98b78b38203ed9d4ded229991 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sat, 7 Oct 2023 03:02:45 +0200 Subject: [PATCH 215/218] [cleanup] Misc (#8300) * Simplify nuxt regex * Fix tmz quotes and tests * Update test python versions Authored by: dirkf, gamer191, Grub4K --- .github/workflows/core.yml | 4 +- .github/workflows/download.yml | 2 +- yt_dlp/extractor/common.py | 2 +- yt_dlp/extractor/tmz.py | 266 +++++++++++++++++---------------- 4 files changed, 138 insertions(+), 136 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 689408c500..7fcf11dfa2 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -13,7 +13,7 @@ jobs: matrix: os: [ubuntu-latest] # CPython 3.11 is in quick-test - python-version: ['3.8', '3.9', '3.10', '3.12-dev', pypy-3.7, pypy-3.8, pypy-3.10] + python-version: ['3.8', '3.9', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10] run-tests-ext: [sh] include: # atleast one of each CPython/PyPy tests must be in windows @@ -21,7 +21,7 @@ jobs: python-version: '3.7' run-tests-ext: bat - os: windows-latest - python-version: '3.12-dev' + python-version: '3.12' run-tests-ext: bat - os: windows-latest python-version: pypy-3.9 diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 2b2387d4f1..c3478721c3 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,7 +28,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8] + python-version: ['3.7', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10] run-tests-ext: [sh] include: # atleast one of each CPython/PyPy tests must be in windows diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index c94b4abdc2..c3ceb00391 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1687,7 +1687,7 @@ def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): """Parses Nuxt.js metadata. 
This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) - FUNCTION_RE = r'\(function\((?P.*?)\){(?:.*?)return\s+(?P{.*?})\s*;?\s*}\((?P.*?)\)' + FUNCTION_RE = r'\(function\((?P.*?)\){.*?\breturn\s+(?P{.*?})\s*;?\s*}\((?P.*?)\)' js, arg_keys, arg_vals = self._search_regex( (rf'', rf'{rectx}\(.*?{FUNCTION_RE}'), webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), diff --git a/yt_dlp/extractor/tmz.py b/yt_dlp/extractor/tmz.py index ffb30c6b87..edd16bc5b2 100644 --- a/yt_dlp/extractor/tmz.py +++ b/yt_dlp/extractor/tmz.py @@ -8,158 +8,160 @@ class TMZIE(InfoExtractor): - _VALID_URL = r"https?://(?:www\.)?tmz\.com/.*" + _VALID_URL = r'https?://(?:www\.)?tmz\.com/.*' _TESTS = [ { - "url": "http://www.tmz.com/videos/0-cegprt2p/", - "info_dict": { - "id": "http://www.tmz.com/videos/0-cegprt2p/", - "ext": "mp4", - "title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet", - "description": "Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.", - "timestamp": 1467831837, - "uploader": "TMZ Staff", - "upload_date": "20160706", - "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg", - "duration": 772.0, + 'url': 'http://www.tmz.com/videos/0-cegprt2p/', + 'info_dict': { + 'id': 'http://www.tmz.com/videos/0-cegprt2p/', + 'ext': 'mp4', + 'title': 'No Charges Against Hillary Clinton? Harvey Says It Ain\'t Over Yet', + 'description': 'Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.', + 'timestamp': 1467831837, + 'uploader': 'TMZ Staff', + 'upload_date': '20160706', + 'thumbnail': 'https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg', + 'duration': 772.0, }, }, { - "url": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/", - "info_dict": { - "id": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/", - "ext": "mp4", - "title": "Angry Bagel Shop Guy Says He Doesn't Trust Women", - "description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it's women's fault in the first place.", - "timestamp": 1562889485, - "uploader": "TMZ Staff", - "upload_date": "20190711", - "thumbnail": "https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg", - "duration": 123.0, + 'url': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/', + 'info_dict': { + 'id': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/', + 'ext': 'mp4', + 'title': 'Angry Bagel Shop Guy Says He Doesn\'t Trust Women', + 'description': 'The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it\'s women\'s fault in the first place.', + 'timestamp': 1562889485, + 'uploader': 'TMZ Staff', + 'upload_date': '20190711', + 'thumbnail': 'https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg', + 'duration': 123.0, }, }, { - "url": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert", - "md5": "5429c85db8bde39a473a56ca8c4c5602", - "info_dict": { - "id": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert", - "ext": "mp4", - "title": "Bobby Brown Tells Crowd ... 
Bobbi Kristina is Awake", - "description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', - "timestamp": 1429467813, - "uploader": "TMZ Staff", - "upload_date": "20150419", - "duration": 29.0, - "thumbnail": "https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg", + 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', + 'md5': '5429c85db8bde39a473a56ca8c4c5602', + 'info_dict': { + 'id': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', + 'ext': 'mp4', + 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', + 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', + 'timestamp': 1429467813, + 'uploader': 'TMZ Staff', + 'upload_date': '20150419', + 'duration': 29.0, + 'thumbnail': 'https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg', }, }, { - "url": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/", - "info_dict": { - "id": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/", - "ext": "mp4", - "title": "Patti LaBelle -- Goes Nuclear On Stripping Fan", - "description": "Patti LaBelle made it known loud and clear last night ... NO " - "ONE gets on her stage and strips down.", - "timestamp": 1442683746, - "uploader": "TMZ Staff", - "upload_date": "20150919", - "duration": 104.0, - "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg", + 'url': 'http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/', + 'info_dict': { + 'id': 'http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/', + 'ext': 'mp4', + 'title': 'Patti LaBelle -- Goes Nuclear On Stripping Fan', + 'description': 'Patti LaBelle made it known loud and clear last night ... NO ' + 'ONE gets on her stage and strips down.', + 'timestamp': 1442683746, + 'uploader': 'TMZ Staff', + 'upload_date': '20150919', + 'duration': 104.0, + 'thumbnail': 'https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg', }, }, { - "url": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/", - "info_dict": { - "id": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/", - "ext": "mp4", - "title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This", - "description": "Two pretty parts of this video with NBA Commish Adam Silver.", - "timestamp": 1454010989, - "uploader": "TMZ Staff", - "upload_date": "20160128", - "duration": 59.0, - "thumbnail": "https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg", + 'url': 'http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/', + 'info_dict': { + 'id': 'http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/', + 'ext': 'mp4', + 'title': 'NBA\'s Adam Silver -- Blake Griffin\'s a Great Guy ... 
He\'ll Learn from This', + 'description': 'Two pretty parts of this video with NBA Commish Adam Silver.', + 'timestamp': 1454010989, + 'uploader': 'TMZ Staff', + 'upload_date': '20160128', + 'duration': 59.0, + 'thumbnail': 'https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg', }, }, { - "url": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/", - "info_dict": { - "id": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/", - "ext": "mp4", - "title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!", - "description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. he's ready and willing to go to jail for the crime.", - "timestamp": 1477500095, - "uploader": "TMZ Staff", - "upload_date": "20161026", - "thumbnail": "https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg", - "duration": 128.0, + 'url': 'http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/', + 'info_dict': { + 'id': 'http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/', + 'ext': 'mp4', + 'title': 'Trump Star Vandal -- I\'m Not Afraid of Donald or the Cops!', + 'description': 'James Otis is the the guy who took a pickaxe to Donald Trump\'s star on the Walk of Fame, and he tells TMZ .. he\'s ready and willing to go to jail for the crime.', + 'timestamp': 1477500095, + 'uploader': 'TMZ Staff', + 'upload_date': '20161026', + 'thumbnail': 'https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg', + 'duration': 128.0, }, }, { - "url": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/", - "info_dict": { - "id": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/", - "ext": "mp4", - "title": "Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist " - "Demonstrators", - "description": "Beverly Hills may be an omen of what's coming next week, " - "because things got crazy on the streets and cops started " - "swinging their billy clubs at both Anti-Fascist and Pro-Trump " - "demonstrators.", - "timestamp": 1604182772, - "uploader": "TMZ Staff", - "upload_date": "20201031", - "duration": 96.0, - "thumbnail": "https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg", + 'url': 'https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/', + 'info_dict': { + 'id': 'https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/', + 'ext': 'mp4', + 'title': 'Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist ' + 'Demonstrators', + 'description': 'Beverly Hills may be an omen of what\'s coming next week, ' + 'because things got crazy on the streets and cops started ' + 'swinging their billy clubs at both Anti-Fascist and Pro-Trump ' + 'demonstrators.', + 'timestamp': 1604182772, + 'uploader': 'TMZ Staff', + 'upload_date': '20201031', + 'duration': 96.0, + 'thumbnail': 'https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg', }, }, { - "url": "https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/", - "info_dict": { - "id": "Dddb6IGe-ws", - "ext": "mp4", - "title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing", - "uploader": "ESNEWS", - "description": "md5:49675bc58883ccf80474b8aa701e1064", - "upload_date": "20201102", - "uploader_id": "ESNEWS", - "uploader_url": 
"http://www.youtube.com/user/ESNEWS", - "like_count": int, - "channel_id": "UCI-Oq7oFGakzSzHFlTtsUsQ", - "channel": "ESNEWS", - "view_count": int, - "duration": 225, - "live_status": "not_live", - "thumbnail": "https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp", - "channel_url": "https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ", - "channel_follower_count": int, - "playable_in_embed": True, - "categories": ["Sports"], - "age_limit": 0, - "tags": "count:10", - "availability": "public", + 'url': 'https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/', + 'info_dict': { + 'id': 'Dddb6IGe-ws', + 'ext': 'mp4', + 'title': 'SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing', + 'uploader': 'ESNEWS', + 'description': 'md5:49675bc58883ccf80474b8aa701e1064', + 'upload_date': '20201102', + 'uploader_id': '@ESNEWS', + 'uploader_url': 'https://www.youtube.com/@ESNEWS', + 'like_count': int, + 'channel_id': 'UCI-Oq7oFGakzSzHFlTtsUsQ', + 'channel': 'ESNEWS', + 'view_count': int, + 'duration': 225, + 'live_status': 'not_live', + 'thumbnail': 'https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp', + 'channel_url': 'https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ', + 'channel_follower_count': int, + 'playable_in_embed': True, + 'categories': ['Sports'], + 'age_limit': 0, + 'tags': 'count:10', + 'availability': 'public', + 'comment_count': int, }, }, { - "url": "https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/", - "info_dict": { - "id": "1329450007125225473", - "ext": "mp4", - "title": "The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.", - "uploader": "The Mac Life", - "description": "md5:56e6009bbc3d12498e10d08a8e1f1c69", - "upload_date": "20201119", - "uploader_id": "TheMacLife", - "timestamp": 1605800556, - "thumbnail": "https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small", - "like_count": int, - "duration": 11.812, - "uploader_url": "https://twitter.com/TheMacLife", - "age_limit": 0, - "repost_count": int, - "tags": [], - "comment_count": int, + 'url': 'https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/', + 'info_dict': { + 'id': '1329448013937471491', + 'ext': 'mp4', + 'title': 'The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.', + 'uploader': 'The Mac Life', + 'description': 'md5:56e6009bbc3d12498e10d08a8e1f1c69', + 'upload_date': '20201119', + 'display_id': '1329450007125225473', + 'uploader_id': 'TheMacLife', + 'timestamp': 1605800556, + 'thumbnail': 'https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small', + 'like_count': int, + 'duration': 11.812, + 'uploader_url': 'https://twitter.com/TheMacLife', + 'age_limit': 0, + 'repost_count': int, + 'tags': [], + 'comment_count': int, }, }, ] @@ -167,25 +169,25 @@ class TMZIE(InfoExtractor): def _real_extract(self, url): webpage = self._download_webpage(url, url) jsonld = self._search_json_ld(webpage, url) - if not jsonld or "url" not in jsonld: + if not jsonld or 'url' not in jsonld: # try to extract from YouTube Player API # see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions match_obj = re.search(r'\.cueVideoById\(\s*(?P[\'"])(?P.*?)(?P=quote)', webpage) if match_obj: - res = self.url_result(match_obj.group("id")) + res = 
self.url_result(match_obj.group('id')) return res # try to extract from twitter - blockquote_el = get_element_by_attribute("class", "twitter-tweet", webpage) + blockquote_el = get_element_by_attribute('class', 'twitter-tweet', webpage) if blockquote_el: matches = re.findall( r']+href=\s*(?P[\'"])(?P.*?)(?P=quote)', blockquote_el) if matches: for _, match in matches: - if "/status/" in match: + if '/status/' in match: res = self.url_result(match) return res - raise ExtractorError("No video found!") + raise ExtractorError('No video found!') if id not in jsonld: - jsonld["id"] = url + jsonld['id'] = url return jsonld From 4392c4680c383b221b6aa26d25c6e4b5581a5ad6 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 7 Oct 2023 01:28:34 +0000 Subject: [PATCH 216/218] Release 2023.10.07 Created by: Grub4K :ci skip all :ci run dl --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 8 ++--- .../ISSUE_TEMPLATE/2_site_support_request.yml | 8 ++--- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 8 ++--- .github/ISSUE_TEMPLATE/4_bug_report.yml | 8 ++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 8 ++--- .github/ISSUE_TEMPLATE/6_question.yml | 8 ++--- CONTRIBUTORS | 6 ++++ Changelog.md | 29 +++++++++++++++++++ supportedsites.md | 4 +-- yt_dlp/version.py | 4 +-- 10 files changed, 63 insertions(+), 28 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index f0fc71d575..dacb41758d 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting that yt-dlp is broken on a **supported** site required: true - - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -64,7 +64,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -72,8 +72,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.09.24, Current version: 2023.09.24 - yt-dlp is up to date (2023.09.24) + Latest version: 2023.10.07, Current version: 2023.10.07 + yt-dlp is up to date (2023.10.07) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index ac9a72a1c1..ec6e298a19 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update 
instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -76,7 +76,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -84,8 +84,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.09.24, Current version: 2023.09.24 - yt-dlp is up to date (2023.09.24) + Latest version: 2023.10.07, Current version: 2023.10.07 + yt-dlp is up to date (2023.10.07) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 577e4d4910..cf3cdd21f3 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -72,7 +72,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -80,8 +80,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.09.24, Current version: 2023.09.24 - yt-dlp is up to date (2023.09.24) + Latest version: 2023.10.07, Current version: 2023.10.07 + yt-dlp is up to date (2023.10.07) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 9529c1bd6c..1bbcf68956 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2023.09.24** 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -57,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,8 +65,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.09.24, Current version: 2023.09.24 - yt-dlp is up to date (2023.09.24) + Latest version: 2023.10.07, Current version: 2023.10.07 + yt-dlp is up to date (2023.10.07) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index b17a6e046c..d3bc06e809 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -20,7 +20,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true @@ -53,7 +53,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -61,7 +61,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.09.24, Current version: 2023.09.24 - yt-dlp is up to date (2023.09.24) + Latest version: 2023.10.07, Current version: 2023.10.07 + yt-dlp is up to date (2023.10.07) render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 5345e8917c..30311d5b56 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -26,7 +26,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates required: true @@ -59,7 +59,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -67,7 +67,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.09.24, Current version: 2023.09.24 - yt-dlp is up to date (2023.09.24) + Latest version: 2023.10.07, Current version: 2023.10.07 + yt-dlp is up to date (2023.10.07) render: shell diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 72b9584ecf..8eda413072 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -503,3 +503,9 @@ Yalab7 zhallgato zhong-yiyu Zprokkel +AS6939 +drzraf +handlerug +jiru +madewokherd +xofe diff --git a/Changelog.md b/Changelog.md index 04511927fa..48dcbf1029 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,35 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2023.10.07 + +#### Extractor changes +- **abc.net.au**: iview: [Improve `episode` extraction](https://github.com/yt-dlp/yt-dlp/commit/a9efb4b8d74f3583450ffda0ee57259a47d39c70) ([#8201](https://github.com/yt-dlp/yt-dlp/issues/8201)) by [xofe](https://github.com/xofe) +- **erocast**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/47c598783c98c179e04dd12c2a3fee0f3dc53087) ([#8264](https://github.com/yt-dlp/yt-dlp/issues/8264)) by [madewokherd](https://github.com/madewokherd) +- **gofile**: [Fix token cookie bug](https://github.com/yt-dlp/yt-dlp/commit/0730d5a966fa8a937d84bfb7f68be5198acb039b) by [bashonly](https://github.com/bashonly) +- **iq.com**: [Fix extraction and subtitles](https://github.com/yt-dlp/yt-dlp/commit/35d9cbaf9638ccc9daf8a863063b2e7c135bc664) ([#8260](https://github.com/yt-dlp/yt-dlp/issues/8260)) by [AS6939](https://github.com/AS6939) +- **lbry** + - [Add playlist support](https://github.com/yt-dlp/yt-dlp/commit/48cceec1ddb8649b5e771df8df79eb9c39c82b90) ([#8213](https://github.com/yt-dlp/yt-dlp/issues/8213)) by [bashonly](https://github.com/bashonly), [drzraf](https://github.com/drzraf), [Grub4K](https://github.com/Grub4K) + - [Extract `uploader_id`](https://github.com/yt-dlp/yt-dlp/commit/0e722f2f3ca42e634fd7b06ee70b16bf833ce132) ([#8244](https://github.com/yt-dlp/yt-dlp/issues/8244)) by [drzraf](https://github.com/drzraf) +- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/91a670a4f7babe9c8aa2018f57d8c8952a6f49d8) ([#7785](https://github.com/yt-dlp/yt-dlp/issues/7785)) by [jiru](https://github.com/jiru) +- **neteasemusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/f980df734cf5c0eaded2f7b38c6c60bccfeebb48) ([#8181](https://github.com/yt-dlp/yt-dlp/issues/8181)) by [c-basalt](https://github.com/c-basalt) +- **nhk**: [Fix VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/e831c80e8b2fc025b3b67d82974cc59e3526fdc8) ([#8249](https://github.com/yt-dlp/yt-dlp/issues/8249)) by [garret1317](https://github.com/garret1317) +- **radiko**: [Improve 
extraction](https://github.com/yt-dlp/yt-dlp/commit/2ad3873f0dfa9285c91d2160e36c039e69d597c7) ([#8221](https://github.com/yt-dlp/yt-dlp/issues/8221)) by [garret1317](https://github.com/garret1317) +- **substack** + - [Fix download cookies bug](https://github.com/yt-dlp/yt-dlp/commit/2f2dda3a7e85148773da3cdbc03ac9949ec1bc45) ([#8219](https://github.com/yt-dlp/yt-dlp/issues/8219)) by [handlerug](https://github.com/handlerug) + - [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/fbcc299bd8a19cf8b3c8805d6c268a9110230973) ([#8218](https://github.com/yt-dlp/yt-dlp/issues/8218)) by [handlerug](https://github.com/handlerug) +- **theta**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/792f1e64f6a2beac51e85408d142b3118115c4fd) ([#8251](https://github.com/yt-dlp/yt-dlp/issues/8251)) by [alerikaisattera](https://github.com/alerikaisattera) +- **wrestleuniversevod**: [Call API with device ID](https://github.com/yt-dlp/yt-dlp/commit/b095fd3fa9d58a65dc9b830bd63b9d909422aa86) ([#8272](https://github.com/yt-dlp/yt-dlp/issues/8272)) by [bashonly](https://github.com/bashonly) +- **xhamster**: user: [Support creator urls](https://github.com/yt-dlp/yt-dlp/commit/cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3) ([#8232](https://github.com/yt-dlp/yt-dlp/issues/8232)) by [Grub4K](https://github.com/Grub4K) +- **youtube** + - [Fix `heatmap` extraction](https://github.com/yt-dlp/yt-dlp/commit/03e85ea99db76a2fddb65bf46f8819bda780aaf3) ([#8299](https://github.com/yt-dlp/yt-dlp/issues/8299)) by [bashonly](https://github.com/bashonly) + - [Raise a warning for `Incomplete Data` instead of an error](https://github.com/yt-dlp/yt-dlp/commit/eb5bdbfa70126c7d5355cc0954b63720522e462c) ([#8238](https://github.com/yt-dlp/yt-dlp/issues/8238)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. 
changes +- **cleanup** + - [Update extractor tests](https://github.com/yt-dlp/yt-dlp/commit/19c90e405b4137c06dfe6f9aaa02396df0da93e5) ([#7718](https://github.com/yt-dlp/yt-dlp/issues/7718)) by [trainman261](https://github.com/trainman261) + - Miscellaneous: [377e85a](https://github.com/yt-dlp/yt-dlp/commit/377e85a1797db9e98b78b38203ed9d4ded229991) by [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K) + ### 2023.09.24 #### Important changes diff --git a/supportedsites.md b/supportedsites.md index 620e0f3058..ecef4dc2d1 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -422,6 +422,7 @@ # Supported sites - **eplus:inbound**: e+ (イープラス) overseas - **Epoch** - **Eporner** + - **Erocast** - **EroProfile**: [*eroprofile*](## "netrc machine") - **EroProfile:album** - **ertflix**: ERTFLIX videos @@ -699,6 +700,7 @@ # Supported sites - **LastFMUser** - **lbry** - **lbry:channel** + - **lbry:playlist** - **LCI** - **Lcp** - **LcpPlay** @@ -1474,8 +1476,6 @@ # Supported sites - **ThePlatformFeed** - **TheStar** - **TheSun** - - **ThetaStream** - - **ThetaVideo** - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 2a7c84b93f..60c1c94cc3 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2023.09.24' +__version__ = '2023.10.07' -RELEASE_GIT_HEAD = '088add9567d39b758737e4299a0e619fd89d2e8f' +RELEASE_GIT_HEAD = '377e85a1797db9e98b78b38203ed9d4ded229991' VARIANT = None From 9d7ded6419089c1bf252496073f73ad90ed71004 Mon Sep 17 00:00:00 2001 From: Awal Garg Date: Sun, 8 Oct 2023 01:57:23 +0200 Subject: [PATCH 217/218] [utils] `js_to_json`: Fix `Date` constructor parsing (#8295) Authored by: awalgarg, Grub4K --- test/test_utils.py | 7 ++++++- yt_dlp/utils/_utils.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index fd612ff86f..77040f29c6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1209,6 +1209,9 @@ def test_js_to_json_edgecases(self): on = js_to_json('\'"\\""\'') self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped') + on = js_to_json('[new Date("spam"), \'("eggs")\']') + self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string') + def test_js_to_json_malformed(self): self.assertEqual(js_to_json('42a1'), '42"a1"') self.assertEqual(js_to_json('42a-1'), '42"a"-1') @@ -1220,11 +1223,13 @@ def test_js_to_json_template_literal(self): self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""') self.assertEqual(js_to_json('`${name}`', {}), '"name"') - def test_js_to_json_map_array_constructors(self): + def test_js_to_json_common_constructors(self): self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5}) self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10]) self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5]) self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5}) + self.assertEqual(json.loads(js_to_json('new Date("123")')), "123") + self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19") def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'y'}) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index ba62423806..3dc17bf593 100644 --- a/yt_dlp/utils/_utils.py +++ 
b/yt_dlp/utils/_utils.py
@@ -2744,7 +2744,7 @@ def create_map(mobj):
     code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
     code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
     if not strict:
-        code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+        code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code)
         code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
         code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
         code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)

From 1c51c520f7b511ebd9e4eb7322285a8c31eedbbd Mon Sep 17 00:00:00 2001
From: Simon Sawicki
Date: Sun, 8 Oct 2023 02:01:01 +0200
Subject: [PATCH 218/218] [fd/fragment] Improve progress calculation (#8241)

This uses the download speed from all threads and also adds smoothing to speed and eta

Authored by: Grub4K
---
 yt_dlp/downloader/fragment.py |  48 ++++++---------
 yt_dlp/utils/progress.py      | 109 ++++++++++++++++++++++++++++++++++
 2 files changed, 128 insertions(+), 29 deletions(-)
 create mode 100644 yt_dlp/utils/progress.py

diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index b4b680dae1..b4f003d37f 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -14,6 +14,7 @@
 from ..networking.exceptions import HTTPError, IncompleteRead
 from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
 from ..utils.networking import HTTPHeaderDict
+from ..utils.progress import ProgressCalculator


 class HttpQuietDownloader(HttpFD):
@@ -226,8 +227,7 @@ def _start_frag_download(self, ctx, info_dict):
         resume_len = ctx['complete_frags_downloaded_bytes']
         total_frags = ctx['total_frags']
         ctx_id = ctx.get('ctx_id')
-        # This dict stores the download progress, it's updated by the progress
-        # hook
+        # Stores the download progress, updated by the progress hook
         state = {
             'status': 'downloading',
             'downloaded_bytes': resume_len,
@@ -237,14 +237,8 @@ def _start_frag_download(self, ctx, info_dict):
             'tmpfilename': ctx['tmpfilename'],
         }

-        start = time.time()
-        ctx.update({
-            'started': start,
-            'fragment_started': start,
-            # Amount of fragment's bytes downloaded by the time of the previous
-            # frag progress hook invocation
-            'prev_frag_downloaded_bytes': 0,
-        })
+        ctx['started'] = time.time()
+        progress = ProgressCalculator(resume_len)

         def frag_progress_hook(s):
             if s['status'] not in ('downloading', 'finished'):
@@ -259,38 +253,35 @@ def frag_progress_hook(s):
             state['max_progress'] = ctx.get('max_progress')
             state['progress_idx'] = ctx.get('progress_idx')

-            time_now = time.time()
-            state['elapsed'] = time_now - start
+            state['elapsed'] = progress.elapsed
             frag_total_bytes = s.get('total_bytes') or 0
             s['fragment_info_dict'] = s.pop('info_dict', {})
+
+            # XXX: Fragment resume is not accounted for here
             if not ctx['live']:
                 estimated_size = (
                     (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
                     / (state['fragment_index'] + 1) * total_frags)
-                state['total_bytes_estimate'] = estimated_size
+                progress.total = estimated_size
+                progress.update(s.get('downloaded_bytes'))
+                state['total_bytes_estimate'] = progress.total
+            else:
+                progress.update(s.get('downloaded_bytes'))

             if s['status'] == 'finished':
                 state['fragment_index'] += 1
                 ctx['fragment_index'] = state['fragment_index']
-                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
-                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
-                ctx['speed'] = state['speed'] = self.calc_speed(
-                    ctx['fragment_started'], time_now, frag_total_bytes)
-                ctx['fragment_started'] = time.time()
-                ctx['prev_frag_downloaded_bytes'] = 0
-            else:
-                frag_downloaded_bytes = s['downloaded_bytes']
-                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
-                ctx['speed'] = state['speed'] = self.calc_speed(
-                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
-                if not ctx['live']:
-                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
-                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
+                progress.thread_reset()
+
+            state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
+            state['speed'] = ctx['speed'] = progress.speed.smooth
+            state['eta'] = progress.eta.smooth
+
             self._hook_progress(state, info_dict)

         ctx['dl'].add_progress_hook(frag_progress_hook)

-        return start
+        return ctx['started']

     def _finish_frag_download(self, ctx, info_dict):
         ctx['dest_stream'].close()
@@ -500,7 +491,6 @@ def _download_fragment(fragment):
                 download_fragment(fragment, ctx_copy)
                 return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')

-        self.report_warning('The download speed shown is only of one thread. This is a known issue')
         with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
             try:
                 for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
diff --git a/yt_dlp/utils/progress.py b/yt_dlp/utils/progress.py
new file mode 100644
index 0000000000..f254a3887e
--- /dev/null
+++ b/yt_dlp/utils/progress.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import bisect
+import threading
+import time
+
+
+class ProgressCalculator:
+    # Time to calculate the speed over (seconds)
+    SAMPLING_WINDOW = 3
+    # Minimum timeframe before to sample next downloaded bytes (seconds)
+    SAMPLING_RATE = 0.05
+    # Time before showing eta (seconds)
+    GRACE_PERIOD = 1
+
+    def __init__(self, initial: int):
+        self._initial = initial or 0
+        self.downloaded = self._initial
+
+        self.elapsed: float = 0
+        self.speed = SmoothValue(0, smoothing=0.7)
+        self.eta = SmoothValue(None, smoothing=0.9)
+
+        self._total = 0
+        self._start_time = time.monotonic()
+        self._last_update = self._start_time
+
+        self._lock = threading.Lock()
+        self._thread_sizes: dict[int, int] = {}
+
+        self._times = [self._start_time]
+        self._downloaded = [self.downloaded]
+
+    @property
+    def total(self):
+        return self._total
+
+    @total.setter
+    def total(self, value: int | None):
+        with self._lock:
+            if value is not None and value < self.downloaded:
+                value = self.downloaded
+
+            self._total = value
+
+    def thread_reset(self):
+        current_thread = threading.get_ident()
+        with self._lock:
+            self._thread_sizes[current_thread] = 0
+
+    def update(self, size: int | None):
+        if not size:
+            return
+
+        current_thread = threading.get_ident()
+
+        with self._lock:
+            last_size = self._thread_sizes.get(current_thread, 0)
+            self._thread_sizes[current_thread] = size
+            self._update(size - last_size)
+
+    def _update(self, size: int):
+        current_time = time.monotonic()
+
+        self.downloaded += size
+        self.elapsed = current_time - self._start_time
+        if self.total is not None and self.downloaded > self.total:
+            self._total = self.downloaded
+
+        if self._last_update + self.SAMPLING_RATE > current_time:
+            return
+        self._last_update = current_time
+
+        self._times.append(current_time)
+        self._downloaded.append(self.downloaded)
+
+        offset = bisect.bisect_left(self._times, current_time - self.SAMPLING_WINDOW)
+        del self._times[:offset]
+        del self._downloaded[:offset]
+        if len(self._times) < 2:
+            self.speed.reset()
+            self.eta.reset()
+            return
+
+        download_time = current_time - self._times[0]
+        if not download_time:
+            return
+
+        self.speed.set((self.downloaded - self._downloaded[0]) / download_time)
+        if self.total and self.speed.value and self.elapsed > self.GRACE_PERIOD:
+            self.eta.set((self.total - self.downloaded) / self.speed.value)
+        else:
+            self.eta.reset()
+
+
+class SmoothValue:
+    def __init__(self, initial: float | None, smoothing: float):
+        self.value = self.smooth = self._initial = initial
+        self._smoothing = smoothing
+
+    def set(self, value: float):
+        self.value = value
+        if self.smooth is None:
+            self.smooth = self.value
+        else:
+            self.smooth = (1 - self._smoothing) * value + self._smoothing * self.smooth
+
+    def reset(self):
+        self.value = self.smooth = self._initial
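
To make the new progress flow concrete, here is a minimal sketch (not part of the patch series) of how ProgressCalculator is meant to be driven: each worker thread reports its own cumulative byte count via update(), calls thread_reset() when its fragment finishes, and the display reads the exponentially smoothed values. It assumes the patched tree is importable as yt_dlp.utils.progress; the thread count, chunk sizes, timings and total are made up for the demonstration.

# Hypothetical driver for the new ProgressCalculator (not part of the patch).
# Assumes the patched yt-dlp tree is on the import path.
import threading
import time

from yt_dlp.utils.progress import ProgressCalculator

progress = ProgressCalculator(0)  # no previously downloaded bytes to resume from
progress.total = 1_000_000        # estimated size; the setter clamps it to >= downloaded

def fake_download(chunk_size):
    downloaded = 0
    for _ in range(30):
        downloaded += chunk_size
        # Each thread passes its own *cumulative* count; the calculator
        # diffs it against the last value it saw from this thread.
        progress.update(downloaded)
        time.sleep(0.05)
    progress.thread_reset()  # the next fragment on this thread restarts at zero

threads = [threading.Thread(target=fake_download, args=(size,)) for size in (2048, 4096)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()

# .smooth holds the exponentially averaged value shown to the user;
# .value holds the raw sample computed over the sliding SAMPLING_WINDOW.
print(progress.downloaded, progress.speed.smooth, progress.eta.smooth)

The smoothing itself is a plain exponential moving average: SmoothValue keeps smooth = (1 - smoothing) * value + smoothing * smooth, so the factors 0.7 (speed) and 0.9 (eta) trade responsiveness for stability, with the eta damped harder since it is the noisier of the two quantities.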