From 46c1b7cfec1d0e6155083ca7e6948674c64ecb97 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:13:08 -0500 Subject: [PATCH 01/48] [build] Cache dependencies for `macos` job (#10088) Authored by: bashonly --- .github/workflows/build.yml | 50 +++++++++++++++++++++++---- .github/workflows/release-master.yml | 3 +- .github/workflows/release-nightly.yml | 3 +- .github/workflows/release.yml | 3 +- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9a1a22e8f5..1adb62dfb1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -237,27 +237,43 @@ jobs: macos: needs: process if: inputs.macos + permissions: + contents: read + actions: write # For cleaning up cache runs-on: macos-12 steps: - uses: actions/checkout@v4 # NB: Building universal2 does not work with python from actions/setup-python + + - name: Restore cached requirements + id: restore-cache + uses: actions/cache/restore@v4 + env: + SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1 + with: + path: | + ~/yt-dlp-build-venv + key: cache-reqs-${{ github.job }} + - name: Install Requirements run: | brew install coreutils - python3 devscripts/install_deps.py --user -o --include build + python3 -m venv ~/yt-dlp-build-venv + source ~/yt-dlp-build-venv/bin/activate + python3 devscripts/install_deps.py -o --include build python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt # We need to ignore wheels otherwise we break universal2 builds - python3 -m pip install -U --user --no-binary :all: -r requirements.txt + python3 -m pip install -U --no-binary :all: -r requirements.txt # We need to fuse our own universal2 wheels for curl_cffi - python3 -m pip install -U --user delocate + python3 -m pip install -U delocate mkdir curl_cffi_whls curl_cffi_universal2 python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do python3 -m pip download \ --only-binary=:all: \ --platform "${platform}" \ - --pre -d curl_cffi_whls \ + -d curl_cffi_whls \ -r requirements.txt done ( # Overwrite x86_64-only libs with fat/universal2 libs or else Pyinstaller will do the opposite @@ -274,9 +290,10 @@ jobs: ) python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2 python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2 - cd curl_cffi_universal2 - for wheel in ./*cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done - python3 -m pip install -U --user ./*cffi*.whl + for wheel in curl_cffi_universal2/*cffi*.whl; do + mv -n -- "${wheel}" "${wheel/x86_64/universal2}" + done + python3 -m pip install --force-reinstall -U curl_cffi_universal2/*cffi*.whl - name: Prepare run: | @@ -284,6 +301,7 @@ jobs: python3 devscripts/make_lazy_extractors.py - name: Build run: | + source ~/yt-dlp-build-venv/bin/activate python3 -m bundle.pyinstaller --target-architecture universal2 --onedir (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) 
python3 -m bundle.pyinstaller --target-architecture universal2 @@ -307,6 +325,24 @@ jobs: dist/yt-dlp_macos.zip compression-level: 0 + - name: Cleanup cache + if: steps.restore-cache.outputs.cache-hit == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + cache_key: cache-reqs-${{ github.job }} + repository: ${{ github.repository }} + branch: ${{ github.ref }} + run: | + gh extension install actions/gh-actions-cache + gh actions-cache delete "${cache_key}" -R "${repository}" -B "${branch}" --confirm + + - name: Cache requirements + uses: actions/cache/save@v4 + with: + path: | + ~/yt-dlp-build-venv + key: cache-reqs-${{ github.job }} + macos_legacy: needs: process if: inputs.macos_legacy diff --git a/.github/workflows/release-master.yml b/.github/workflows/release-master.yml index a84547580b..c49319b171 100644 --- a/.github/workflows/release-master.yml +++ b/.github/workflows/release-master.yml @@ -24,6 +24,7 @@ jobs: source: master permissions: contents: write - packages: write + packages: write # For package cache + actions: write # For cleaning up cache id-token: write # mandatory for trusted publishing secrets: inherit diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml index f459a3a17e..b536c50669 100644 --- a/.github/workflows/release-nightly.yml +++ b/.github/workflows/release-nightly.yml @@ -37,6 +37,7 @@ jobs: source: nightly permissions: contents: write - packages: write + packages: write # For package cache + actions: write # For cleaning up cache id-token: write # mandatory for trusted publishing secrets: inherit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 32268b32f3..fa5ad7e515 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -228,7 +228,8 @@ jobs: origin: ${{ needs.prepare.outputs.target_repo }} permissions: contents: read - packages: write # For package cache + packages: write # For package cache + actions: write # For cleaning up cache secrets: GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} From d7d861811c15585a4f7ec9d5ae68d2ac28de28a0 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:59:17 -0500 Subject: [PATCH 02/48] [ie/tubitv:series] Fix extractor (#10116) Closes #8563 Authored by: bashonly --- yt_dlp/extractor/tubitv.py | 57 +++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 9d9ddae720..85eb3a211c 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -13,6 +13,7 @@ class TubiTvIE(InfoExtractor): + IE_NAME = 'tubitv' _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?Pvideo|movies|tv-shows)/(?P\d+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' @@ -148,30 +149,54 @@ def _real_extract(self, url): class TubiTvShowIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P[^/?#]+)' + IE_NAME = 'tubitv:series' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/\d+/(?P[^/?#]+)(?:/season-(?P\d+))?' 
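# A minimal standalone sketch (not part of the patch) of how the season-aware pattern
# above is meant to behave. The `show_name` and `season` group names are an assumption
# taken from the `.group('show_name', 'season')` call in `_real_extract` below; the
# sample URLs come from the test cases.
import re

_SEASON_URL_RE = r'https?://(?:www\.)?tubitv\.com/series/\d+/(?P<show_name>[^/?#]+)(?:/season-(?P<season>\d+))?'

for sample in ('https://tubitv.com/series/2311/the-saddle-club/season-3',
               'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true'):
    show_name, season = re.match(_SEASON_URL_RE, sample).group('show_name', 'season')
    playlist_id = f'{show_name}-season-{season}' if season else show_name
    print(playlist_id)  # the-saddle-club-season-3, then the-joy-of-painting-with-bob-ross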
_TESTS = [{ 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true', - 'playlist_mincount': 390, + 'playlist_mincount': 389, 'info_dict': { 'id': 'the-joy-of-painting-with-bob-ross', }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/season-1', + 'playlist_count': 26, + 'info_dict': { + 'id': 'the-saddle-club-season-1', + }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/season-3', + 'playlist_count': 19, + 'info_dict': { + 'id': 'the-saddle-club-season-3', + }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/', + 'playlist_mincount': 71, + 'info_dict': { + 'id': 'the-saddle-club', + }, }] - def _entries(self, show_url, show_name): - show_webpage = self._download_webpage(show_url, show_name) + def _entries(self, show_url, playlist_id, selected_season): + webpage = self._download_webpage(show_url, playlist_id) - show_json = self._parse_json(self._search_regex( - r'window\.__data\s*=\s*({[^<]+});\s*', - show_webpage, 'data'), show_name, transform_source=js_to_json)['video'] + data = self._search_json( + r'window\.__data\s*=', webpage, 'data', playlist_id, + transform_source=js_to_json)['video'] - for episode_id in show_json['fullContentById']: - if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's': - continue - yield self.url_result( - f'https://tubitv.com/tv-shows/{episode_id}/', - ie=TubiTvIE.ie_key(), video_id=episode_id) + # v['number'] is already a decimal string, but stringify to protect against API changes + path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}] + + for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)): + season_number = int_or_none(season.get('number')) + for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])): + episode_id = episode['id'] + yield self.url_result( + f'https://tubitv.com/tv-shows/{episode_id}/', TubiTvIE, episode_id, + season_number=season_number, episode_number=int_or_none(episode.get('num'))) def _real_extract(self, url): - show_name = self._match_valid_url(url).group('show_name') - return self.playlist_result(self._entries(url, show_name), playlist_id=show_name) + playlist_id, selected_season = self._match_valid_url(url).group('show_name', 'season') + if selected_season: + playlist_id = f'{playlist_id}-season-{selected_season}' + return self.playlist_result(self._entries(url, playlist_id, selected_season), playlist_id) From 081708d6074dfbb907e25af61ba530bba0d4b31d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 17:31:13 -0500 Subject: [PATCH 03/48] [ie/francetv] Fix extractor (#10177) Closes #10175 Authored by: bashonly --- yt_dlp/extractor/francetv.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index f732d56772..de2bec25ac 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -33,6 +33,7 @@ class FranceTVIE(InfoExtractor): _GEO_BYPASS = False _TESTS = [{ + # tokenized url is in dinfo['video']['token'] 'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1', 'info_dict': { 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', @@ -44,6 +45,19 @@ class FranceTVIE(InfoExtractor): 'upload_date': '20170813', }, 'params': {'skip_download': 'm3u8'}, + }, { + # tokenized url is in dinfo['video']['token']['akamai'] + 'url': 'francetv:c5bda21d-2c6f-4470-8849-3d8327adb2ba', + 'info_dict': { + 
'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba', + 'ext': 'mp4', + 'title': '13h15, le dimanche... - Les mystères de Jésus', + 'timestamp': 1514118300, + 'duration': 2880, + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20171224', + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'francetv:162311093', 'only_matching': True, @@ -119,7 +133,7 @@ def _extract_video(self, video_id, hostname=None): video_url = video['url'] format_id = video.get('format') - if token_url := url_or_none(video.get('token')): + if token_url := traverse_obj(video, ('token', (None, 'akamai'), {url_or_none}, any)): tokenized_url = traverse_obj(self._download_json( token_url, video_id, f'Downloading signed {format_id} manifest URL', fatal=False, query={ @@ -225,13 +239,13 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', + 'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba', 'ext': 'mp4', 'title': '13h15, le dimanche... - Les mystères de Jésus', - 'timestamp': 1502623500, - 'duration': 2580, + 'timestamp': 1514118300, + 'duration': 2880, 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20170813', + 'upload_date': '20171224', }, 'params': { 'skip_download': True, From 3690c2f59827c79a1bbe388a7c1ae75db7477db2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 17:44:20 -0500 Subject: [PATCH 04/48] [ie/francetv] Detect and raise errors for DRM (#10165) Closes #10163 Authored by: bashonly --- yt_dlp/extractor/francetv.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index de2bec25ac..ab08f1c6bf 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -5,6 +5,7 @@ from .dailymotion import DailymotionIE from ..networking import HEADRequest from ..utils import ( + clean_html, determine_ext, filter_dict, format_field, @@ -82,6 +83,7 @@ class FranceTVIE(InfoExtractor): def _extract_video(self, video_id, hostname=None): is_live = None videos = [] + drm_formats = False title = None subtitle = None episode_number = None @@ -99,13 +101,12 @@ def _extract_video(self, video_id, hostname=None): 'device_type': device_type, 'browser': browser, 'domain': hostname, - }), fatal=False) + }), fatal=False, expected_status=422) # 422 json gives detailed error code/message if not dinfo: continue - video = traverse_obj(dinfo, ('video', {dict})) - if video: + if video := traverse_obj(dinfo, ('video', {dict})): videos.append(video) if duration is None: duration = video.get('duration') @@ -113,9 +114,19 @@ def _extract_video(self, video_id, hostname=None): is_live = video.get('is_live') if spritesheets is None: spritesheets = video.get('spritesheets') + elif code := traverse_obj(dinfo, ('code', {int})): + if code == 2009: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + elif code in (2015, 2017): + # 2015: L'accès à cette vidéo est impossible. 
(DRM-only) + # 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM) + drm_formats = True + continue + self.report_warning( + f'{self.IE_NAME} said: {code} "{clean_html(dinfo.get("message"))}"') + continue - meta = traverse_obj(dinfo, ('meta', {dict})) - if meta: + if meta := traverse_obj(dinfo, ('meta', {dict})): if title is None: title = meta.get('title') # meta['pre_title'] contains season and episode number for series in format "S E" @@ -128,6 +139,9 @@ def _extract_video(self, video_id, hostname=None): if timestamp is None: timestamp = parse_iso8601(meta.get('broadcasted_at')) + if not videos and drm_formats: + self.report_drm(video_id) + formats, subtitles, video_url = [], {}, None for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])): video_url = video['url'] From 92a1c4abaeeba9a69d611c57b73555cb1a1f00ad Mon Sep 17 00:00:00 2001 From: JSubelj Date: Fri, 14 Jun 2024 00:51:12 +0200 Subject: [PATCH 05/48] [ie/rtvslo.si:show] Add extractor (#8418) Authored by: JSubelj, seproDev Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/rtvslo.py | 160 ++++++++++++++++++-------------- 2 files changed, 96 insertions(+), 69 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e9cd38a651..0f599c9db7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1755,7 +1755,10 @@ RTVETelevisionIE, ) from .rtvs import RTVSIE -from .rtvslo import RTVSLOIE +from .rtvslo import ( + RTVSLOIE, + RTVSLOShowIE, +) from .rudovideo import RudoVideoIE from .rule34video import Rule34VideoIE from .rumble import ( diff --git a/yt_dlp/extractor/rtvslo.py b/yt_dlp/extractor/rtvslo.py index e71d01d1e0..9c2e6fb6b5 100644 --- a/yt_dlp/extractor/rtvslo.py +++ b/yt_dlp/extractor/rtvslo.py @@ -1,3 +1,5 @@ +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -6,6 +8,7 @@ traverse_obj, unified_timestamp, url_or_none, + urljoin, ) @@ -21,75 +24,73 @@ class RTVSLOIE(InfoExtractor): _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622' SUB_LANGS_MAP = {'Slovenski': 'sl'} - _TESTS = [ - { - 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv', - 'info_dict': { - 'id': '174842550', - 'ext': 'mp4', - 'release_timestamp': 1643140032, - 'upload_date': '20220125', - 'series': 'Dnevnik', - 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg', - 'description': 'md5:76a18692757aeb8f0f51221106277dd2', - 'timestamp': 1643137046, - 'title': 'Dnevnik', - 'series_id': '92', - 'release_date': '20220125', - 'duration': 1789, - }, - }, { - 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754', - 'info_dict': { - 'id': '174843754', - 'ext': 'mp4', - 'series_id': '94', - 'release_date': '20220129', - 'timestamp': 1643484455, - 'title': 'Utrip', - 'duration': 813, - 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg', - 'description': 'md5:77f2892630c7b17bb7a5bb84319020c9', - 'release_timestamp': 1643485825, - 'upload_date': '20220129', - 'series': 'Utrip', - }, - }, { - 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609', - 'info_dict': { - 'id': '174844609', - 'ext': 'mp3', - 'series_id': '106615841', - 'title': 'Il giornale della sera', - 'duration': 1328, - 'series': 'Il giornale della sera', - 'timestamp': 1643743800, - 'release_timestamp': 1643745424, - 'thumbnail': 
'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg', - 'upload_date': '20220201', - 'tbr': 128000, - 'release_date': '20220201', - }, - }, { - 'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750', - 'info_dict': { - 'id': '148350750', - 'ext': 'mp4', - 'title': 'Prvi šolski dan, mozaična oddaja za mlade', - 'series': 'Razred zase', - 'series_id': '148185730', - 'duration': 1481, - 'upload_date': '20121019', - 'timestamp': 1350672122, - 'release_date': '20121019', - 'release_timestamp': 1350672122, - 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg', - }, - }, { - 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550', - 'only_matching': True, + _TESTS = [{ + 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv', + 'info_dict': { + 'id': '174842550', + 'ext': 'mp4', + 'release_timestamp': 1643140032, + 'upload_date': '20220125', + 'series': 'Dnevnik', + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg', + 'description': 'md5:76a18692757aeb8f0f51221106277dd2', + 'timestamp': 1643137046, + 'title': 'Dnevnik', + 'series_id': '92', + 'release_date': '20220125', + 'duration': 1789, }, - ] + }, { + 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754', + 'info_dict': { + 'id': '174843754', + 'ext': 'mp4', + 'series_id': '94', + 'release_date': '20220129', + 'timestamp': 1643484455, + 'title': 'Utrip', + 'duration': 813, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg', + 'description': 'md5:77f2892630c7b17bb7a5bb84319020c9', + 'release_timestamp': 1643485825, + 'upload_date': '20220129', + 'series': 'Utrip', + }, + }, { + 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609', + 'info_dict': { + 'id': '174844609', + 'ext': 'mp3', + 'series_id': '106615841', + 'title': 'Il giornale della sera', + 'duration': 1328, + 'series': 'Il giornale della sera', + 'timestamp': 1643743800, + 'release_timestamp': 1643745424, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg', + 'upload_date': '20220201', + 'tbr': 128000, + 'release_date': '20220201', + }, + }, { + 'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750', + 'info_dict': { + 'id': '148350750', + 'ext': 'mp4', + 'title': 'Prvi šolski dan, mozaična oddaja za mlade', + 'series': 'Razred zase', + 'series_id': '148185730', + 'duration': 1481, + 'upload_date': '20121019', + 'timestamp': 1350672122, + 'release_date': '20121019', + 'release_timestamp': 1350672122, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg', + }, + }, { + 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550', + 'only_matching': True, + }] def _real_extract(self, url): v_id = self._match_id(url) @@ -164,3 +165,26 @@ def _real_extract(self, url): 'series': meta.get('showName'), 'series_id': meta.get('showId'), } + + +class RTVSLOShowIE(InfoExtractor): + IE_NAME = 'rtvslo.si:show' + _VALID_URL = r'https?://(?:365|4d)\.rtvslo.si/oddaja/[^/?#&]+/(?P\d+)' + + _TESTS = [{ + 'url': 'https://365.rtvslo.si/oddaja/ekipa-bled/173250997', + 'info_dict': { + 'id': '173250997', + 'title': 'Ekipa Bled', + }, + 'playlist_count': 18, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + return self.playlist_from_matches( + re.findall(r']*\bhref="(/arhiv/[^"]+)"', webpage), + playlist_id, 
self._html_extract_title(webpage), + getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE) From e53e56b73543799638fa6abb0c78f8b091aa84e1 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 18:01:19 -0500 Subject: [PATCH 06/48] [ie/soundcloud] Fix `download` format extraction (#10125) Authored by: bashonly --- yt_dlp/extractor/soundcloud.py | 52 +++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 0f73684355..0c6f0b070a 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -95,7 +95,7 @@ def _update_client_id(self): return raise ExtractorError('Unable to extract client id') - def _download_json(self, *args, **kwargs): + def _call_api(self, *args, **kwargs): non_fatal = kwargs.get('fatal') is False if non_fatal: del kwargs['fatal'] @@ -104,7 +104,7 @@ def _download_json(self, *args, **kwargs): query['client_id'] = self._CLIENT_ID kwargs['query'] = query try: - return super()._download_json(*args, **kwargs) + return self._download_json(*args, **kwargs) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): self._store_client_id(None) @@ -163,7 +163,7 @@ def genNumBlock(): 'user_agent': self._USER_AGENT } - response = self._download_json( + response = self._call_api( self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID), None, note='Verifying login token...', fatal=False, data=json.dumps(payload).encode()) @@ -217,12 +217,26 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f query['secret_token'] = secret_token if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'): - download_url = update_url_query( - self._API_V2_BASE + 'tracks/' + track_id + '/download', query) - redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') - if redirect_url: + try: + # Do not use _call_api(); HTTP Error codes have different meanings for this request + download_data = self._download_json( + f'{self._API_V2_BASE}tracks/{track_id}/download', track_id, + 'Downloading original download format info JSON', query=query, headers=self._HEADERS) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + self.report_warning( + 'Original download format is only available ' + f'for registered users. 
{self._login_hint()}') + elif isinstance(e.cause, HTTPError) and e.cause.status == 403: + self.write_debug('Original download format is not available for this client') + else: + self.report_warning(e.msg) + download_data = None + + if redirect_url := traverse_obj(download_data, ('redirectUri', {url_or_none})): urlh = self._request_webpage( - HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False) + HEADRequest(redirect_url), track_id, 'Checking original download format availability', + 'Original download format is not available', fatal=False) if urlh: format_url = urlh.url format_urls.add(format_url) @@ -303,7 +317,7 @@ def add_format(f, protocol, is_preview=False): stream = None for retry in self.RetryManager(fatal=False): try: - stream = self._download_json( + stream = self._call_api( format_url, track_id, f'Downloading {identifier} format info JSON', query=query, headers=self._HEADERS) except ExtractorError as e: @@ -630,7 +644,7 @@ def _real_extract(self, url): resolve_title += f'/{token}' info_json_url = self._resolv_url(self._BASE_URL + resolve_title) - info = self._download_json( + info = self._call_api( info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS) return self._extract_info_dict(info, full_title, token) @@ -641,7 +655,7 @@ def _extract_set(self, playlist, token=None): playlist_id = str(playlist['id']) tracks = playlist.get('tracks') or [] if not all(t.get('permalink_url') for t in tracks) and token: - tracks = self._download_json( + tracks = self._call_api( self._API_V2_BASE + 'tracks', playlist_id, 'Downloading tracks', query={ 'ids': ','.join([str(t['id']) for t in tracks]), @@ -699,7 +713,7 @@ def _real_extract(self, url): if token: full_title += '/' + token - info = self._download_json(self._resolv_url( + info = self._call_api(self._resolv_url( self._BASE_URL + full_title), full_title, headers=self._HEADERS) if 'errors' in info: @@ -730,7 +744,7 @@ def _entries(self, url, playlist_id): for i in itertools.count(): for retry in self.RetryManager(): try: - response = self._download_json( + response = self._call_api( url, playlist_id, query=query, headers=self._HEADERS, note=f'Downloading track page {i + 1}') break @@ -838,7 +852,7 @@ def _real_extract(self, url): mobj = self._match_valid_url(url) uploader = mobj.group('user') - user = self._download_json( + user = self._call_api( self._resolv_url(self._BASE_URL + uploader), uploader, 'Downloading user info', headers=self._HEADERS) @@ -864,7 +878,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): user_id = self._match_id(url) - user = self._download_json( + user = self._call_api( self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS) return self._extract_playlist( @@ -886,7 +900,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): track_name = self._match_id(url) - track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS) + track = self._call_api(self._resolv_url(url), track_name, headers=self._HEADERS) track_id = self._search_regex( r'soundcloud:track-stations:(\d+)', track['id'], 'track id') @@ -930,7 +944,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): slug, relation = self._match_valid_url(url).group('slug', 'relation') - track = self._download_json( + track = self._call_api( self._resolv_url(self._BASE_URL + slug), slug, 'Downloading track info', 
headers=self._HEADERS) @@ -965,7 +979,7 @@ def _real_extract(self, url): if token: query['secret_token'] = token - data = self._download_json( + data = self._call_api( self._API_V2_BASE + 'playlists/' + playlist_id, playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS) @@ -1000,7 +1014,7 @@ def _get_collection(self, endpoint, collection_id, **query): next_url = update_url_query(self._API_V2_BASE + endpoint, query) for i in itertools.count(1): - response = self._download_json( + response = self._call_api( next_url, collection_id, f'Downloading page {i}', 'Unable to download API page', headers=self._HEADERS) From b8e2a5e0e1030076f833917906e19bb6c7b318f6 Mon Sep 17 00:00:00 2001 From: garret1317 Date: Fri, 14 Jun 2024 00:08:40 +0100 Subject: [PATCH 07/48] [ie/NHKRadiru] Fix extractor (#10106) Closes #10105 Authored by: garret1317 --- yt_dlp/extractor/nhk.py | 240 ++++++++++++++++++++++++++++------------ 1 file changed, 171 insertions(+), 69 deletions(-) diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 0ff25a6909..0bd6edfcba 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -4,6 +4,7 @@ from ..utils import ( ExtractorError, clean_html, + filter_dict, get_element_by_class, int_or_none, join_nonempty, @@ -590,21 +591,22 @@ class NhkRadiruIE(InfoExtractor): IE_DESC = 'NHK らじる (Radiru/Rajiru)' _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P[\da-zA-Z]+)_(?P[\da-zA-Z]+)(?:_(?P[\da-zA-Z]+))?' _TESTS = [{ - 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210', - 'skip': 'Episode expired on 2024-02-24', + 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239', + 'skip': 'Episode expired on 2024-06-09', 'info_dict': { - 'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス', - 'id': '0449_01_3926210', + 'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集', + 'id': '0449_01_4003239', 'ext': 'm4a', + 'uploader': 'NHK FM 東京', + 'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc', 'series': 'ジャズ・トゥナイト', - 'uploader': 'NHK-FM', - 'channel': 'NHK-FM', + 'channel': 'NHK FM 東京', 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg', - 'release_date': '20240217', - 'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811', - 'timestamp': 1708185600, - 'release_timestamp': 1708178400, - 'upload_date': '20240217', + 'upload_date': '20240601', + 'series_id': '0449_01', + 'release_date': '20240601', + 'timestamp': 1717257600, + 'release_timestamp': 1717250400, }, }, { # playlist, airs every weekday so it should _hopefully_ be okay forever @@ -613,71 +615,145 @@ class NhkRadiruIE(InfoExtractor): 'id': '0458_01', 'title': 'ベストオブクラシック', 'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。', - 'channel': 'NHK-FM', - 'uploader': 'NHK-FM', 'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg', + 'series_id': '0458_01', + 'uploader': 'NHK FM', + 'channel': 'NHK FM', + 'series': 'ベストオブクラシック', }, 'playlist_mincount': 3, }, { # one with letters in the id - 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470', - 'note': 'Expires on 2024-03-31', + 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688', + 'note': 'Expires on 2025-03-31', 'info_dict': { - 'id': 'F300_06_3738470', + 'id': 'F683_01_3910688', 'ext': 'm4a', - 'title': '有島武郎「一房のぶどう」', - 'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)', - 'channel': 'NHKラジオ第1、NHK-FM', - 'uploader': 'NHKラジオ第1、NHK-FM', - 'timestamp': 1635757200, - 'thumbnail': 
'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg', - 'release_date': '20161207', - 'series': 'らじる文庫 by ラジオ深夜便 ', - 'release_timestamp': 1481126700, - 'upload_date': '20211101', + 'title': '夏目漱石「文鳥」第1回', + 'series': '【らじる文庫】夏目漱石「文鳥」(全4回)', + 'series_id': 'F683_01', + 'description': '朗読:浅井理アナウンサー', + 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg', + 'upload_date': '20240106', + 'release_date': '20240106', + 'uploader': 'NHK R1', + 'release_timestamp': 1704511800, + 'channel': 'NHK R1', + 'timestamp': 1704512700, }, - 'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'], + 'expected_warnings': ['Unable to download JSON metadata', + 'Failed to get extended metadata. API returned Error 1: Invalid parameters'], }, { # news - 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109', - 'skip': 'Expires on 2023-04-17', + 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173', 'info_dict': { - 'id': 'F261_01_3855109', + 'id': 'F261_01_4012173', 'ext': 'm4a', 'channel': 'NHKラジオ第1', 'uploader': 'NHKラジオ第1', - 'timestamp': 1681635900, - 'release_date': '20230416', 'series': 'NHKラジオニュース', - 'title': '午後6時のNHKニュース', + 'title': '午前0時のNHKニュース', 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg', - 'upload_date': '20230416', - 'release_timestamp': 1681635600, + 'release_timestamp': 1718290800, + 'release_date': '20240613', + 'timestamp': 1718291400, + 'upload_date': '20240613', }, + }, { + # fallback when extended metadata fails + 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298', + 'skip': 'Expires on 2024-06-07', + 'info_dict': { + 'id': '2834_01_4009298', + 'title': 'まち☆キラ!開成町特集', + 'ext': 'm4a', + 'release_date': '20240531', + 'upload_date': '20240531', + 'series': 'はま☆キラ!', + 'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg', + 'channel': 'NHK R1,FM', + 'description': '', + 'timestamp': 1717123800, + 'uploader': 'NHK R1,FM', + 'release_timestamp': 1717120800, + 'series_id': '2834_01', + }, + 'expected_warnings': ['Failed to get extended metadata. API returned empty list.'], }] _API_URL_TMPL = None - def _extract_extended_description(self, episode_id, episode): - service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')})) - aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str})) + def _extract_extended_metadata(self, episode_id, aa_vinfo): + service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')})) detail_url = try_call( - lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3)) + lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3])) if not detail_url: - return + return {} - full_meta = traverse_obj( - self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False), - ('list', service, 0, {dict})) or {} - return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta) + response = self._download_json( + detail_url, episode_id, 'Downloading extended metadata', + 'Failed to download extended metadata', fatal=False, expected_status=400) + if not response: + return {} - def _extract_episode_info(self, headline, programme_id, series_meta): + if error := traverse_obj(response, ('error', {dict})): + self.report_warning( + 'Failed to get extended metadata. 
API returned ' + f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}') + return {} + + full_meta = traverse_obj(response, ('list', service, 0, {dict})) + if not full_meta: + self.report_warning('Failed to get extended metadata. API returned empty list.') + return {} + + station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None + thumbnails = [{ + 'id': str(id_), + 'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1, + **traverse_obj(thumb, { + 'url': 'url', + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + } for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))] + + return filter_dict({ + 'channel': station, + 'uploader': station, + 'description': join_nonempty( + 'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta), + 'thumbnails': thumbnails, + **traverse_obj(full_meta, { + 'title': ('title', {str}), + 'timestamp': ('end_time', {unified_timestamp}), + 'release_timestamp': ('start_time', {unified_timestamp}), + }), + }) + + def _extract_episode_info(self, episode, programme_id, series_meta): + episode_id = f'{programme_id}_{episode["id"]}' + aa_vinfo = traverse_obj(episode, ('aa_contents_id', {lambda x: x.split(';')})) + extended_metadata = self._extract_extended_metadata(episode_id, aa_vinfo) + fallback_start_time, _, fallback_end_time = traverse_obj( + aa_vinfo, (4, {str}, {lambda x: (x or '').partition('_')})) + + return { + **series_meta, + 'id': episode_id, + 'formats': self._extract_m3u8_formats(episode.get('stream_url'), episode_id, fatal=False), + 'container': 'm4a_dash', # force fixup, AAC-only HLS + 'was_live': True, + 'title': episode.get('program_title'), + 'description': episode.get('program_sub_title'), # fallback + 'timestamp': unified_timestamp(fallback_end_time), + 'release_timestamp': unified_timestamp(fallback_start_time), + **extended_metadata, + } + + def _extract_news_info(self, headline, programme_id, series_meta): episode_id = f'{programme_id}_{headline["headline_id"]}' episode = traverse_obj(headline, ('file_list', 0, {dict})) - description = self._extract_extended_description(episode_id, episode) - if not description: - self.report_warning('Failed to get extended description, falling back to summary') - description = traverse_obj(episode, ('file_title_sub', {str})) return { **series_meta, @@ -687,9 +763,9 @@ def _extract_episode_info(self, headline, programme_id, series_meta): 'was_live': True, 'series': series_meta.get('title'), 'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'), - 'description': description, **traverse_obj(episode, { - 'title': 'file_title', + 'title': ('file_title', {str}), + 'description': ('file_title_sub', {str}), 'timestamp': ('open_time', {unified_timestamp}), 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}), }), @@ -706,32 +782,58 @@ def _real_extract(self, url): site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline') programme_id = f'{site_id}_{corner_id}' - if site_id == 'F261': - json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json' - else: - json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json' + if site_id == 'F261': # XXX: News programmes use old API (for now?) 
+ meta = self._download_json( + 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main'] + series_meta = traverse_obj(meta, { + 'title': ('program_name', {str}), + 'channel': ('media_name', {str}), + 'uploader': ('media_name', {str}), + 'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}), + }, get_all=False) - meta = self._download_json(json_url, programme_id)['main'] + if headline_id: + headline = traverse_obj( + meta, ('detail_list', lambda _, v: v['headline_id'] == headline_id, any)) + if not headline: + raise ExtractorError('Content not found; it has most likely expired', expected=True) + return self._extract_news_info(headline, programme_id, series_meta) - series_meta = traverse_obj(meta, { - 'title': 'program_name', - 'channel': 'media_name', - 'uploader': 'media_name', - 'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}), - }, get_all=False) + def news_entries(): + for headline in traverse_obj(meta, ('detail_list', ..., {dict})): + yield self._extract_news_info(headline, programme_id, series_meta) + + return self.playlist_result( + news_entries(), programme_id, description=meta.get('site_detail'), **series_meta) + + meta = self._download_json( + 'https://www.nhk.or.jp/radio-api/app/v1/web/ondemand/series', programme_id, query={ + 'site_id': site_id, + 'corner_site_id': corner_id, + }) + + fallback_station = join_nonempty('NHK', traverse_obj(meta, ('radio_broadcast', {str})), delim=' ') + series_meta = { + 'series': join_nonempty('title', 'corner_name', delim=' ', from_dict=meta), + 'series_id': programme_id, + 'thumbnail': traverse_obj(meta, ('thumbnail_url', {url_or_none})), + 'channel': fallback_station, + 'uploader': fallback_station, + } if headline_id: - return self._extract_episode_info( - traverse_obj(meta, ( - 'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False), - programme_id, series_meta) + episode = traverse_obj(meta, ('episodes', lambda _, v: v['id'] == int(headline_id), any)) + if not episode: + raise ExtractorError('Content not found; it has most likely expired', expected=True) + return self._extract_episode_info(episode, programme_id, series_meta) def entries(): - for headline in traverse_obj(meta, ('detail_list', ..., {dict})): - yield self._extract_episode_info(headline, programme_id, series_meta) + for episode in traverse_obj(meta, ('episodes', ..., {dict})): + yield self._extract_episode_info(episode, programme_id, series_meta) return self.playlist_result( - entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta) + entries(), programme_id, title=series_meta.get('series'), + description=meta.get('series_description'), **series_meta) class NhkRadioNewsPageIE(InfoExtractor): From ea88129784fcbb6987161df9ba05909325d8e2e9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 18:16:43 -0500 Subject: [PATCH 08/48] [ie/tiktok] Detect and raise when login is required (#10124) Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index dc74d4a1f5..48934fc6b3 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -213,8 +213,19 @@ def _extract_aweme_app(self, aweme_id): return self._parse_aweme_video_app(aweme_detail) def _extract_web_data_and_status(self, url, video_id, fatal=True): - webpage = self._download_webpage(url, video_id, headers={'User-Agent': 
'Mozilla/5.0'}, fatal=fatal) or '' - video_data, status = {}, None + video_data, status = {}, -1 + + res = self._download_webpage_handle(url, video_id, fatal=fatal, headers={'User-Agent': 'Mozilla/5.0'}) + if res is False: + return video_data, status + + webpage, urlh = res + if urllib.parse.urlparse(urlh.url).path == '/login': + message = 'TikTok is requiring login for access to this content' + if fatal: + self.raise_login_required(message) + self.report_warning(f'{message}. {self._login_hint()}') + return video_data, status if universal_data := self._get_universal_data(webpage, video_id): self.write_debug('Found universal data for rehydration') From a0d9967f6822fc279e86bce33464194985148727 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 13 Jun 2024 18:22:30 -0500 Subject: [PATCH 09/48] [ie/youtube:tab] Fix channel metadata extraction (#10071) Closes #9893, Closes #10090 Authored by: bashonly, shoxie007 Co-authored-by: shoxie007 <74592022+shoxie007@users.noreply.github.com> --- yt_dlp/extractor/youtube.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a227f24258..a89744eb10 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -885,14 +885,14 @@ def _get_count(self, data, *path_list): return count @staticmethod - def _extract_thumbnails(data, *path_list): + def _extract_thumbnails(data, *path_list, final_key='thumbnails'): """ Extract thumbnails from thumbnails dict @param path_list: path list to level that contains 'thumbnails' key """ thumbnails = [] for path in path_list or [()]: - for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)): + for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)): thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue @@ -5124,6 +5124,10 @@ def _extract_metadata_from_tabs(self, item_id, data): else: metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict) + # pageHeaderViewModel slow rollout began April 2024 + page_header_view_model = traverse_obj(data, ( + 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict})) + # We can get the uncropped banner/avatar by replacing the crop params with '=s0' # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 def _get_uncropped(url): @@ -5139,8 +5143,10 @@ def _get_uncropped(url): 'preference': 1, }) - channel_banners = self._extract_thumbnails( - data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + channel_banners = ( + self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + or self._extract_thumbnails( + page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources')) for banner in channel_banners: banner['preference'] = -10 @@ -5167,7 +5173,11 @@ def _get_uncropped(url): or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or info['id']), 'availability': self._extract_availability(data), - 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), + 'channel_follower_count': ( + self._get_count(data, ('header', ..., 'subscriberCountText')) + or traverse_obj(page_header_view_model, ( + 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts', + lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))), 'description': 
try_get(metadata_renderer, lambda x: x.get('description', '')), 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str})) or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))), From 4093eb1fcc29a0e2aea9adfcba479787d9ae0c0c Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Sat, 15 Jun 2024 15:51:27 -0400 Subject: [PATCH 10/48] [ie/khanacademy] Fix extractors (#9136) Closes #8775 Authored by: c-basalt --- yt_dlp/extractor/khanacademy.py | 141 +++++++++++++++++++++----------- 1 file changed, 92 insertions(+), 49 deletions(-) diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py index 5333036a8b..3f03f9e4c4 100644 --- a/yt_dlp/extractor/khanacademy.py +++ b/yt_dlp/extractor/khanacademy.py @@ -3,43 +3,52 @@ from .common import InfoExtractor from ..utils import ( int_or_none, + make_archive_id, parse_iso8601, - try_get, + str_or_none, + traverse_obj, + url_or_none, + urljoin, ) class KhanAcademyBaseIE(InfoExtractor): _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P(?:[^/]+/){%s}%s[^?#/&]+)' + _PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70' + def _parse_video(self, video): return { '_type': 'url_transparent', 'url': video['youtubeId'], - 'id': video.get('slug'), - 'title': video.get('title'), - 'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'), - 'duration': int_or_none(video.get('duration')), - 'description': video.get('description'), + 'id': video['youtubeId'], 'ie_key': 'Youtube', + **traverse_obj(video, { + 'display_id': ('id', {str_or_none}), + 'title': ('translatedTitle', {str}), + 'thumbnail': ('thumbnailUrls', ..., 'url', {url_or_none}), + 'duration': ('duration', {int_or_none}), + 'description': ('description', {str}), + }, get_all=False), } def _real_extract(self, url): display_id = self._match_id(url) content = self._download_json( - 'https://www.khanacademy.org/api/internal/graphql/FetchContentData', - display_id, query={ + 'https://www.khanacademy.org/api/internal/graphql/ContentForPath', display_id, + query={ 'fastly_cacheable': 'persist_until_publish', - 'hash': '4134764944', - 'lang': 'en', + 'pcv': self._PUBLISHED_CONTENT_VERSION, + 'hash': '1242644265', 'variables': json.dumps({ 'path': display_id, - 'queryParams': 'lang=en', - 'isModal': False, - 'followRedirects': True, 'countryCode': 'US', + 'kaLocale': 'en', + 'clientPublishedContentVersion': self._PUBLISHED_CONTENT_VERSION, }), - })['data']['contentJson'] - return self._parse_component_props(self._parse_json(content, display_id)['componentProps']) + 'lang': 'en', + })['data']['contentRoute']['listedPathData'] + return self._parse_component_props(content, display_id) class KhanAcademyIE(KhanAcademyBaseIE): @@ -47,64 +56,98 @@ class KhanAcademyIE(KhanAcademyBaseIE): _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/') _TEST = { 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad', - 'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0', + 'md5': '1d5c2e70fa6aa29c38eca419f12515ce', 'info_dict': { 'id': 'FlIG3TvQCBQ', 'ext': 'mp4', 'title': 'The one-time pad', 'description': 'The perfect cipher', + 'display_id': '716378217', 'duration': 176, - 'uploader': 'Brit Cruise', - 'uploader_id': 'khanacademy', + 'uploader': 'Khan Academy', + 'uploader_id': '@khanacademy', + 'uploader_url': 'https://www.youtube.com/@khanacademy', 'upload_date': '20120411', 'timestamp': 1334170113, 'license': 'cc-by-nc-sa', + 
'live_status': 'not_live', + 'channel': 'Khan Academy', + 'channel_id': 'UC4a-Gbdw7vOaccHmFo40b9g', + 'channel_url': 'https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g', + 'channel_is_verified': True, + 'playable_in_embed': True, + 'categories': ['Education'], + 'creators': ['Brit Cruise'], + 'tags': [], + 'age_limit': 0, + 'availability': 'public', + 'comment_count': int, + 'channel_follower_count': int, + 'thumbnail': str, + 'view_count': int, + 'like_count': int, + 'heatmap': list, }, 'add_ie': ['Youtube'], } - def _parse_component_props(self, component_props): - video = component_props['tutorialPageData']['contentModel'] - info = self._parse_video(video) - author_names = video.get('authorNames') - info.update({ - 'uploader': ', '.join(author_names) if author_names else None, - 'timestamp': parse_iso8601(video.get('dateAdded')), - 'license': video.get('kaUserLicense'), - }) - return info + def _parse_component_props(self, component_props, display_id): + video = component_props['content'] + return { + **self._parse_video(video), + **traverse_obj(video, { + 'creators': ('authorNames', ..., {str}), + 'timestamp': ('dateAdded', {parse_iso8601}), + 'license': ('kaUserLicense', {str}), + }), + } class KhanAcademyUnitIE(KhanAcademyBaseIE): IE_NAME = 'khanacademy:unit' - _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)' - _TEST = { + _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('1,2', '')) + '/?(?:[?#&]|$)' + _TESTS = [{ 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography', 'info_dict': { - 'id': 'cryptography', + 'id': 'x48c910b6', 'title': 'Cryptography', 'description': 'How have humans protected their secret messages through history? What has changed today?', + 'display_id': 'computing/computer-science/cryptography', + '_old_archive_ids': ['khanacademyunit cryptography'], }, 'playlist_mincount': 31, - } + }, { + 'url': 'https://www.khanacademy.org/computing/computer-science', + 'info_dict': { + 'id': 'x301707a0', + 'title': 'Computer science theory', + 'description': 'md5:4b472a4646e6cf6ec4ccb52c4062f8ba', + 'display_id': 'computing/computer-science', + '_old_archive_ids': ['khanacademyunit computer-science'], + }, + 'playlist_mincount': 50, + }] - def _parse_component_props(self, component_props): - curation = component_props['curation'] + def _parse_component_props(self, component_props, display_id): + course = component_props['course'] + selected_unit = traverse_obj(course, ( + 'unitChildren', lambda _, v: v['relativeUrl'] == f'/{display_id}', any)) or course - entries = [] - tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or [] - for tutorial_number, tutorial in enumerate(tutorials, 1): - chapter_info = { - 'chapter': tutorial.get('title'), - 'chapter_number': tutorial_number, - 'chapter_id': tutorial.get('id'), - } - for content_item in (tutorial.get('contentItems') or []): - if content_item.get('kind') == 'Video': - info = self._parse_video(content_item) - info.update(chapter_info) - entries.append(info) + def build_entry(entry): + return self.url_result(urljoin( + 'https://www.khanacademy.org', entry['canonicalUrl']), + KhanAcademyIE, title=entry.get('translatedTitle')) + + entries = traverse_obj(selected_unit, ( + (('unitChildren', ...), None), 'allOrderedChildren', ..., 'curatedChildren', + lambda _, v: v['contentKind'] == 'Video' and v['canonicalUrl'], {build_entry})) return self.playlist_result( - entries, curation.get('unit'), curation.get('title'), - 
curation.get('description')) + entries, + display_id=display_id, + **traverse_obj(selected_unit, { + 'id': ('id', {str}), + 'title': ('translatedTitle', {str}), + 'description': ('translatedDescription', {str}), + '_old_archive_ids': ('slug', {str}, {lambda x: [make_archive_id(self, x)] if x else None}), + })) From ca8885edd93bdf8912af6c22ee335b6222cb9ba9 Mon Sep 17 00:00:00 2001 From: bashonly Date: Mon, 3 Jun 2024 11:22:49 -0500 Subject: [PATCH 11/48] [fd/hls] Apply `extra_param_to_key_url` from info dict Authored by: bashonly --- yt_dlp/YoutubeDL.py | 5 +++-- yt_dlp/downloader/external.py | 2 +- yt_dlp/downloader/hls.py | 24 ++++++++++++++---------- yt_dlp/extractor/common.py | 9 ++++++++- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5abcb4635c..7ed01bf840 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -581,8 +581,9 @@ class YoutubeDL: 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', - 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', - 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', + 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url', + 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', + 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', } _deprecated_multivalue_fields = { 'album_artist': 'album_artists', diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 8b45c671a0..63c1085699 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -108,7 +108,7 @@ def supports(cls, info_dict): return all(( not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, - not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), + not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), )) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 9cb4f014c0..0a00d5dabb 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -160,10 +160,12 @@ def is_ad_fragment_end(s): extra_state = ctx.setdefault('extra_state', {}) format_index = info_dict.get('format_index') - extra_query = None - extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') - if extra_param_to_segment_url: - extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + extra_segment_query = None + if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'): + extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url) + extra_key_query = None + if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'): + extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -190,8 +192,8 @@ def is_ad_fragment_end(s): if frag_index <= ctx['fragment_index']: continue frag_url = urljoin(man_url, line) - if extra_query: - 
frag_url = update_url_query(frag_url, extra_query) + if extra_segment_query: + frag_url = update_url_query(frag_url, extra_segment_query) fragments.append({ 'frag_index': frag_index, @@ -212,8 +214,8 @@ def is_ad_fragment_end(s): frag_index += 1 map_info = parse_m3u8_attributes(line[11:]) frag_url = urljoin(man_url, map_info.get('URI')) - if extra_query: - frag_url = update_url_query(frag_url, extra_query) + if extra_segment_query: + frag_url = update_url_query(frag_url, extra_segment_query) if map_info.get('BYTERANGE'): splitted_byte_range = map_info.get('BYTERANGE').split('@') @@ -244,8 +246,10 @@ def is_ad_fragment_end(s): decrypt_info['KEY'] = external_aes_key else: decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) - if extra_query: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if extra_key_query or extra_segment_query: + # Fall back to extra_segment_query to key for backwards compat + decrypt_info['URI'] = update_url_query( + decrypt_info['URI'], extra_key_query or extra_segment_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 2799747ece..e5efd08b4f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -234,7 +234,14 @@ class InfoExtractor: 'maybe' if the format may have DRM and has to be tested before download. * extra_param_to_segment_url A query string to append to each fragment's URL, or to update each existing query string - with. Only applied by the native HLS/DASH downloaders. + with. If it is an HLS stream with an AES-128 decryption key, + the query paramaters will be passed to the key URI as well, + unless there is an `extra_param_to_key_url` given, + or unless an external key URI is provided via `hls_aes`. + Only applied by the native HLS/DASH downloaders. + * extra_param_to_key_url A query string to append to the URL + of the format's HLS AES-128 decryption key. + Only applied by the native HLS downloader. * hls_aes A dictionary of HLS AES-128 decryption information used by the native HLS downloader to override the values in the media playlist when an '#EXT-X-KEY' tag From 5dbac313ae4e3e8521dfe2e1a6a048a98ff4b4fe Mon Sep 17 00:00:00 2001 From: bashonly Date: Sat, 15 Jun 2024 18:18:42 -0500 Subject: [PATCH 12/48] [ie/generic] Add `key_query` extractor-arg Authored by: bashonly --- README.md | 3 ++- yt_dlp/extractor/generic.py | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 42ffd9b520..ea7c671748 100644 --- a/README.md +++ b/README.md @@ -1779,8 +1779,9 @@ #### youtubetab (YouTube playlists, channels, feeds, etc.) * `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off #### generic -* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg +* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Note that if the stream has an HLS AES-128 key, then the query parameters will be passed to the key URI as well, unless the `key_query` extractor-arg is passed, or unless an external key URI is provided via the `hls_key` extractor-arg. 
Does not apply to ffmpeg * `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE` +* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist * `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index cc17890e76..3b8e1e957c 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2167,7 +2167,15 @@ def _extra_manifest_info(self, info, manifest_url): urllib.parse.urlparse(fragment_query).query or fragment_query or urllib.parse.urlparse(manifest_url).query or None) - hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None + key_query = self._configuration_arg('key_query', [None], casesense=True)[0] + if key_query is not None: + info['extra_param_to_key_url'] = ( + urllib.parse.urlparse(key_query).query or key_query + or urllib.parse.urlparse(manifest_url).query or None) + + def hex_or_none(value): + return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None + info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), { 'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}), }) or None From d6c2c2bc84f1434255be5c73baeb17d893d2c0d4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 16 Jun 2024 19:01:46 -0500 Subject: [PATCH 13/48] [ie/sproutvideo] Add extractors (#10098) Closes #2933, Closes #8942 Authored by: bashonly, TheZ3ro Co-authored-by: thezero --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/patreon.py | 17 ++- yt_dlp/extractor/sproutvideo.py | 198 ++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+), 5 deletions(-) create mode 100644 yt_dlp/extractor/sproutvideo.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0f599c9db7..c411efb5aa 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1928,6 +1928,10 @@ ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE +from .sproutvideo import ( + SproutVideoIE, + VidsIoIE, +) from .srgssr import ( SRGSSRIE, SRGSSRPlayIE, diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 26ca84ab34..5dc46e3171 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -2,6 +2,7 @@ import urllib.parse from .common import InfoExtractor +from .sproutvideo import VidsIoIE from .vimeo import VimeoIE from ..networking.exceptions import HTTPError from ..utils import ( @@ -12,6 +13,7 @@ int_or_none, mimetype2ext, parse_iso8601, + smuggle_url, str_or_none, traverse_obj, url_or_none, @@ -305,22 +307,27 @@ def _real_extract(self, url): 'channel_follower_count': ('attributes', 'patron_count', 
{int_or_none}), })) + # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo + headers = {'referer': 'https://patreon.com/'} + # handle Vimeo embeds if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo': v_url = urllib.parse.unquote(self._html_search_regex( r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '') if url_or_none(v_url) and self._request_webpage( - v_url, video_id, 'Checking Vimeo embed URL', - headers={'Referer': 'https://patreon.com/'}, - fatal=False, errnote=False): + v_url, video_id, 'Checking Vimeo embed URL', headers=headers, fatal=False, errnote=False): entries.append(self.url_result( VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'), VimeoIE, url_transparent=True)) embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none})) - if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False): - entries.append(self.url_result(embed_url)) + if embed_url and (urlh := self._request_webpage( + embed_url, video_id, 'Checking embed URL', headers=headers, + fatal=False, errnote=False, expected_status=403)): + # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie + if urlh.status != 403 or VidsIoIE.suitable(embed_url): + entries.append(self.url_result(smuggle_url(embed_url, headers))) post_file = traverse_obj(attributes, ('post_file', {dict})) if post_file: diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py new file mode 100644 index 0000000000..c0923594e5 --- /dev/null +++ b/yt_dlp/extractor/sproutvideo.py @@ -0,0 +1,198 @@ +import base64 +import urllib.parse + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + remove_start, + smuggle_url, + unsmuggle_url, + update_url_query, + url_or_none, + urlencode_postdata, +) +from ..utils.traversal import traverse_obj + + +class SproutVideoIE(InfoExtractor): + _NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P[\da-f]+)/[\da-f]+' + _VALID_URL = rf'https?:{_NO_SCHEME_RE}' + _EMBED_REGEX = [rf'
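# A rough sketch (not taken from the patch) of the smuggling pattern the Patreon change
# relies on: extra data such as the referer header is tucked into the embed URL with
# `smuggle_url` and recovered by the embed extractor with `unsmuggle_url`. The embed URL
# below is a made-up placeholder.
from yt_dlp.utils import smuggle_url, unsmuggle_url

headers = {'referer': 'https://patreon.com/'}
smuggled = smuggle_url('https://videos.sproutvideo.com/embed/0123456789abcdef/fedcba9876543210', headers)
# ...later, inside the receiving extractor:
url, smuggled_data = unsmuggle_url(smuggled, default={})
print(url)                            # original embed URL
print(smuggled_data.get('referer'))   # 'https://patreon.com/'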