Merge remote-tracking branch 'upstream/master'

commit d8d31be98e
bergoid, 2023-09-21 22:58:17 +02:00
68 changed files with 2986 additions and 1077 deletions


@@ -13,13 +13,16 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         # CPython 3.11 is in quick-test
-        python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8]
+        python-version: ['3.8', '3.9', '3.10', '3.12-dev', pypy-3.7, pypy-3.8, pypy-3.10]
         run-tests-ext: [sh]
         include:
         # atleast one of each CPython/PyPy tests must be in windows
         - os: windows-latest
           python-version: '3.7'
           run-tests-ext: bat
+        - os: windows-latest
+          python-version: '3.12-dev'
+          run-tests-ext: bat
         - os: windows-latest
           python-version: pypy-3.9
           run-tests-ext: bat


@@ -76,7 +76,7 @@
 # NEW FEATURES

-* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
+* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))

 * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

@@ -1854,7 +1854,7 @@ #### rokfinchannel
 * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`

 #### twitter
-* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
+* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in

 #### stacommu, wrestleuniverse
 * `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
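Note: the new `api` value is supplied like any other extractor argument. A minimal sketch using the Python API (the tweet URL is a made-up placeholder); the CLI equivalent is `yt-dlp --extractor-args "twitter:api=syndication" <url>`:

import yt_dlp

# Select the `syndication` API for tweet extraction (sketch, not part of the diff)
opts = {'extractor_args': {'twitter': {'api': ['syndication']}}}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://twitter.com/i/status/1234567890'])  # placeholder URL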


@@ -68,6 +68,25 @@
     {
         "action": "change",
         "when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
-        "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b"
+        "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b",
+        "authors": ["pukkandan"]
+    },
+    {
+        "action": "change",
+        "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f",
+        "short": "[test] Add tests for socks proxies (#7908)",
+        "authors": ["coletdjnz"]
+    },
+    {
+        "action": "change",
+        "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76",
+        "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)",
+        "authors": ["coletdjnz"]
+    },
+    {
+        "action": "change",
+        "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85",
+        "short": "[rh:urllib] Simplify gzip decoding (#7611)",
+        "authors": ["Grub4K"]
     }
 ]


@@ -31,35 +31,27 @@ class CommitGroup(enum.Enum):
     EXTRACTOR = 'Extractor'
     DOWNLOADER = 'Downloader'
     POSTPROCESSOR = 'Postprocessor'
+    NETWORKING = 'Networking'
     MISC = 'Misc.'

-    @classmethod
-    @property
-    def ignorable_prefixes(cls):
-        return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')
-
     @classmethod
     @lru_cache
-    def commit_lookup(cls):
+    def subgroup_lookup(cls):
         return {
             name: group
             for group, names in {
-                cls.PRIORITY: {'priority'},
                 cls.CORE: {
                     'aes',
                     'cache',
                     'compat_utils',
                     'compat',
                     'cookies',
-                    'core',
                     'dependencies',
                     'formats',
                     'jsinterp',
-                    'networking',
                     'outtmpl',
                     'plugins',
                     'update',
-                    'upstream',
                     'utils',
                 },
                 cls.MISC: {

@@ -67,23 +59,40 @@ def commit_lookup(cls):
                 'cleanup',
                 'devscripts',
                 'docs',
-                'misc',
                 'test',
             },
-            cls.EXTRACTOR: {'extractor', 'ie'},
-            cls.DOWNLOADER: {'downloader', 'fd'},
-            cls.POSTPROCESSOR: {'postprocessor', 'pp'},
+            cls.NETWORKING: {
+                'rh',
+            },
         }.items()
         for name in names
     }

     @classmethod
-    def get(cls, value):
-        result = cls.commit_lookup().get(value)
-        if result:
-            logger.debug(f'Mapped {value!r} => {result.name}')
+    @lru_cache
+    def group_lookup(cls):
+        result = {
+            'fd': cls.DOWNLOADER,
+            'ie': cls.EXTRACTOR,
+            'pp': cls.POSTPROCESSOR,
+            'upstream': cls.CORE,
+        }
+        result.update({item.name.lower(): item for item in iter(cls)})
         return result

+    @classmethod
+    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
+        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
+
+        result = cls.group_lookup().get(group)
+        if not result:
+            if subgroup:
+                return None, value
+            subgroup = group
+            result = cls.subgroup_lookup().get(subgroup)
+
+        return result, subgroup or None


 @dataclass
 class Commit:

@@ -198,19 +207,23 @@ def _prepare_cleanup_misc_items(self, items):
         for commit_infos in cleanup_misc_items.values():
             sorted_items.append(CommitInfo(
                 'cleanup', ('Miscellaneous',), ', '.join(
-                    self._format_message_link(None, info.commit.hash).strip()
+                    self._format_message_link(None, info.commit.hash)
                     for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                 [], Commit(None, '', commit_infos[0].commit.authors), []))

         return sorted_items

-    def format_single_change(self, info):
-        message = self._format_message_link(info.message, info.commit.hash)
+    def format_single_change(self, info: CommitInfo):
+        message, sep, rest = info.message.partition('\n')
+        if '[' not in message:
+            # If the message doesn't already contain markdown links, try to add a link to the commit
+            message = self._format_message_link(message, info.commit.hash)
+
         if info.issues:
-            message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)
+            message = f'{message} ({self._format_issues(info.issues)})'

         if info.commit.authors:
-            message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)
+            message = f'{message} by {self._format_authors(info.commit.authors)}'

         if info.fixes:
             fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

@@ -219,16 +232,14 @@ def format_single_change(self, info):
             if authors != info.commit.authors:
                 fix_message = f'{fix_message} by {self._format_authors(authors)}'

-            message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)
+            message = f'{message} (With fixes in {fix_message})'

-        return message[:-1]
+        return message if not sep else f'{message}{sep}{rest}'

     def _format_message_link(self, message, hash):
         assert message or hash, 'Improperly defined commit message or override'
         message = message if message else hash[:HASH_LENGTH]
-        if not hash:
-            return f'{message}\n'
-        return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)
+        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

     def _format_issues(self, issues):
         return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

@@ -318,7 +329,7 @@ def _get_commits_and_fixes(self, default_author):
         for commitish, revert_commit in reverts.items():
             reverted = commits.pop(commitish, None)
             if reverted:
-                logger.debug(f'{commit} fully reverted {reverted}')
+                logger.debug(f'{commitish} fully reverted {reverted}')
             else:
                 commits[revert_commit.hash] = revert_commit

@@ -337,7 +348,7 @@ def apply_overrides(self, overrides):
         for override in overrides:
             when = override.get('when')
             if when and when not in self and when != self._start:
-                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
+                logger.debug(f'Ignored {when!r} override')
                 continue

             override_hash = override.get('hash') or when

@@ -365,7 +376,7 @@ def groups(self):
         for commit in self:
             upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
             if upstream_re:
-                commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'
+                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

             match = self.MESSAGE_RE.fullmatch(commit.short)
             if not match:

@@ -410,25 +421,20 @@ def details_from_prefix(prefix):
         if not prefix:
             return CommitGroup.CORE, None, ()

-        prefix, _, details = prefix.partition('/')
-        prefix = prefix.strip()
-        details = details.strip()
+        prefix, *sub_details = prefix.split(':')

-        group = CommitGroup.get(prefix.lower())
-        if group is CommitGroup.PRIORITY:
-            prefix, _, details = details.partition('/')
+        group, details = CommitGroup.get(prefix)
+        if group is CommitGroup.PRIORITY and details:
+            details = details.partition('/')[2].strip()

-        if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
-            logger.debug(f'Replaced details with {prefix!r}')
-            details = prefix or None
+        if details and '/' in details:
+            logger.error(f'Prefix is overnested, using first part: {prefix}')
+            details = details.partition('/')[0].strip()

         if details == 'common':
             details = None
+        elif group is CommitGroup.NETWORKING and details == 'rh':
+            details = 'Request Handler'

-        if details:
-            details, *sub_details = details.split(':')
-        else:
-            sub_details = []

         return group, details, sub_details
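Note: worked examples (mine, derived from the code above) of the new two-stage lookup:

# CommitGroup.get('ie/twitter') -> (CommitGroup.EXTRACTOR, 'twitter')  # 'ie' hits group_lookup
# CommitGroup.get('cookies')    -> (CommitGroup.CORE, 'cookies')       # falls back to subgroup_lookup
# details_from_prefix('rh:urllib'):
#     splits on ':' into ('rh', ['urllib']); CommitGroup.get('rh') -> (NETWORKING, 'rh'),
#     and the 'rh' detail is then renamed, giving (NETWORKING, 'Request Handler', ['urllib'])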


@@ -10,14 +10,14 @@
 import argparse
 import contextlib
 import sys
-from datetime import datetime
+from datetime import datetime, timezone

 from devscripts.utils import read_version, run_process, write_file


 def get_new_version(version, revision):
     if not version:
-        version = datetime.utcnow().strftime('%Y.%m.%d')
+        version = datetime.now(timezone.utc).strftime('%Y.%m.%d')

     if revision:
         assert revision.isdigit(), 'Revision must be a number'
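Note: the same modernization recurs in the YoutubeDL.py and AWS hunks below. A small sketch of the underlying change, assuming Python 3.12 deprecation semantics:

from datetime import datetime, timezone

# datetime.utcnow()/utcfromtimestamp() return naive datetimes and are deprecated
# since Python 3.12; the timezone-aware replacements used throughout this merge:
now_utc = datetime.now(timezone.utc)               # replaces datetime.utcnow()
from_ts = datetime.fromtimestamp(0, timezone.utc)  # replaces datetime.utcfromtimestamp(0)
print(now_utc.strftime('%Y.%m.%d'), from_ts.isoformat())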


@@ -281,17 +281,13 @@ def test_socks4_auth(self, handler, ctx):
                 rh, proxies={'all': f'socks4://user:@{server_address}'})
             assert response['version'] == 4

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='socks4a implementation currently broken when destination is not a domain name'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
     def test_socks4a_ipv4_target(self, handler, ctx):
         with ctx.socks_server(Socks4ProxyHandler) as server_address:
             with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
                 response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                 assert response['version'] == 4
-                assert response['ipv4_address'] == '127.0.0.1'
-                assert response['domain_address'] is None
+                assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')

     @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
     def test_socks4a_domain_target(self, handler, ctx):

@@ -302,10 +298,7 @@ def test_socks4a_domain_target(self, handler, ctx):
             assert response['ipv4_address'] is None
             assert response['domain_address'] == 'localhost'

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='source_address is not yet supported for socks4 proxies'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
     def test_ipv4_client_source_address(self, handler, ctx):
         with ctx.socks_server(Socks4ProxyHandler) as server_address:
             source_address = f'127.0.0.{random.randint(5, 255)}'

@@ -327,10 +320,7 @@ def test_socks4_errors(self, handler, ctx, reply_code):
                 with pytest.raises(ProxyError):
                     ctx.socks_info_request(rh)

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='IPv6 socks4 proxies are not yet supported'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
     def test_ipv6_socks4_proxy(self, handler, ctx):
         with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
             with handler(proxies={'all': f'socks4://{server_address}'}) as rh:

@@ -342,7 +332,7 @@ def test_ipv6_socks4_proxy(self, handler, ctx):
     @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
     def test_timeout(self, handler, ctx):
         with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
-            with handler(proxies={'all': f'socks4://{server_address}'}, timeout=1) as rh:
+            with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
                 with pytest.raises(TransportError):
                     ctx.socks_info_request(rh)

@@ -383,7 +373,7 @@ def test_socks5_domain_target(self, handler, ctx):
         with ctx.socks_server(Socks5ProxyHandler) as server_address:
             with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                 response = ctx.socks_info_request(rh, target_domain='localhost')
-                assert response['ipv4_address'] == '127.0.0.1'
+                assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
                 assert response['version'] == 5

     @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)

@@ -404,22 +394,15 @@ def test_socks5h_ip_target(self, handler, ctx):
             assert response['domain_address'] is None
             assert response['version'] == 5

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='IPv6 destination addresses are not yet supported'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
     def test_socks5_ipv6_destination(self, handler, ctx):
         with ctx.socks_server(Socks5ProxyHandler) as server_address:
             with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                 response = ctx.socks_info_request(rh, target_domain='[::1]')
                 assert response['ipv6_address'] == '::1'
-                assert response['port'] == 80
                 assert response['version'] == 5

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='IPv6 socks5 proxies are not yet supported'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
     def test_ipv6_socks5_proxy(self, handler, ctx):
         with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
             with handler(proxies={'all': f'socks5://{server_address}'}) as rh:

@@ -430,10 +413,7 @@ def test_ipv6_socks5_proxy(self, handler, ctx):
     # XXX: is there any feasible way of testing IPv6 source addresses?
     # Same would go for non-proxy source_address test...

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='source_address is not yet supported for socks5 proxies'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
     def test_ipv4_client_source_address(self, handler, ctx):
         with ctx.socks_server(Socks5ProxyHandler) as server_address:
             source_address = f'127.0.0.{random.randint(5, 255)}'
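Note on the rewritten assertions (my reading, not stated in the diff): comparing two booleans with `!=` is a compact exclusive-or, so a test accepts the address being reported in either field, but not in both or neither:

# `response` as returned by ctx.socks_info_request() in the tests above
ipv4_matches = response['ipv4_address'] == '127.0.0.1'
domain_matches = response['domain_address'] == '127.0.0.1'
assert ipv4_matches != domain_matches  # boolean XOR: exactly one must be True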


@@ -2591,7 +2591,7 @@ def _fill_common_fields(self, info_dict, final=True):
                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                 # see http://bugs.python.org/issue1646728)
                 with contextlib.suppress(ValueError, OverflowError, OSError):
-                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                     info_dict[date_key] = upload_date.strftime('%Y%m%d')

         live_keys = ('is_live', 'was_live')


@@ -15,7 +15,7 @@ def get_package_info(module):
         name=getattr(module, '_yt_dlp__identifier', module.__name__),
         version=str(next(filter(None, (
             getattr(module, attr, None)
-            for attr in ('__version__', 'version_string', 'version')
+            for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
         )), None)))


@@ -43,6 +43,8 @@
 try:
     import sqlite3
+    # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
+    sqlite3._yt_dlp__version = sqlite3.sqlite_version
 except ImportError:
     # although sqlite3 is part of the standard library, it is possible to compile python without
     # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
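Note: the two hunks above work together. A minimal sketch of the distinction being handled (version numbers are examples):

import sqlite3

print(sqlite3.version)         # version of the Python sqlite3 bindings, e.g. '2.6.0'
print(sqlite3.sqlite_version)  # version of the underlying SQLite library, e.g. '3.42.0'
# get_package_info() now checks `_yt_dlp__version` first, so the library version is reported.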


@@ -122,7 +122,6 @@
 from .archiveorg import (
     ArchiveOrgIE,
     YoutubeWebArchiveIE,
-    VLiveWebArchiveIE,
 )
 from .arcpublishing import ArcPublishingIE
 from .arkena import ArkenaIE

@@ -165,6 +164,7 @@
     AWAANLiveIE,
     AWAANSeasonIE,
 )
+from .axs import AxsIE
 from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
 from .banbye import (

@@ -223,7 +223,11 @@
     BiliBiliPlayerIE,
     BilibiliSpaceVideoIE,
     BilibiliSpaceAudioIE,
-    BilibiliSpacePlaylistIE,
+    BilibiliCollectionListIE,
+    BilibiliSeriesListIE,
+    BilibiliFavoritesListIE,
+    BilibiliWatchlaterIE,
+    BilibiliPlaylistIE,
     BiliIntlIE,
     BiliIntlSeriesIE,
     BiliLiveIE,

@@ -292,9 +296,11 @@
 from .camsoda import CamsodaIE
 from .camtasia import CamtasiaEmbedIE
 from .camwithher import CamWithHerIE
+from .canal1 import Canal1IE
 from .canalalpha import CanalAlphaIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
+from .caracoltv import CaracolTvPlayIE
 from .carambatv import (
     CarambaTVIE,
     CarambaTVPageIE,

@@ -561,6 +567,7 @@
     EpiconIE,
     EpiconSeriesIE,
 )
+from .eplus import EplusIbIE
 from .epoch import EpochIE
 from .eporner import EpornerIE
 from .eroprofile import (

@@ -1501,6 +1508,7 @@
 from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
 from .porn91 import Porn91IE
+from .pornbox import PornboxIE
 from .porncom import PornComIE
 from .pornflip import PornFlipIE
 from .pornhd import PornHdIE

@@ -1519,7 +1527,7 @@
     PuhuTVIE,
     PuhuTVSerieIE,
 )
-from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
+from .pr0gramm import Pr0grammIE
 from .prankcast import PrankCastIE
 from .premiershiprugby import PremiershipRugbyIE
 from .presstv import PressTVIE

@@ -1555,7 +1563,14 @@
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
-from .radiofrance import FranceCultureIE, RadioFranceIE
+from .radiofrance import (
+    FranceCultureIE,
+    RadioFranceIE,
+    RadioFranceLiveIE,
+    RadioFrancePodcastIE,
+    RadioFranceProfileIE,
+    RadioFranceProgramScheduleIE,
+)
 from .radiozet import RadioZetPodcastIE
 from .radiokapital import (
     RadioKapitalIE,

@@ -1586,6 +1601,7 @@
 from .rbgtum import (
     RbgTumIE,
     RbgTumCourseIE,
+    RbgTumNewCourseIE,
 )
 from .rcs import (
     RCSIE,

@@ -1710,7 +1726,10 @@
     RuvIE,
     RuvSpilaIE
 )
-from .s4c import S4CIE
+from .s4c import (
+    S4CIE,
+    S4CSeriesIE
+)
 from .safari import (
     SafariIE,
     SafariApiIE,

@@ -1791,7 +1810,10 @@
 from .slutload import SlutloadIE
 from .smotrim import SmotrimIE
 from .snotr import SnotrIE
-from .sohu import SohuIE
+from .sohu import (
+    SohuIE,
+    SohuVIE,
+)
 from .sonyliv import (
     SonyLIVIE,
     SonyLIVSeriesIE,

@@ -2354,7 +2376,8 @@
 )
 from .weibo import (
     WeiboIE,
-    WeiboMobileIE
+    WeiboVideoIE,
+    WeiboUserIE,
 )
 from .weiqitv import WeiqiTVIE
 from .weverse import (


@@ -12,7 +12,7 @@
 import urllib.request
 import urllib.response
 import uuid
+from ..utils.networking import clean_proxies
 from .common import InfoExtractor
 from ..aes import aes_ecb_decrypt
 from ..utils import (

@@ -35,7 +35,10 @@ def add_opener(ydl, handler): # FIXME: Create proper API in .networking
     rh = ydl._request_director.handlers['Urllib']
     if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
         return
-    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
+    headers = ydl.params['http_headers'].copy()
+    proxies = ydl.proxies.copy()
+    clean_proxies(proxies, headers)
+    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
     assert isinstance(opener, urllib.request.OpenerDirector)
     opener.add_handler(handler)
     rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')


@@ -22,8 +22,11 @@ def _call_api(self, asin, data=None, note=None):
         resp = self._download_json(
             f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
-            asin, note=note, headers={'Content-Type': 'application/json'},
-            data=json.dumps(data).encode() if data else None,
+            asin, note=note, headers={
+                'Content-Type': 'application/json',
+                'currentpageurl': '/',
+                'currentplatform': 'dWeb'
+            }, data=json.dumps(data).encode() if data else None,
             query=None if data else {
                 'deviceType': 'A1WMMUXPCUJL4N',
                 'contentId': asin,

@@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
             'ext': 'mp4',
             'title': 'May I Kiss You?',
             'language': 'Hindi',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
             'description': 'md5:a549bfc747973e04feb707833474e59d',
             'release_timestamp': 1644710400,
             'release_date': '20220213',

@@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
             'ext': 'mp4',
             'title': 'Jahaan',
             'language': 'Hindi',
-            'thumbnail': r're:^https?://.*\.jpg',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
             'description': 'md5:05eb765a77bf703f322f120ec6867339',
             'release_timestamp': 1647475200,
             'release_date': '20220317',


@@ -3,7 +3,6 @@
 import urllib.parse

 from .common import InfoExtractor
-from .naver import NaverBaseIE
 from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
 from ..compat import compat_urllib_parse_unquote
 from ..networking import HEADRequest

@@ -947,237 +946,3 @@ def _real_extract(self, url):
         if not info.get('title'):
             info['title'] = video_id
         return info
class VLiveWebArchiveIE(InfoExtractor):
IE_NAME = 'web.archive:vlive'
IE_DESC = 'web.archive.org saved vlive videos'
_VALID_URL = r'''(?x)
(?:https?://)?web\.archive\.org/
(?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
(?:https?(?::|%3[Aa])//)?(?:
(?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
)
'''
_TESTS = [{
'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': "Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
'uploader_url': None,
'uploader': None,
'upload_date': '20150817',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': 1439816449,
'like_count': int,
'channel': 'Girl\'s Day',
'channel_id': 'FDF27',
'comment_count': int,
'release_timestamp': 1439818140,
'release_date': '20150817',
'duration': 1014,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
'info_dict': {
'id': '16937',
'ext': 'mp4',
'title': '첸백시 걍방',
'creator': 'EXO',
'view_count': int,
'subtitles': 'mincount:12',
'uploader_id': 'muploader_j',
'uploader_url': 'http://vlive.tv',
'uploader': None,
'upload_date': '20161112',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': 1478923074,
'like_count': int,
'channel': 'EXO',
'channel_id': 'F94BD',
'comment_count': int,
'release_timestamp': 1478924280,
'release_date': '20161112',
'duration': 906,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
'info_dict': {
'id': '101870',
'ext': 'mp4',
'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
'creator': 'Dispatch',
'view_count': int,
'subtitles': 'mincount:6',
'uploader_id': 'V__FRA08071',
'uploader_url': 'http://vlive.tv',
'uploader': None,
'upload_date': '20181130',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': 1543601327,
'like_count': int,
'channel': 'Dispatch',
'channel_id': 'C796F3',
'comment_count': int,
'release_timestamp': 1543601040,
'release_date': '20181130',
'duration': 279,
},
'params': {
'skip_download': True,
},
}]
# The wayback machine has special timestamp and "mode" values:
# timestamp:
# 1 = the first capture
# 2 = the last capture
# mode:
# id_ = Identity - perform no alterations of the original resource, return it as it was archived.
_WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
for retry in self.RetryManager():
try:
return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
raise ExtractorError('Page was not archived', expected=True)
retry.error = e
continue
def _download_archived_json(self, url, video_id, **kwargs):
page = self._download_archived_page(url, video_id, **kwargs)
if not page:
raise ExtractorError('Page was not archived', expected=True)
else:
return self._parse_json(page, video_id)
def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
if not m3u8_doc:
return
# M3U8 document should be changed to archive domain
m3u8_doc = m3u8_doc.splitlines()
url_base = m3u8_url.rsplit('/', 1)[0]
first_segment = None
for i, line in enumerate(m3u8_doc):
if not line.startswith('#'):
m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
first_segment = first_segment or m3u8_doc[i]
# Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
fatal=False, note='Check first segment availablity')
if urlh:
formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
if subtitles:
self._report_ignoring_subs('m3u8')
return formats
# Closely follows the logic of the ArchiveTeam grab script
# See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
def _real_extract(self, url):
video_id, url_date = self._match_valid_url(url).group('id', 'date')
webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)
player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
user_country = traverse_obj(player_info, ('common', 'userCountry'))
main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')
inkey = self._download_archived_json(
f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
'appId': app_id,
'platformType': 'PC',
'gcc': user_country,
'locale': 'en_US',
}, fatal=False)
vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))
vod_data = self._download_archived_json(
f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
'key': inkey.get('inkey'),
'pid': 'rmcPlayer_16692457559726800', # partially unix time and partially random. Fixed value used by archiveteam project
'sid': '2024',
'ver': '2.0',
'devt': 'html5_pc',
'doct': 'json',
'ptc': 'https',
'sptc': 'https',
'cpt': 'vtt',
'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
'pv': '4.26.9',
'dr': '1920x1080',
'cpl': 'en_US',
'lc': 'en_US',
'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
'adu': '%2F',
'videoId': vod_id,
'cc': user_country,
})
formats = []
streams = traverse_obj(vod_data, ('streams', ...))
if len(streams) > 1:
self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
stream = streams[0]
max_stream = max(
stream.get('videos') or [],
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
if max_stream is not None:
params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []
# For parts of the project MP4 files were archived
max_video = max(
traverse_obj(vod_data, ('videos', 'list', ...)),
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
if max_video is not None:
video_url = self._WAYBACK_BASE_URL + max_video.get('source')
urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
fatal=False, note='Check video availablity')
if urlh:
formats.append({'url': video_url})
return {
'id': video_id,
'formats': formats,
**traverse_obj(player_info, ('postDetail', 'post', {
'title': ('officialVideo', 'title', {str}),
'creator': ('author', 'nickname', {str}),
'channel': ('channel', 'channelName', {str}),
'channel_id': ('channel', 'channelCode', {str}),
'duration': ('officialVideo', 'playTime', {int_or_none}),
'view_count': ('officialVideo', 'playCount', {int_or_none}),
'like_count': ('officialVideo', 'likeCount', {int_or_none}),
'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
})),
**traverse_obj(vod_data, ('meta', {
'uploader_id': ('user', 'id', {str}),
'uploader': ('user', 'name', {str}),
'uploader_url': ('user', 'url', {url_or_none}),
'thumbnail': ('cover', 'source', {url_or_none}),
}), expected_type=lambda x: x or None),
**NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
}


@@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
     def _aws_execute_api(self, aws_dict, video_id, query=None):
         query = query or {}
-        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+        amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
         date = amz_date[:8]
         headers = {
             'Accept': 'application/json',

yt_dlp/extractor/axs.py (new file, 87 lines)

@ -0,0 +1,87 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
js_to_json,
parse_iso8601,
traverse_obj,
url_or_none,
)
class AxsIE(InfoExtractor):
IE_NAME = 'axs.tv'
_VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
'md5': '8d97736ae8e50c64df528e5e676778cf',
'info_dict': {
'id': '5f4dc776b70e4f1c194f22ef',
'title': 'Small Town',
'ext': 'mp4',
'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
'upload_date': '20230602',
'timestamp': 1685729564,
'duration': 1284.216,
'series': 'Rock & Roll Road Trip with Sammy Hagar',
'season': 2,
'episode': '3',
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
},
}, {
'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
'md5': '300ae795cd8f9984652c0949734ffbdc',
'info_dict': {
'id': '5f488148b70e4f392572977c',
'display_id': 'daryl-hall',
'title': 'Daryl Hall',
'ext': 'mp4',
'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
'upload_date': '20230214',
'timestamp': 1676403615,
'duration': 2570.668,
'series': 'The Big Interview with Dan Rather',
'season': 3,
'episode': '5',
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage_json_data = self._search_json(
r'mountObj\s*=', webpage, 'video ID data', display_id,
transform_source=js_to_json)
video_id = webpage_json_data['video_id']
company_id = webpage_json_data['company_id']
meta = self._download_json(
f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
video_id, query={'device_type': 'desktop_web'})['video']
formats = self._extract_m3u8_formats(
meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')
subtitles = {}
for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
{'ext': cc.get('srtExt'), 'url': cc['srtPath']})
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
**traverse_obj(meta, {
'title': ('title', {str}),
'description': ('description', {str}),
'series': ('seriestitle', {str}),
'season': ('season', {int}),
'episode': ('episode', {str}),
'duration': ('duration', {float_or_none}),
'timestamp': ('updated_at', {parse_iso8601}),
'thumbnail': ('thumb', {url_or_none}),
}),
'subtitles': subtitles,
}


@@ -31,7 +31,7 @@ def _extract_playlist(self, playlist_id):

 class BanByeIE(BanByeBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>[\w-]+)'
     _TESTS = [{
         'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
         'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',

@@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE):
             'title': 'Krzysztof Karoń',
             'id': 'p_Ld82N6gBw_OJ',
         },
-        'playlist_count': 9,
+        'playlist_mincount': 9,
+    }, {
+        'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
+        'info_dict': {
+            'id': 'v_kb6_o1Kyq-CD',
+            'ext': 'mp4',
+            'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
+            'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
+            'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
+            'channel_id': 'ch_QgWnHvDG2fo5',
+            'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
+            'duration': 597,
+            'timestamp': 1688642656,
+            'upload_date': '20230706',
+            'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
+            'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
+            'like_count': int,
+            'dislike_count': int,
+            'view_count': int,
+            'comment_count': int,
+        },
     }]

     def _real_extract(self, url):


@@ -15,11 +15,13 @@
     float_or_none,
     get_element_by_class,
     int_or_none,
+    join_nonempty,
     js_to_json,
     parse_duration,
     parse_iso8601,
     parse_qs,
     strip_or_none,
+    traverse_obj,
     try_get,
     unescapeHTML,
     unified_timestamp,

@@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor):
                         iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
                         music/(?:clips|audiovideo/popular)[/#]|
                         radio/player/|
-                        sounds/play/|
                         events/[^/]+/play/[^/]+/
                     )
                     (?P<id>%s)(?!/(?:episodes|broadcasts|clips))

@@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
             # rtmp download
             'skip_download': True,
         },
-    }, {
-        'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
-        'note': 'Audio',
-        'info_dict': {
-            'id': 'm0007jz9',
-            'ext': 'mp4',
-            'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
-            'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
-            'duration': 9840,
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }, {
         'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
         'only_matching': True,

@@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
             'upload_date': '20190604',
             'categories': ['Psychology'],
         },
+    }, {
+        # BBC Sounds
+        'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+        'info_dict': {
+            'id': 'm001q789',
+            'ext': 'mp4',
+            'title': 'The Night Tracks Mix - Music for the darkling hour',
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
+            'chapters': 'count:8',
+            'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
+            'uploader': 'Radio 3',
+            'duration': 1800,
+            'uploader_id': 'bbc_radio_three',
+        },
     }, { # onion routes
         'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
         'only_matching': True,

@@ -1128,6 +1129,13 @@ def _real_extract(self, url):
                 'uploader_id': network.get('id'),
                 'formats': formats,
                 'subtitles': subtitles,
+                'chapters': traverse_obj(preload_state, (
+                    'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
+                        'title': ('titles', {lambda x: join_nonempty(
+                            'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+                        'start_time': ('offset', 'start', {float_or_none}),
+                        'end_time': ('offset', 'end', {float_or_none}),
+                    })) or None,
             }

         bbc3_config = self._parse_json(
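Note: a rough illustration (mine) of what the new 'chapters' traversal yields for one preload_state track entry; the exact tracklist shape is assumed from the code above:

track = {'titles': {'primary': 'Artist', 'secondary': 'Track title'},
         'offset': {'start': 60.0, 'end': 243.0}}
# -> {'title': 'Artist - Track title', 'start_time': 60.0, 'end_time': 243.0}
# Entries without a numeric offset.start are filtered out by the lambda predicate.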


@@ -1,6 +1,7 @@
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
+    traverse_obj,
     unescapeHTML,
 )

@@ -8,7 +9,8 @@
 class BildIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
     IE_DESC = 'Bild.de'
-    _TEST = {
+    _TESTS = [{
+        'note': 'static MP4 only',
         'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
         'md5': 'dd495cbd99f2413502a1713a1156ac8a',
         'info_dict': {

@@ -19,7 +21,19 @@ class BildIE(InfoExtractor):
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 196,
         }
-    }
+    }, {
+        'note': 'static MP4 and HLS',
+        'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
+        'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
+        'info_dict': {
+            'id': '85158620',
+            'ext': 'mp4',
+            'title': 'Der Sprungturm-Skandal',
+            'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 69,
+        }
+    }]

     def _real_extract(self, url):
         video_id = self._match_id(url)

@@ -27,11 +41,23 @@ def _real_extract(self, url):
         video_data = self._download_json(
             url.split('.bild.html')[0] + ',view=json.bild.html', video_id)

+        formats = []
+        for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
+            src_type = src.get('type')
+            if src_type == 'application/x-mpegURL':
+                formats.extend(
+                    self._extract_m3u8_formats(
+                        src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif src_type == 'video/mp4':
+                formats.append({'url': src['src'], 'format_id': 'http-mp4'})
+            else:
+                self.report_warning(f'Skipping unsupported format type: "{src_type}"')
+
         return {
             'id': video_id,
             'title': unescapeHTML(video_data['title']).strip(),
             'description': unescapeHTML(video_data.get('description')),
-            'url': video_data['clipList'][0]['srces'][0]['src'],
+            'formats': formats,
             'thumbnail': video_data.get('poster'),
             'duration': int_or_none(video_data.get('durationSec')),
         }


@@ -3,6 +3,7 @@
 import hashlib
 import itertools
 import math
+import re
 import time
 import urllib.parse

@@ -14,6 +15,7 @@
     GeoRestrictedError,
     InAdvancePagedList,
     OnDemandPagedList,
+    bool_or_none,
     filter_dict,
     float_or_none,
     format_field,

@@ -34,27 +36,31 @@
     unsmuggle_url,
     url_or_none,
     urlencode_postdata,
+    variadic,
 )


 class BilibiliBaseIE(InfoExtractor):
+    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
+
     def extract_formats(self, play_info):
         format_names = {
             r['quality']: traverse_obj(r, 'new_description', 'display_desc')
             for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
         }

-        audios = traverse_obj(play_info, ('dash', 'audio', ...))
+        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
         flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
         if flac_audio:
             audios.append(flac_audio)
         formats = [{
             'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
             'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
-            'acodec': audio.get('codecs'),
+            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
             'vcodec': 'none',
             'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
-            'filesize': int_or_none(audio.get('size'))
+            'filesize': int_or_none(audio.get('size')),
+            'format_id': str_or_none(audio.get('id')),
         } for audio in audios]

         formats.extend({

@@ -65,9 +71,13 @@ def extract_formats(self, play_info):
             'height': int_or_none(video.get('height')),
             'vcodec': video.get('codecs'),
             'acodec': 'none' if audios else None,
+            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
             'tbr': float_or_none(video.get('bandwidth'), scale=1000),
             'filesize': int_or_none(video.get('size')),
             'quality': int_or_none(video.get('id')),
+            'format_id': traverse_obj(
+                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
+                ('id', {str_or_none}), get_all=False),
             'format': format_names.get(video.get('id')),
         } for video in traverse_obj(play_info, ('dash', 'video', ...)))
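Note: the new format_id logic first tries the numeric ID embedded in the DASH segment URL, then falls back to the quality id. A worked example (the URL is invented):

import re

_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
url = 'https://upos-sz.bilivideo.com/upgcxcode/xx/yy-1-30280.m4s?e=abc'
match = _FORMAT_ID_RE.search(url)
print(match.group(1) if match else None)  # -> '30280'; otherwise video['id'] is used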
@@ -149,7 +159,7 @@ def _get_episodes_from_season(self, ss_id, url):

 class BiliBiliIE(BilibiliBaseIE):
-    _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

     _TESTS = [{
         'url': 'https://www.bilibili.com/video/BV13x41117TL',

@@ -245,7 +255,7 @@ class BiliBiliIE(BilibiliBaseIE):
             'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
             'duration': 313.557,
             'upload_date': '20220709',
-            'uploader': '小夫Tech',
+            'uploader': '小夫太渴',
             'timestamp': 1657347907,
             'uploader_id': '1326814124',
             'comment_count': int,

@@ -502,7 +512,7 @@ def _real_extract(self, url):

 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
-    _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.bilibili.com/bangumi/media/md24097891',
         'info_dict': {

@@ -521,7 +531,7 @@ def _real_extract(self, url):

 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
-    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
+    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.bilibili.com/bangumi/play/ss26801',
         'info_dict': {

@@ -672,13 +682,35 @@ def get_entries(page_data):
         return self.playlist_result(paged_list, playlist_id)


-class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
-    _VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
+class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
+    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
+        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
+            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
+
+    def _get_uploader(self, uid, playlist_id):
+        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
+        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
+
+    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
+        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
+        metadata.pop('page_count', None)
+        metadata.pop('page_size', None)
+        return metadata, page_list
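Note on `variadic` here (my gloss): it wraps a bare key in a tuple unless a tuple or list is already given, so both call styles spell the same traversal:

# _get_entries(data, 'archives')        -> traverse_obj(data, ('archives', ..., 'bvid', {str}))
# _get_entries(data, ('data', 'list'))  -> traverse_obj(data, ('data', 'list', ..., 'bvid', {str}))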
+class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
+    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
     _TESTS = [{
         'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
         'info_dict': {
             'id': '2142762_57445',
-            'title': '《底特律 变人》'
+            'title': '【完结】《底特律 变人》全结局流程解说',
+            'description': '',
+            'uploader': '老戴在此',
+            'uploader_id': '2142762',
+            'timestamp': int,
+            'upload_date': str,
+            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
         },
         'playlist_mincount': 31,
     }]

@@ -699,22 +731,251 @@ def get_metadata(page_data):
             return {
                 'page_count': math.ceil(entry_count / page_size),
                 'page_size': page_size,
-                'title': traverse_obj(page_data, ('meta', 'name'))
+                'uploader': self._get_uploader(mid, playlist_id),
+                **traverse_obj(page_data, {
+                    'title': ('meta', 'name', {str}),
+                    'description': ('meta', 'description', {str}),
+                    'uploader_id': ('meta', 'mid', {str_or_none}),
+                    'timestamp': ('meta', 'ptime', {int_or_none}),
+                    'thumbnail': ('meta', 'cover', {url_or_none}),
+                })
             }

         def get_entries(page_data):
-            for entry in page_data.get('archives', []):
-                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
-                                      BiliBiliIE, entry['bvid'])
+            return self._get_entries(page_data, 'archives')

         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
-        return self.playlist_result(paged_list, playlist_id, metadata['title'])
+        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
_TESTS = [{
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
'info_dict': {
'id': '1958703906_547718',
'title': '直播回放',
'description': '直播回放',
'uploader': '靡烟miya',
'uploader_id': '1958703906',
'timestamp': 1637985853,
'upload_date': '20211127',
'modified_timestamp': int,
'modified_date': str,
},
'playlist_mincount': 513,
}]
def _real_extract(self, url):
mid, sid = self._match_valid_url(url).group('mid', 'sid')
playlist_id = f'{mid}_{sid}'
playlist_meta = traverse_obj(self._download_json(
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
), {
'title': ('data', 'meta', 'name', {str}),
'description': ('data', 'meta', 'description', {str}),
'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
})
def fetch_page(page_idx):
return self._download_json(
'https://api.bilibili.com/x/series/archives',
playlist_id, note=f'Downloading page {page_idx}',
query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
def get_metadata(page_data):
page_size = page_data['page']['size']
entry_count = page_data['page']['total']
return {
'page_count': math.ceil(entry_count / page_size),
'page_size': page_size,
'uploader': self._get_uploader(mid, playlist_id),
**playlist_meta
}
def get_entries(page_data):
return self._get_entries(page_data, 'archives')
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
_TESTS = [{
'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
'info_dict': {
'id': '1103407912',
'title': '【V2】',
'description': '',
'uploader': '晓月春日',
'uploader_id': '84912',
'timestamp': 1604905176,
'upload_date': '20201109',
'modified_timestamp': int,
'modified_date': str,
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
'view_count': int,
'like_count': int,
},
'playlist_mincount': 22,
}, {
'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
'only_matching': True,
}]
def _real_extract(self, url):
fid = self._match_id(url)
list_info = self._download_json(
f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
fid, note='Downloading favlist metadata')
if list_info['code'] == -403:
self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
entries = self._get_entries(self._download_json(
f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
fid, note='Download favlist entries'), 'data')
return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
'title': ('title', {str}),
'description': ('intro', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}),
'modified_timestamp': ('mtime', {int_or_none}),
'thumbnail': ('cover', {url_or_none}),
'view_count': ('cnt_info', 'play', {int_or_none}),
'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
})))
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.bilibili.com/watchlater/#/list',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
}]
def _real_extract(self, url):
list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
watchlater_info = self._download_json(
'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
if watchlater_info['code'] == -101:
self.raise_login_required(msg='You need to login to access your watchlater list')
entries = self._get_entries(watchlater_info, ('data', 'list'))
return self.playlist_result(entries, id=list_id, title='稍后再看')
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
'info_dict': {
'id': '5_547718',
'title': '直播回放',
'uploader': '靡烟miya',
'uploader_id': '1958703906',
'timestamp': 1637985853,
'upload_date': '20211127',
},
'playlist_mincount': 513,
}, {
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
'info_dict': {
'id': '5_547718',
},
'playlist_mincount': 513,
'skip': 'redirect url',
}, {
'url': 'https://www.bilibili.com/list/ml1103407912',
'info_dict': {
'id': '3_1103407912',
'title': '【V2】',
'uploader': '晓月春日',
'uploader_id': '84912',
'timestamp': 1604905176,
'upload_date': '20201109',
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
},
'playlist_mincount': 22,
}, {
'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
'info_dict': {
'id': '3_1103407912',
},
'playlist_mincount': 22,
'skip': 'redirect url',
}, {
'url': 'https://www.bilibili.com/list/watchlater',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
}, {
'url': 'https://www.bilibili.com/medialist/play/watchlater',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
}]
def _extract_medialist(self, query, list_id):
for page_num in itertools.count(1):
page_data = self._download_json(
'https://api.bilibili.com/x/v2/medialist/resource/list',
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
)['data']
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
if not page_data.get('has_more', False):
break
def _real_extract(self, url):
list_id = self._match_id(url)
webpage = self._download_webpage(url, list_id)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
if error_code == -400 and list_id == 'watchlater':
self.raise_login_required('You need to login to access your watchlater playlist')
elif error_code == -403:
self.raise_login_required('This is a private playlist. You need to login as its owner')
elif error_code == 11010:
raise ExtractorError('Playlist is no longer available', expected=True)
raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
query = {
'ps': 20,
'with_current': False,
**traverse_obj(initial_state, {
'type': ('playlist', 'type', {int_or_none}),
'biz_id': ('playlist', 'id', {int_or_none}),
'tid': ('tid', {int_or_none}),
'sort_field': ('sortFiled', {int_or_none}),
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
})
}
metadata = {
'id': f'{query["type"]}_{query["biz_id"]}',
**traverse_obj(initial_state, ('mediaListInfo', {
'title': ('title', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}),
'thumbnail': ('cover', {url_or_none}),
})),
}
return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
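A self-contained sketch (hypothetical fetch function, invented data) of the cursor-style pagination that _extract_medialist implements above: each page reports the id of its last item, which becomes the `oid` cursor for the next request, until `has_more` turns false.

import itertools

def fetch(query):  # stand-in for the medialist API request
    items = [{'id': i, 'bv_id': f'BV{i}'} for i in range(query['oid'] + 1, query['oid'] + 3)]
    return {'media_list': items, 'has_more': items[-1]['id'] < 6}

def entries():
    query = {'oid': 0}
    for page_num in itertools.count(1):
        page = fetch(query)
        yield from (item['bv_id'] for item in page['media_list'])
        query['oid'] = page['media_list'][-1]['id']  # advance the cursor
        if not page.get('has_more'):
            break

print(list(entries()))  # ['BV1', ..., 'BV6'] across three pages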
class BilibiliCategoryIE(InfoExtractor):
IE_NAME = 'Bilibili category extractor'
_MAX_RESULTS = 1000000
-_VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
_TESTS = [{
'url': 'https://www.bilibili.com/v/kichiku/mad',
'info_dict': {
@ -1399,7 +1660,7 @@ def _real_extract(self, url):
class BiliLiveIE(InfoExtractor):
-_VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
_VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://live.bilibili.com/196',


@ -1,56 +1,170 @@
import functools
import re

from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_text_and_html_by_tag,
get_elements_by_class,
join_nonempty,
js_to_json,
-determine_ext,
mimetype2ext,
unified_strdate,
url_or_none,
urljoin,
variadic,
)
from ..utils.traversal import traverse_obj
def html_get_element(tag=None, cls=None):
assert tag or cls, 'One of tag or class is required'
if cls:
func = functools.partial(get_elements_by_class, cls, tag=tag)
else:
func = functools.partial(get_element_text_and_html_by_tag, tag)
def html_get_element_wrapper(html):
return variadic(func(html))[0]
return html_get_element_wrapper
class BpbIE(InfoExtractor):
IE_DESC = 'Bundeszentrale für politische Bildung'
-_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
_VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
-_TEST = {
_TESTS = [{
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
-'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
'info_dict': {
'id': '297',
'ext': 'mp4',
'creator': 'Kooperative Berlin',
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
'release_date': '20160115',
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
-'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
'info_dict': {
'id': '522184',
'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
'release_date': '20230621',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
'info_dict': {
'id': '518789',
'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
'release_date': '20230302',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
'title': 'md5:3e956f264bb501f6383f10495a401da4',
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
'only_matching': True,
}, {
'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
'info_dict': {
'id': '315813',
'ext': 'mp3',
'creator': 'Axel Schröder',
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
'release_date': '20200921',
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
'title': 'Folge 1: Eine Einführung',
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
'info_dict': {
'id': '517806',
'ext': 'mp3',
'creator': 'Bundeszentrale für politische Bildung',
'description': 'md5:594689600e919912aade0b2871cc3fed',
'release_date': '20230127',
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
'title': 'Die Weltanschauung der "Neuen Rechten"',
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
'only_matching': True,
}]
_TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
def _parse_vue_attributes(self, name, string, video_id):
attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name))
for key, value in attributes.items():
if key.startswith(':'):
attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False)
return attributes
@staticmethod
def _process_source(source):
url = url_or_none(source['src'])
if not url:
return None
source_type = source.get('type', '')
extension = mimetype2ext(source_type)
is_video = source_type.startswith('video')
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
return {
'url': url,
'ext': extension,
'vcodec': None if is_video else 'none',
'quality': 10 if note == 'high' else 0,
'format_note': note,
'format_id': join_nonempty(extension, note),
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

-title = self._html_search_regex(
-r'<h2 class="white">(.*?)</h2>', webpage, 'title')
-video_info_dicts = re.findall(
-r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
-formats = []
-for video_info in video_info_dicts:
-video_info = self._parse_json(
-video_info, video_id, transform_source=js_to_json, fatal=False)
-if not video_info:
-continue
-video_url = video_info.get('src')
-if not video_url:
-continue
-quality = 'high' if '_high' in video_url else 'low'
-formats.append({
-'url': video_url,
-'quality': 10 if quality == 'high' else 0,
-'format_note': quality,
-'format_id': '%s-%s' % (quality, determine_ext(video_url)),
-})
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))

return {
'id': video_id,
-'formats': formats,
-'title': title,
-'description': self._og_search_description(webpage),
'title': traverse_obj(title_result, ('title', {str.strip})) or None,
# This metadata could be interpreted otherwise, but it fits "series" the most
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
'description': join_nonempty(*traverse_obj(webpage, [(
{html_get_element(cls='opening-intro')},
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
), {clean_html}]), delim='\n\n') or None,
'creator': self._html_search_meta('author', webpage),
'uploader': self._html_search_meta('publisher', webpage),
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
'formats': (':sources', ..., {self._process_source}),
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
}),
}
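A hedged sketch (sample tag invented) of the _parse_vue_attributes technique above: Vue-style attributes whose names start with ':' carry JS object literals, which js_to_json turns into parseable JSON.

import json
from yt_dlp.utils import extract_attributes, js_to_json

tag = '<bpb-player :sources="[{src: \'a.mp4\', type: \'video/mp4\'}]" poster="/p.jpg">'
attrs = extract_attributes(tag)
# keys starting with ':' hold JS literals; plain keys stay as strings
attrs[':sources'] = json.loads(js_to_json(attrs[':sources']))
print(attrs[':sources'][0]['src'], attrs['poster'])  # a.mp4 /p.jpg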


@ -0,0 +1,39 @@
from .common import InfoExtractor
class Canal1IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
'info_dict': {
'id': '63b39f6b354977084b85ab54',
'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
'ext': 'mp4',
},
}, {
'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
'info_dict': {
'id': '63b39e93f5fd223aa32250fb',
'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
'ext': 'mp4',
},
}, {
# Geo-restricted to Colombia
'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
return self.url_result(
self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
display_id=display_id, url_transparent=True)
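A small illustration (invented HTML) of the lookup in Canal1IE._real_extract above: the page's JSON-LD carries an "embedUrl" key whose value is handed to another extractor via url_result.

import re

webpage = '<script type="application/ld+json">{"embedUrl": "https://player.example/abc"}</script>'
embed_url = re.search(r'"embedUrl"\s*:\s*"([^"]+)', webpage).group(1)
print(embed_url)  # https://player.example/abc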


@ -0,0 +1,136 @@
import base64
import json
import uuid
from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
traverse_obj,
urljoin,
)
class CaracolTvPlayIE(InfoExtractor):
_VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
_NETRC_MACHINE = 'caracoltv-play'
_TESTS = [{
'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
'info_dict': {
'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
'title': 'La teoría del promedio',
'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
},
'playlist_count': 6,
}, {
'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
'info_dict': {
'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
'title': 'Ella',
'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
},
'playlist_count': 10,
}, {
'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
'info_dict': {
'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
'title': 'La vuelta al mundo en 80 risas 2022',
'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
},
'playlist_count': 17,
}, {
'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
'only_matching': True,
}]
_USER_TOKEN = None
def _extract_app_token(self, webpage):
config_js_path = self._search_regex(
r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)
mediation_config = {} if not config_js_path else self._search_json(
r'mediation\s*:', self._download_webpage(
urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
'mediation_config', None, transform_source=js_to_json, fatal=False)
key = traverse_obj(
mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
secret = traverse_obj(
mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'
return base64.b64encode(f'{key}:{secret}'.encode()).decode()
def _perform_login(self, email, password):
webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
app_token = self._extract_app_token(webpage)
bearer_token = self._download_json(
'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
headers={'Authorization': f'Basic {app_token}'})['token']
self._USER_TOKEN = self._download_json(
'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
'Content-Type': 'application/json',
'Authorization': f'Bearer {bearer_token}',
}, data=json.dumps({
'device_data': {
'device_id': str(uuid.uuid4()),
'device_token': '',
'device_type': 'web'
},
'login_data': {
'enabled': True,
'email': email,
'password': password,
}
}).encode())['user_token']
def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')
return {
'id': video_data['id'],
'title': video_data.get('name'),
'description': video_data.get('description'),
'formats': formats,
'subtitles': subtitles,
'thumbnails': traverse_obj(
video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
'series_id': series_id,
'season_id': season_id,
'season_number': int_or_none(season_number),
'episode_number': int_or_none(video_data.get('item_order')),
'is_live': video_data.get('entry_type') == 3,
}
def _extract_series_seasons(self, seasons, series_id):
for season in seasons:
api_response = self._download_json(
'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
season_number = season.get('order')
for episode in api_response['items']:
yield self._extract_video(episode, series_id, season['id'], season_number)
def _real_extract(self, url):
series_id = self._match_id(url)
if self._USER_TOKEN is None:
self._perform_login('guest@inmobly.com', 'Test@gus1')
api_response = self._download_json(
'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
if not api_response.get('seasons'):
return self._extract_video(api_response)
return self.playlist_result(
self._extract_series_seasons(api_response['seasons'], series_id),
series_id, **traverse_obj(api_response, {
'title': 'name',
'description': 'description',
}))
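For reference, a sketch (demo values, not the real credentials) of the Basic-auth string built in _extract_app_token above: key and secret are joined with ':' and base64-encoded.

import base64

key, secret = 'demo-key', 'demo-secret'
token = base64.b64encode(f'{key}:{secret}'.encode()).decode()
print(token)  # ZGVtby1rZXk6ZGVtby1zZWNyZXQ=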


@ -339,12 +339,12 @@ def _new_claims_token(self, email, password):
data = json.dumps({'jwt': sig}).encode()
headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
-None, data=data, headers=headers)
None, data=data, headers=headers, expected_status=426)
cbc_access_token = resp['accessToken']

headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
-None, headers=headers)
None, headers=headers, expected_status=426)
return resp['claimsToken']

def _get_claims_token_expiry(self):


@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor):
'id': '30c3',
},
'playlist_count': 135,
}, {
'url': 'https://media.ccc.de/c/DS2023',
'info_dict': {
'title': 'Datenspuren 2023',
'id': 'DS2023',
},
'playlist_count': 37
}]

def _real_extract(self, url):
-playlist_id = self._match_id(url).lower()
playlist_id = self._match_id(url)
conf = self._download_json(
'https://media.ccc.de/public/conferences/' + playlist_id,


@ -1,31 +1,72 @@
import time
import hashlib
-import re
import urllib
import uuid

from .common import InfoExtractor
from .openload import PhantomJSwrapper
from ..utils import (
ExtractorError,
UserNotLive,
determine_ext,
int_or_none,
js_to_json,
parse_resolution,
str_or_none,
traverse_obj,
unescapeHTML,
-unified_strdate,
url_or_none,
urlencode_postdata,
urljoin,
)
-class DouyuTVIE(InfoExtractor):
-IE_DESC = '斗鱼'
class DouyuBaseIE(InfoExtractor):
def _download_cryptojs_md5(self, video_id):
for url in [
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
]:
js_code = self._download_webpage(
url, video_id, note='Downloading signing dependency', fatal=False)
if js_code:
self.cache.store('douyu', 'crypto-js-md5', js_code)
return js_code
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
def _get_cryptojs_md5(self, video_id):
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
def _calc_sign(self, sign_func, video_id, a):
b = uuid.uuid4().hex
c = round(time.time())
js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
phantom = PhantomJSwrapper(self)
result = phantom.execute(js_script, video_id,
note='Executing JS signing script').strip()
return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
def _search_js_sign_func(self, webpage, fatal=True):
# The greedy look-behind ensures last possible script tag is matched
return self._search_regex(
r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
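A short sketch (made-up output string) of the last step in _calc_sign above: the signing script prints a query string, which urllib.parse.parse_qs turns into the form fields for the getH5Play request.

import urllib.parse

result = 'v=220120222&did=abc123&tt=1700000000&sign=deadbeef'
fields = {k: v[0] for k, v in urllib.parse.parse_qs(result).items()}
print(fields['sign'])  # deadbeef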
class DouyuTVIE(DouyuBaseIE):
IE_DESC = '斗鱼直播'
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
-'url': 'http://www.douyutv.com/iseven',
'url': 'https://www.douyu.com/pigff',
'info_dict': {
-'id': '17732',
'id': '24422',
-'display_id': 'iseven',
'display_id': 'pigff',
-'ext': 'flv',
'ext': 'mp4',
-'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-'description': r're:.*m7show@163\.com.*',
'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
-'thumbnail': r're:^https?://.*\.png',
'thumbnail': str,
-'uploader': '7师傅',
'uploader': 'pigff',
'is_live': True,
'live_status': 'is_live',
},
'params': {
'skip_download': True,
@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
'only_matching': True,
}]
def _get_sign_func(self, room_id, video_id):
return self._download_json(
f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
note='Getting signing script')['data'][f'room{room_id}']
def _extract_stream_formats(self, stream_formats):
formats = []
for stream_info in traverse_obj(stream_formats, (..., 'data')):
stream_url = urljoin(
traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
if stream_url:
rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
ext = determine_ext(stream_url)
formats.append({
'url': stream_url,
'format_id': str_or_none(rate_id),
'ext': 'mp4' if ext == 'm3u8' else ext,
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
'quality': rate_id % -10000 if rate_id is not None else None,
**traverse_obj(rate_info, {
'format': ('name', {str_or_none}),
'tbr': ('bit', {int_or_none}),
}),
})
return formats
def _real_extract(self, url):
video_id = self._match_id(url)
-if video_id.isdigit():
-room_id = video_id
-else:
-page = self._download_webpage(url, video_id)
-room_id = self._html_search_regex(
-r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
webpage = self._download_webpage(url, video_id)
room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')

if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
raise UserNotLive(video_id=video_id)

# Grab metadata from API
params = {
@ -102,110 +171,136 @@ def _real_extract(self, url):
'time': int(time.time()),
}
params['auth'] = hashlib.md5(
-f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
-room = self._download_json(
room = traverse_obj(self._download_json(
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
-note='Downloading room info', query=params)['data']
note='Downloading room info', query=params, fatal=False), 'data')

# 1 = live, 2 = offline
-if room.get('show_status') == '2':
-raise ExtractorError('Live stream is offline', expected=True)
if traverse_obj(room, 'show_status') == '2':
raise UserNotLive(video_id=video_id)

-video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
-formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
-title = unescapeHTML(room['room_name'])
-description = room.get('show_details')
-thumbnail = room.get('room_src')
-uploader = room.get('nickname')
js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
form_data = {
'rate': 0,
**self._calc_sign(js_sign_func, video_id, room_id),
}
stream_formats = [self._download_json(
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
video_id, note="Downloading livestream format",
data=urlencode_postdata(form_data))]

for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
form_data['rate'] = rate_id
stream_formats.append(self._download_json(
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
video_id, note=f'Downloading livestream format {rate_id}',
data=urlencode_postdata(form_data)))

return {
'id': room_id,
-'display_id': video_id,
-'title': title,
-'description': description,
-'thumbnail': thumbnail,
-'uploader': uploader,
-'subtitles': subs,
-'formats': formats,
'formats': self._extract_stream_formats(stream_formats),
'is_live': True,
**traverse_obj(room, {
'display_id': ('url', {str}, {lambda i: i[1:]}),
'title': ('room_name', {unescapeHTML}),
'description': ('show_details', {str}),
'uploader': ('nickname', {str}),
'thumbnail': ('room_src', {url_or_none}),
})
}
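A note in sketch form on the `rate_id % -10000` trick used by _extract_stream_formats above: Python's modulo takes the sign of the divisor, so rate 0 (the source stream) keeps quality 0 while every other rate id maps to a large negative value, ordered among themselves by rate id.

for rate_id in (0, 2, 3, 4):
    print(rate_id, rate_id % -10000)  # 0 -> 0, 2 -> -9998, 3 -> -9997, 4 -> -9996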
-class DouyuShowIE(InfoExtractor):
class DouyuShowIE(DouyuBaseIE):
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
-'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
-'md5': '0c2cfd068ee2afe657801269b2d86214',
'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
'info_dict': {
-'id': 'rjNBdvnVXNzvE2yw',
'id': 'mPyq7oVNe5Yv1gLY',
'ext': 'mp4',
-'title': '陈一发儿:砒霜 我有个室友系列04-01 22点场',
'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
-'duration': 7150.08,
'duration': 633,
-'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': str,
-'uploader': '陈一发儿',
'uploader': '美食作家王刚V',
-'uploader_id': 'XrZwYelr5wbK',
'uploader_id': 'OVAO4NVx1m7Q',
-'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
-'upload_date': '20170402',
'timestamp': 1661850002,
'upload_date': '20220830',
'view_count': int,
'tags': ['美食', '美食综合'],
},
}, {
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
'only_matching': True,
}]
_FORMATS = {
'super': '原画',
'high': '超清',
'normal': '高清',
}
_QUALITIES = {
'super': -1,
'high': -2,
'normal': -3,
}
_RESOLUTIONS = {
'super': '1920x1080',
'high': '1280x720',
'normal': '852x480',
}
def _real_extract(self, url):
url = url.replace('vmobile.', 'v.')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

-room_info = self._parse_json(self._search_regex(
-r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
-video_info = None
-for trial in range(5):
-# Sometimes Douyu rejects our request. Let's try it more times
-try:
-video_info = self._download_json(
-'https://vmobile.douyu.com/video/getInfo', video_id,
-query={'vid': video_id},
-headers={
-'Referer': url,
-'x-requested-with': 'XMLHttpRequest',
-})
-break
-except ExtractorError:
-self._sleep(1, video_id)
-if not video_info:
-raise ExtractorError('Can\'t fetch video info')
-formats = self._extract_m3u8_formats(
-video_info['data']['video_url'], video_id,
-entry_protocol='m3u8_native', ext='mp4')
-upload_date = unified_strdate(self._html_search_regex(
-r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
-'upload date', fatal=False))
-uploader = uploader_id = uploader_url = None
-mobj = re.search(
-r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
-webpage)
-if mobj:
-uploader_id, uploader = mobj.groups()
-uploader_url = urljoin(url, '/author/' + uploader_id)
video_info = self._search_json(
r'<script>\s*window\.\$DATA\s*=', webpage,
'video info', video_id, transform_source=js_to_json)

js_sign_func = self._search_js_sign_func(webpage)
form_data = {
'vid': video_id,
**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
}
url_info = self._download_json(
'https://v.douyu.com/api/stream/getStreamUrl', video_id,
data=urlencode_postdata(form_data), note="Downloading video formats")

formats = []
for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
video_url = traverse_obj(url, ('url', {url_or_none}))
if video_url:
ext = determine_ext(video_url)
formats.append({
'format': self._FORMATS.get(name),
'format_id': name,
'url': video_url,
'quality': self._QUALITIES.get(name),
'ext': 'mp4' if ext == 'm3u8' else ext,
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
**parse_resolution(self._RESOLUTIONS.get(name))
})
else:
self.to_screen(
f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')

return {
'id': video_id,
-'title': room_info['name'],
'formats': formats,
-'duration': room_info.get('duration'),
-'thumbnail': room_info.get('pic'),
-'upload_date': upload_date,
-'uploader': uploader,
-'uploader_id': uploader_id,
-'uploader_url': uploader_url,
**traverse_obj(video_info, ('DATA', {
'title': ('content', 'title', {str}),
'uploader': ('content', 'author', {str}),
'uploader_id': ('content', 'up_id', {str_or_none}),
'duration': ('content', 'video_duration', {int_or_none}),
'thumbnail': ('content', 'video_pic', {url_or_none}),
'timestamp': ('content', 'create_time', {int_or_none}),
'view_count': ('content', 'view_num', {int_or_none}),
'tags': ('videoTag', ..., 'tagName', {str}),
}))
}

yt_dlp/extractor/eplus.py (new file, 96 lines)

@ -0,0 +1,96 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
try_call,
unified_timestamp,
)
class EplusIbIE(InfoExtractor):
IE_NAME = 'eplus:inbound'
IE_DESC = 'e+ (イープラス) overseas'
_VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
_TESTS = [{
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
'info_dict': {
'id': '354502-0001-002',
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022LIVE with a smile!【Streaming+(配信)】',
'live_status': 'was_live',
'release_date': '20211231',
'release_timestamp': 1640952000,
'description': str,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Could not find the playlist URL. This event may not be accessible',
'No video formats found!',
'Requested format is not available',
],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
delivery_status = data_json.get('delivery_status')
archive_mode = data_json.get('archive_mode')
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
release_timestamp_str = data_json.get('event_datetime_text') # JST
self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')
if delivery_status == 'PREPARING':
live_status = 'is_upcoming'
elif delivery_status == 'STARTED':
live_status = 'is_live'
elif delivery_status == 'STOPPED':
if archive_mode != 'ON':
raise ExtractorError(
'This event has ended and there is no archive for this event', expected=True)
live_status = 'post_live'
elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
live_status = 'post_live'
elif delivery_status == 'CONFIRMED_ARCHIVE':
live_status = 'was_live'
else:
self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
live_status = 'is_live'
formats = []
m3u8_playlist_urls = self._search_json(
r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
if not m3u8_playlist_urls:
if live_status == 'is_upcoming':
self.raise_no_formats(
f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
else:
self.raise_no_formats(
'Could not find the playlist URL. This event may not be accessible', expected=True)
elif live_status == 'is_upcoming':
self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
elif live_status == 'post_live':
self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
else:
for m3u8_playlist_url in m3u8_playlist_urls:
formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
# FIXME: HTTP request headers need to be updated to continue download
warning = 'Due to technical limitations, the download will be interrupted after one hour'
if live_status == 'is_live':
self.report_warning(warning)
elif live_status == 'was_live':
self.report_warning(f'{warning}. You can restart to continue the download')
return {
'id': data_json['app_id'],
'title': data_json.get('app_name'),
'formats': formats,
'live_status': live_status,
'description': data_json.get('content'),
'release_timestamp': release_timestamp,
}
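A sketch of the timezone handling above: event_datetime is reported in JST (UTC+9), so 9 * 3600 = 32400 seconds are subtracted to obtain a UTC epoch value. The sample string mirrors the test data.

from yt_dlp.utils import unified_timestamp

jst_text = '2021/12/31 21:00:00'  # assumed JST wall-clock time
print(unified_timestamp(jst_text) - 32400)  # 1640952000 == 2021-12-31T12:00:00Z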


@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?(?:expressen|di)\.se/
-(?:(?:tvspelare/video|videoplayer/embed)/)?
(?:(?:tvspelare/video|video-?player/embed)/)?
-tv/(?:[^/]+/)*
(?:tv|nyheter)/(?:[^/?#]+/)*
(?P<id>[^/?#&]+)
'''
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
}, {
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
'only_matching': True,
}]

def _real_extract(self, url):


@ -74,6 +74,22 @@ class FacebookIE(InfoExtractor):
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'

_TESTS = [{
'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
'info_dict': {
'id': '3676516585958356',
'ext': 'mp4',
'title': 'dr Adam Przygoda',
'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
'uploader': 'RADIO KICKS FM',
'upload_date': '20230818',
'timestamp': 1692346159,
'thumbnail': r're:^https?://.*',
'uploader_id': '100063551323670',
'duration': 3132.184,
'view_count': int,
'concurrent_view_count': 0,
},
}, {
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
'md5': '6a40d33c0eccbb1af76cf0485a052659',
'info_dict': {
@ -97,7 +113,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20140506',
'timestamp': 1399398998,
'thumbnail': r're:^https?://.*',
-'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
'duration': 131.03,
'concurrent_view_count': int,
},
@ -179,7 +195,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1486648217,
'upload_date': '20170209',
'uploader': 'Yaroslav Korpan',
-'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
'concurrent_view_count': int,
'thumbnail': r're:^https?://.*',
'view_count': int,
@ -274,7 +290,7 @@ class FacebookIE(InfoExtractor):
'title': 'Josef',
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
-'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
'timestamp': 1549275572,
'duration': 3.413,
'uploader': 'Josef Novak',
@ -401,9 +417,9 @@ def _extract_from_url(self, url, video_id):
def extract_metadata(webpage):
post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
-r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
post = traverse_obj(post_data, (
-..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
title = get_first(media, ('title', 'text'))
@ -489,18 +505,17 @@ def process_formats(info):
# with non-browser User-Agent.
for f in info['formats']:
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
info['_format_sort_fields'] = ('res', 'quality')

def extract_relay_data(_filter):
return self._parse_json(self._search_regex(
-r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
r'data-sjs>({.*?%s.*?})</script>' % _filter,
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}

def extract_relay_prefetched_data(_filter):
-replay_data = extract_relay_data(_filter)
-for require in (replay_data.get('require') or []):
-if require[0] == 'RelayPrefetchedStreamCache':
-return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
return traverse_obj(extract_relay_data(_filter), (
'require', (None, (..., ..., ..., '__bbox', 'require')),
lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
'__bbox', 'result', 'data', {dict}), get_all=False) or {}
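A simplified sketch (invented payload; the real path also tries a nested '__bbox' require) of the branching traversal that replaces the old loop: the lambda keeps require entries mentioning RelayPrefetchedStreamCache, the `...` steps descend into them, and get_all=False returns the first '__bbox' result.

from yt_dlp.utils import traverse_obj

data = {'require': [['RelayPrefetchedStreamCache', None, None,
                     [None, {'__bbox': {'result': {'data': {'id': '42'}}}}]]]}
found = traverse_obj(data, (
    'require', lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
    '__bbox', 'result', 'data', {dict}), get_all=False)
print(found)  # {'id': '42'}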
if not video_data:
server_js_data = self._parse_json(self._search_regex([
@ -511,7 +526,7 @@ def extract_relay_prefetched_data(_filter):
if not video_data:
data = extract_relay_prefetched_data(
-r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
if data:
entries = []
@ -526,7 +541,8 @@ def parse_graphql_video(video):
formats = []
q = qualities(['sd', 'hd'])
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
-('playable_url_dash', '')):
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')):
playable_url = video.get(key)
if not playable_url:
continue
@ -535,7 +551,8 @@ def parse_graphql_video(video):
else:
formats.append({
'format_id': format_id,
-'quality': q(format_id),
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': playable_url,
})
extract_dash_manifest(video, formats)
@ -702,9 +719,11 @@ def parse_attachment(attachment, key='media'):
for src_type in ('src', 'src_no_ratelimit'):
src = f[0].get('%s_%s' % (quality, src_type))
if src:
-preference = -10 if format_id == 'progressive' else -1
# sd, hd formats w/o resolution info should be deprioritized below DASH
# TODO: investigate if progressive or src formats still exist
preference = -10 if format_id == 'progressive' else -3
if quality == 'hd':
-preference += 5
preference += 1
formats.append({
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
'url': src,


@ -60,6 +60,7 @@ class Funker530IE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
info = {}
rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
if rumble_url:
info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}


@ -2370,7 +2370,7 @@ def _extract_kvs(self, url, webpage, video_id):
'id': flashvars['video_id'],
'display_id': display_id,
'title': title,
-'thumbnail': thumbnail,
'thumbnail': urljoin(url, thumbnail),
'formats': formats,
}


@ -66,7 +66,7 @@ def _entries(self, file_id):
query_params = {
'contentId': file_id,
'token': self._TOKEN,
-'websiteToken': 12345,
'websiteToken': '7fd94ds12fds4',  # From https://gofile.io/dist/js/alljs.js
}
password = self.get_param('videopassword')
if password:


@ -383,9 +383,9 @@ def __get_current_timestamp():
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
-time_now = datetime.datetime.utcnow()
time_now = datetime.datetime.now(datetime.timezone.utc)
format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
-time_string = datetime.datetime.utcnow().strftime(format_string)
time_string = time_now.strftime(format_string)
return time_string
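A quick sketch of why this change matters: utcnow() returns a naive datetime (and is deprecated since Python 3.12), while now(timezone.utc) is timezone-aware; the strftime output is unchanged.

import datetime

aware = datetime.datetime.now(datetime.timezone.utc)
print(aware.tzinfo)  # UTC
print(datetime.datetime.utcnow().tzinfo)  # None (naive)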
def __str__(self):


@ -1,9 +1,9 @@
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
int_or_none,
parse_age_limit,
parse_iso8601,
time_seconds,
update_url_query,
)
@ -11,15 +11,14 @@
class IndavideoEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
# Some example URLs covered by generic extractor:
-# http://indavideo.hu/video/Vicces_cica_1
-# http://index.indavideo.hu/video/2015_0728_beregszasz
-# http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
-# http://erotika.indavideo.hu/video/Amator_tini_punci
-# http://film.indavideo.hu/video/f_hrom_nagymamm_volt
-# http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
-_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
# https://indavideo.hu/video/Vicces_cica_1
# https://index.indavideo.hu/video/Hod_Nemetorszagban
# https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
# https://film.indavideo.hu/video/f_farkaslesen
# https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)']
_TESTS = [{
-'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
'url': 'https://indavideo.hu/player/video/1bdc3c6d80/',
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
'info_dict': {
'id': '1837039',
@ -36,21 +35,33 @@ class IndavideoEmbedIE(InfoExtractor):
'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
},
}, {
-'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
'only_matching': True,
-}, {
-'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
-'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://indavideo.hu/video/Vicces_cica_1',
'info_dict': {
'id': '1335611',
'ext': 'mp4',
'title': 'Vicces cica',
'description': 'Játszik a tablettel. :D',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Jet_Pack',
'uploader_id': '491217',
'timestamp': 1390821212,
'upload_date': '20140127',
'duration': 7,
'age_limit': 0,
'tags': ['cica', 'Jet_Pack'],
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
-'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
-video_id)['data']
-title = video['title']
f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/',
video_id, query={'_': time_seconds()})['data']

video_urls = []
@ -60,33 +71,21 @@ def _real_extract(self, url):
elif isinstance(video_files, dict):
video_urls.extend(video_files.values())
-video_file = video.get('video_file')
-if video:
-video_urls.append(video_file)
video_urls = list(set(video_urls))
-video_prefix = video_urls[0].rsplit('/', 1)[0]
-for flv_file in video.get('flv_files', []):
-flv_url = '%s/%s' % (video_prefix, flv_file)
-if flv_url not in video_urls:
-video_urls.append(flv_url)
-filesh = video.get('filesh')
filesh = video.get('filesh') or {}

formats = []
for video_url in video_urls:
height = int_or_none(self._search_regex(
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
-if filesh:
-if not height:
-continue
-token = filesh.get(compat_str(height))
-if token is None:
-continue
-video_url = update_url_query(video_url, {'token': token})
if not height and len(filesh) == 1:
height = int_or_none(list(filesh.keys())[0])
token = filesh.get(str(height))
if token is None:
continue
formats.append({
-'url': video_url,
'url': update_url_query(video_url, {'token': token}),
'height': height,
})
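A minimal sketch (invented data) of the filesh token lookup above: each rendition height maps to a token that must be appended to the stream URL.

from yt_dlp.utils import update_url_query

filesh = {'360': 'tok360', '720': 'tok720'}
video_url = 'https://example.com/v.720.mp4'
print(update_url_query(video_url, {'token': filesh['720']}))
# https://example.com/v.720.mp4?token=tok720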
@ -103,7 +102,7 @@ def _real_extract(self, url):
return {
'id': video.get('id') or video_id,
-'title': title,
'title': video.get('title'),
'description': video.get('description'),
'thumbnails': thumbnails,
'uploader': video.get('user_name'),


@ -57,8 +57,8 @@ class LecturioIE(LecturioBaseIE):
_VALID_URL = r'''(?x)
https://
(?:
-app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
-(?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
(?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
)
'''
_TESTS = [{
@ -73,6 +73,9 @@ class LecturioIE(LecturioBaseIE):
}, {
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
'only_matching': True,
}, {
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
'only_matching': True,
}, {
'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
'only_matching': True,


@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor):
_TEST = {
'url': 'https://massengeschmack.tv/play/fktv202',
-'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
'md5': '9996f314994a49fefe5f39aa1b07ae21',
'info_dict': {
'id': 'fktv202',
'ext': 'mp4',
-'title': 'Fernsehkritik-TV - Folge 202',
'title': 'Fernsehkritik-TV #202',
'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
},
}
@ -29,9 +30,6 @@ def _real_extract(self, url):
episode = self._match_id(url)
webpage = self._download_webpage(url, episode)
-title = clean_html(self._html_search_regex(
-'<h3>([^<]+)</h3>', webpage, 'title'))
-thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)

formats = []
@ -67,7 +65,8 @@ def _real_extract(self, url):
return { return {
'id': episode, 'id': episode,
'title': title, 'title': clean_html(self._html_search_regex(
r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)),
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
} }
@ -1,5 +1,8 @@
from ..utils import ( from ..utils import (
unified_strdate ExtractorError,
traverse_obj,
unified_strdate,
url_or_none,
) )
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -15,7 +18,7 @@ class MediaKlikkIE(InfoExtractor):
(?P<id>[^/#?_]+)''' (?P<id>[^/#?_]+)'''
_TESTS = [{ _TESTS = [{
# mediaklikk. date in html. # (old) mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/', 'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
'info_dict': { 'info_dict': {
'id': '4754129', 'id': '4754129',
@ -23,9 +26,21 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20210901', 'upload_date': '20210901',
'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg' 'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
},
'skip': 'Webpage redirects to 404 page',
}, {
# mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
'info_dict': {
'id': '6696133',
'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
'ext': 'mp4',
'upload_date': '20230903',
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
} }
}, { }, {
# m4sport # (old) m4sport
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/', 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
'info_dict': { 'info_dict': {
'id': '4754999', 'id': '4754999',
@ -33,6 +48,18 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20210830', 'upload_date': '20210830',
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg' 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
},
'skip': 'Webpage redirects to 404 page',
}, {
# m4sport
'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
'info_dict': {
'id': '6711136',
'title': 'Atlétika Gyémánt Liga, Brüsszel',
'display_id': 'atletika-gyemant-liga-brusszel',
'ext': 'mp4',
'upload_date': '20230908',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg'
} }
}, { }, {
# m4sport with *video/ url and no date # m4sport with *video/ url and no date
@ -40,20 +67,33 @@ class MediaKlikkIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '4492099', 'id': '4492099',
'title': 'Real Madrid - Chelsea 1-1', 'title': 'Real Madrid - Chelsea 1-1',
'display_id': 'real-madrid-chelsea-1-1',
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png' 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
} }
}, { }, {
# hirado # (old) hirado
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/', 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
'info_dict': { 'info_dict': {
'id': '4760120', 'id': '4760120',
'title': 'Feltételeket szabott a főváros', 'title': 'Feltételeket szabott a főváros',
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg' 'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
},
'skip': 'Webpage redirects to video list page',
}, {
# hirado
'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
'info_dict': {
'id': '6716068',
'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
'ext': 'mp4',
'upload_date': '20230911',
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg'
} }
}, { }, {
# petofilive # (old) petofilive
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/', 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
'info_dict': { 'info_dict': {
'id': '4571948', 'id': '4571948',
@ -61,6 +101,18 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20210607', 'upload_date': '20210607',
'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg' 'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
},
'skip': 'Webpage redirects to empty page',
}, {
# petofilive
'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
'info_dict': {
'id': '6713233',
'title': 'Futball Fesztivál a Margitszigeten',
'display_id': 'futball-fesztival-a-margitszigeten',
'ext': 'mp4',
'upload_date': '20230909',
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg'
} }
}] }]
@ -84,8 +136,12 @@ def _real_extract(self, url):
player_data['video'] = player_data.pop('token') player_data['video'] = player_data.pop('token')
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data) player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
playlist_url = self._proto_relative_url(compat_urllib_parse_unquote( player_json = self._search_json(
self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/')) r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
playlist_url = traverse_obj(
player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
if not playlist_url:
raise ExtractorError('Unable to extract playlist url')
formats = self._extract_wowza_formats( formats = self._extract_wowza_formats(
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash']) playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
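
Rather than regexing an escaped URL out of the MediaKlikk player page, the new code parses the `pl.setup(...)` argument with `_search_json` and walks it with `traverse_obj`, keeping the first `type == 'hls'` entry whose `file` is a valid URL. The traversal in isolation, on a hypothetical player config:

```python
from yt_dlp.utils import traverse_obj, url_or_none

player_json = {  # hypothetical shape of the pl.setup() argument
    'playlist': [
        {'type': 'dash', 'file': 'https://player.mediaklikk.hu/a.mpd'},
        {'type': 'hls', 'file': 'https://player.mediaklikk.hu/a/playlist.m3u8'},
    ],
}
playlist_url = traverse_obj(
    player_json,
    ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}),
    get_all=False)  # first match only
assert playlist_url.endswith('playlist.m3u8')
```
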
@ -14,7 +14,7 @@ class MediaStreamBaseIE(InfoExtractor):
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)' _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
def _extract_mediastream_urls(self, webpage): def _extract_mediastream_urls(self, webpage):
yield from traverse_obj(list(self._yield_json_ld(webpage, None)), ( yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'), lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None})) {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
@ -106,8 +106,12 @@ def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if 'Debido a tu ubicación no puedes ver el contenido' in webpage: for message in [
self.raise_geo_restricted() 'Debido a tu ubicación no puedes ver el contenido',
'You are not allowed to watch this video: Geo Fencing Restriction'
]:
if message in webpage:
self.raise_geo_restricted()
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id) player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
@ -20,7 +20,7 @@ class MixcloudBaseIE(InfoExtractor):
def _call_api(self, object_type, object_fields, display_id, username, slug=None): def _call_api(self, object_type, object_fields, display_id, username, slug=None):
lookup_key = object_type + 'Lookup' lookup_key = object_type + 'Lookup'
return self._download_json( return self._download_json(
'https://www.mixcloud.com/graphql', display_id, query={ 'https://app.mixcloud.com/graphql', display_id, query={
'query': '''{ 'query': '''{
%s(lookup: {username: "%s"%s}) { %s(lookup: {username: "%s"%s}) {
%s %s
@ -46,7 +46,15 @@ class MixcloudIE(MixcloudBaseIE):
'view_count': int, 'view_count': int,
'timestamp': 1321359578, 'timestamp': 1321359578,
'upload_date': '20111115', 'upload_date': '20111115',
'uploader_url': 'https://www.mixcloud.com/dholbach/',
'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
'duration': 3723,
'tags': [],
'comment_count': int,
'repost_count': int,
'like_count': int,
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
'info_dict': { 'info_dict': {
@ -60,7 +68,14 @@ class MixcloudIE(MixcloudBaseIE):
'view_count': int, 'view_count': int,
'timestamp': 1422987057, 'timestamp': 1422987057,
'upload_date': '20150203', 'upload_date': '20150203',
'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
'duration': 2992,
'tags': [],
'comment_count': int,
'repost_count': int,
'like_count': int,
}, },
'params': {'skip_download': '404 playback error on site'},
}, { }, {
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
'only_matching': True, 'only_matching': True,
@ -259,9 +274,9 @@ def _real_extract(self, url):
cloudcast_url = cloudcast.get('url') cloudcast_url = cloudcast.get('url')
if not cloudcast_url: if not cloudcast_url:
continue continue
slug = try_get(cloudcast, lambda x: x['slug'], compat_str) item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str) owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None video_id = f'{owner_username}_{item_slug}' if item_slug and owner_username else None
entries.append(self.url_result( entries.append(self.url_result(
cloudcast_url, MixcloudIE.ie_key(), video_id)) cloudcast_url, MixcloudIE.ie_key(), video_id))
@ -284,7 +299,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'Daniel Holbach (uploads)',
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
'playlist_mincount': 36, 'playlist_mincount': 36,
}, { }, {
@ -292,7 +307,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'Daniel Holbach (uploads)',
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
'playlist_mincount': 36, 'playlist_mincount': 36,
}, { }, {
@ -300,7 +315,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'dholbach_favorites', 'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)', 'title': 'Daniel Holbach (favorites)',
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', 'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
}, },
# 'params': { # 'params': {
# 'playlist_items': '1-100', # 'playlist_items': '1-100',
@ -323,9 +338,9 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'FirstEar_stream', 'id': 'FirstEar_stream',
'title': 'First Ear (stream)', 'title': 'First Ear (stream)',
'description': 'Curators of good music\r\n\r\nfirstearmusic.com', 'description': 'we maraud for ears',
}, },
'playlist_mincount': 271, 'playlist_mincount': 269,
}] }]
_TITLE_KEY = 'displayName' _TITLE_KEY = 'displayName'
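
`_call_api` builds the GraphQL document by string interpolation and sends it as a GET query parameter; the change above only moves the endpoint from www to app.mixcloud.com. A rough standalone equivalent, assuming the endpoint tolerates a plain unauthenticated GET (the extractor routes this through yt-dlp's downloader, so treat it as a sketch):

```python
import json
import urllib.parse
import urllib.request

def mixcloud_lookup(object_type, object_fields, username, slug=None):
    # lookup_key is object_type + 'Lookup', as in MixcloudBaseIE._call_api
    lookup = f'username: "{username}"' + (f', slug: "{slug}"' if slug else '')
    document = '{ %sLookup(lookup: {%s}) { %s } }' % (object_type, lookup, object_fields)
    url = 'https://app.mixcloud.com/graphql?' + urllib.parse.urlencode({'query': document})
    with urllib.request.urlopen(url) as resp:
        return json.load(resp)

# e.g. mixcloud_lookup('cloudcast', 'name audioLength', 'dholbach', 'some-slug')
```
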
@ -151,7 +151,7 @@ def _real_extract(self, url):
'd': 'days', 'd': 'days',
} }
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)) comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
uploader_id = self._html_search_regex( uploader_id = self._html_search_regex(
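
This one-line change is part of a wider cleanup: `datetime.utcnow()` returns a naive datetime and is deprecated as of Python 3.12, while `datetime.now(timezone.utc)` returns an aware one with the same wall-clock value:

```python
import datetime

now = datetime.datetime.now(datetime.timezone.utc)  # aware, tzinfo=UTC
assert now.tzinfo is datetime.timezone.utc

# Same derived value as the old utcnow()-based code:
upload_date = (now - datetime.timedelta(days=3)).strftime('%Y%m%d')
```
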
@ -33,7 +33,7 @@ def _real_extract(self, url):
class N1InfoIIE(InfoExtractor): class N1InfoIIE(InfoExtractor):
IE_NAME = 'N1Info:article' IE_NAME = 'N1Info:article'
_VALID_URL = r'https?://(?:(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)|nova\.rs)/(?:[^/]+/){1,2}(?P<id>[^/]+)' _VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
# Youtube embedded # Youtube embedded
'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/', 'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
@ -94,6 +94,16 @@ class N1InfoIIE(InfoExtractor):
'upload_date': '20211102', 'upload_date': '20211102',
'timestamp': 1635861677, 'timestamp': 1635861677,
}, },
}, {
'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
'info_dict': {
'id': '1332368',
'ext': 'mp4',
'title': 'Ćuta: Biti u Kosovskoj Mitrovici znači da te dočekaju eksplozivnim napravama',
'upload_date': '20230620',
'timestamp': 1687290536,
'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg'
},
}, { }, {
'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/', 'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
'only_matching': True, 'only_matching': True,
@ -105,19 +115,35 @@ def _real_extract(self, url):
title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title') title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage)) timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
plugin_data = self._html_search_meta('BridPlugin', webpage)
videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
entries = [] entries = []
for video in videos: if plugin_data:
video_data = extract_attributes(video) site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id')
entries.append({ for video_data in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage):
'_type': 'url_transparent', video_id = self._parse_json(video_data, title)['video']
'url': video_data.get('data-url'), entries.append({
'id': video_data.get('id'), 'id': video_id,
'title': title, 'title': title,
'thumbnail': video_data.get('data-thumbnail'), 'timestamp': timestamp,
'timestamp': timestamp, 'thumbnail': self._html_search_meta('thumbnailURL', webpage),
'ie_key': 'N1InfoAsset'}) 'formats': self._extract_m3u8_formats(
f'https://cdn-uc.brid.tv/live/partners/{site_id}/streaming/{video_id}/{video_id}.m3u8',
video_id, fatal=False),
})
else:
# Old player still present in older articles
videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
for video in videos:
video_data = extract_attributes(video)
entries.append({
'_type': 'url_transparent',
'url': video_data.get('data-url'),
'id': video_data.get('id'),
'title': title,
'thumbnail': video_data.get('data-thumbnail'),
'timestamp': timestamp,
'ie_key': 'N1InfoAsset',
})
embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage) embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
for embedded_video in embedded_videos: for embedded_video in embedded_videos:
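
The new N1Info path pulls the partner id from a `site:<id>` marker and a video id from each `$bp(...)` player call, then derives the HLS manifest from the CDN template seen in the hunk above. A sketch against a trimmed, hypothetical page snippet:

```python
import json
import re

webpage = '''<meta name="BridPlugin" content="site:26827">
<script>$bp("Brid_19624", {"video": 1332368, "autoplay": true});</script>'''

site_id = re.search(r'site:(\d+)', webpage).group(1)
for player_args in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage):
    video_id = json.loads(player_args)['video']
    print(f'https://cdn-uc.brid.tv/live/partners/{site_id}'
          f'/streaming/{video_id}/{video_id}.m3u8')
```
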
@ -21,7 +21,7 @@
class NaverBaseIE(InfoExtractor): class NaverBaseIE(InfoExtractor):
_CAPTION_EXT_RE = r'\.(?:ttml|vtt)' _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
@staticmethod # NB: Used in VLiveWebArchiveIE, WeverseIE @staticmethod # NB: Used in WeverseIE
def process_subtitles(vod_data, process_url): def process_subtitles(vod_data, process_url):
ret = {'subtitles': {}, 'automatic_captions': {}} ret = {'subtitles': {}, 'automatic_captions': {}}
for caption in traverse_obj(vod_data, ('captions', 'list', ...)): for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
@ -265,6 +265,26 @@ class NitterIE(InfoExtractor):
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
} }
}, { # no OpenGraph title
'url': f'https://{current_instance}/LocalBateman/status/1678455464038735895#m',
'info_dict': {
'id': '1678455464038735895',
'ext': 'mp4',
'title': 'Your Typical Local Man - Local man, what did Romanians ever do to you?',
'description': 'Local man, what did Romanians ever do to you?',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Your Typical Local Man',
'uploader_id': 'LocalBateman',
'uploader_url': f'https://{current_instance}/LocalBateman',
'upload_date': '20230710',
'timestamp': 1689009900,
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {'skip_download': 'm3u8'},
} }
] ]
@ -292,7 +312,7 @@ def _real_extract(self, url):
'ext': ext 'ext': ext
}] }]
title = description = self._og_search_description(full_webpage) or self._html_search_regex( title = description = self._og_search_description(full_webpage, default=None) or self._html_search_regex(
r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False) r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)
uploader_id = self._html_search_regex( uploader_id = self._html_search_regex(
@ -6,7 +6,6 @@
determine_ext, determine_ext,
int_or_none, int_or_none,
js_to_json, js_to_json,
qualities,
traverse_obj, traverse_obj,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
@ -49,77 +48,52 @@ def _real_extract(self, url):
duration = None duration = None
formats = [] formats = []
player = self._parse_json( def process_format_list(format_list, format_id=""):
self._search_regex( nonlocal formats, has_drm
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,', if not isinstance(format_list, list):
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'), format_list = [format_list]
webpage, 'player', default='{}', group='json'), video_id, fatal=False) for format_dict in format_list:
if player: if not isinstance(format_dict, dict):
for format_id, format_list in player['tracks'].items(): continue
if not isinstance(format_list, list): if (not self.get_param('allow_unplayable_formats')
format_list = [format_list] and traverse_obj(format_dict, ('drm', 'keySystem'))):
for format_dict in format_list: has_drm = True
if not isinstance(format_dict, dict): continue
continue format_url = url_or_none(format_dict.get('src'))
if (not self.get_param('allow_unplayable_formats') format_type = format_dict.get('type')
and traverse_obj(format_dict, ('drm', 'keySystem'))): ext = determine_ext(format_url)
has_drm = True if (format_type == 'application/x-mpegURL'
continue or format_id == 'HLS' or ext == 'm3u8'):
format_url = url_or_none(format_dict.get('src')) formats.extend(self._extract_m3u8_formats(
format_type = format_dict.get('type') format_url, video_id, 'mp4',
ext = determine_ext(format_url) entry_protocol='m3u8_native', m3u8_id='hls',
if (format_type == 'application/x-mpegURL' fatal=False))
or format_id == 'HLS' or ext == 'm3u8'): elif (format_type == 'application/dash+xml'
formats.extend(self._extract_m3u8_formats( or format_id == 'DASH' or ext == 'mpd'):
format_url, video_id, 'mp4', formats.extend(self._extract_mpd_formats(
entry_protocol='m3u8_native', m3u8_id='hls', format_url, video_id, mpd_id='dash', fatal=False))
fatal=False)) else:
elif (format_type == 'application/dash+xml' formats.append({
or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
duration = int_or_none(player.get('duration'))
else:
# Old path, not actual as of 08.04.2020
bitrates = self._parse_json(
self._search_regex(
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
format_list = [format_list]
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
continue
f = {
'url': format_url, 'url': format_url,
} })
f_id = format_id
for quality in QUALITIES: player = self._search_json(
if '%s.mp4' % quality in format_url: r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
f_id += '-%s' % quality if player:
f.update({ for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
'quality': quality_key(quality), process_format_list(src)
'format_note': quality.upper(), duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
}) if not formats and not has_drm:
break # older code path, in use before August 2023
f['format_id'] = f_id player = self._parse_json(
formats.append(f) self._search_regex(
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
webpage, 'player', group='json'), video_id)
if player:
for format_id, format_list in player['tracks'].items():
process_format_list(format_list, format_id)
duration = int_or_none(player.get('duration'))
if not formats and has_drm: if not formats and has_drm:
self.report_drm(video_id) self.report_drm(video_id)
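
This refactor folds the duplicated per-format loop into a `process_format_list` closure shared by the new `player:` JSON path and the legacy fallback. `nonlocal` is only strictly needed to rebind `has_drm`; `formats` is merely mutated in place. The pattern reduced to its essentials:

```python
def extract_formats(sources, legacy_tracks):
    formats, has_drm = [], False

    def process_format_list(format_list, format_id=''):
        nonlocal has_drm  # rebinding a bool in the enclosing scope
        if not isinstance(format_list, list):
            format_list = [format_list]
        for fmt in format_list:
            if not isinstance(fmt, dict):
                continue
            if fmt.get('drm'):
                has_drm = True
                continue
            formats.append({'url': fmt['src'], 'format_id': format_id or None})

    for src in sources:              # preferred, newer code path
        process_format_list(src)
    if not formats and not has_drm:  # legacy fallback
        for format_id, format_list in legacy_tracks.items():
            process_format_list(format_list, format_id)
    return formats, has_drm

fmts, drm = extract_formats(
    [{'src': 'https://example.com/a.m3u8'}],
    {'HLS': [{'src': 'https://example.com/b.m3u8'}]})
assert len(fmts) == 1 and not drm
```
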
@ -1,7 +1,7 @@
import calendar import calendar
import json import json
import functools import functools
from datetime import datetime from datetime import datetime, timezone
from random import random from random import random
from .common import InfoExtractor from .common import InfoExtractor
@ -243,7 +243,7 @@ def _mark_watched(self, base_url, video_id, delivery_info):
invocation_id = delivery_info.get('InvocationId') invocation_id = delivery_info.get('InvocationId')
stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str) stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
if invocation_id and stream_id and duration: if invocation_id and stream_id and duration:
timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/' timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/'
data = { data = {
'streamRequests': [ 'streamRequests': [
{ {
yt_dlp/extractor/pornbox.py (new file, 113 lines)
@ -0,0 +1,113 @@
from .common import InfoExtractor
from ..compat import functools
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
qualities,
str_or_none,
traverse_obj,
url_or_none,
)
class PornboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://pornbox.com/application/watch-page/212108',
'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
'info_dict': {
'id': '212108',
'ext': 'mp4',
'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
'uploader': 'Lily Strong',
'timestamp': 1665871200,
'upload_date': '20221015',
'age_limit': 18,
'availability': 'needs_auth',
'duration': 1505,
'cast': ['Lily Strong', 'John Strong'],
'tags': 'count:11',
'description': 'md5:589c7f33e183aa8aa939537300efb859',
'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$'
}
}, {
'url': 'https://pornbox.com/application/watch-page/216045',
'info_dict': {
'id': '216045',
'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
'uploader': 'VK Studio',
'timestamp': 1618264800,
'upload_date': '20210412',
'age_limit': 18,
'availability': 'premium_only',
'duration': 2710,
'cast': 'count:3',
'tags': 'count:29',
'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
'subtitles': 'count:6'
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True
},
'expected_warnings': [
'You are either not logged in or do not have access to this scene',
'No video formats found', 'Requested format is not available']
}]
def _real_extract(self, url):
video_id = self._match_id(url)
public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)
subtitles = {country_code: [{
'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}',
'ext': 'srt'
}] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))}
is_free_scene = traverse_obj(
public_data, ('price', 'is_available_for_free', {bool}), default=False)
metadata = {
'id': video_id,
**traverse_obj(public_data, {
'title': ('scene_name', {str.strip}),
'description': ('small_description', {str.strip}),
'uploader': 'studio',
'duration': ('runtime', {parse_duration}),
'cast': (('models', 'male_models'), ..., 'model_name'),
'thumbnail': ('player_poster', {url_or_none}),
'tags': ('niches', ..., 'niche'),
}),
'age_limit': 18,
'timestamp': parse_iso8601(traverse_obj(
public_data, ('studios', 'release_date'), 'publish_date')),
'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene),
'subtitles': subtitles,
}
if not public_data.get('is_purchased') and not is_free_scene:
self.raise_login_required(
'You are either not logged in or do not have access to this scene', metadata_available=True)
return metadata
media_id = traverse_obj(public_data, (
'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
if not media_id:
self.raise_no_formats('Could not find stream id', video_id=video_id)
stream_data = self._download_json(
f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')
get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
'url': 'src',
'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
'format_id': ('quality', {str_or_none}),
'quality': ('quality', {get_quality}),
'width': ('size', {lambda x: int(x[:-1])}),
}))
return metadata
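
The format list above is produced entirely by `traverse_obj`'s dict-mapping form: the lambda keeps only entries with a `src`, then each output key names a path plus an optional `{transform}`. A self-contained run with invented stream data:

```python
import functools

from yt_dlp.utils import int_or_none, qualities, str_or_none, traverse_obj

stream_data = {'qualities': [  # hypothetical API payload
    {'src': 'https://cdn.example.com/hd.mp4', 'bitrate': 4000000,
     'quality': 'hd', 'size': '1280p'},
    {'src': 'https://cdn.example.com/4k.mp4', 'bitrate': 16000000,
     'quality': '4k', 'size': '3840p'},
    {'quality': 'web'},  # no src -> filtered out by the lambda
]}

get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
formats = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
    'url': 'src',
    'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
    'format_id': ('quality', {str_or_none}),
    'quality': ('quality', {get_quality}),
    'width': ('size', {lambda x: int(x[:-1])}),
}))

assert formats[0]['vbr'] == 4000                      # bps scaled to kbps
assert formats[1]['quality'] > formats[0]['quality']  # '4k' ranks above 'hd'
```
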
@ -1,97 +1,155 @@
import re import json
from datetime import date
from urllib.parse import unquote
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import merge_dicts from ..compat import functools
from ..utils import ExtractorError, make_archive_id, urljoin
from ..utils.traversal import traverse_obj
class Pr0grammStaticIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/static/5466437
_VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://pr0gramm.com/static/5466437',
'md5': '52fa540d70d3edc286846f8ca85938aa',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'uploader': 'g11st',
'upload_date': '20221221',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# Fetch media sources
entries = self._parse_html5_media_entries(url, webpage, video_id)
media_info = entries[0]
# Fetch author
uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
# Fetch approx upload timestamp from filename
# Have None-defaults in case the extraction fails
uploadDay = None
uploadMon = None
uploadYear = None
uploadTimestr = None
# (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
if (m):
# Up to a day of accuracy should suffice...
uploadDay = m.groupdict().get('day')
uploadMon = m.groupdict().get('mon')
uploadYear = m.groupdict().get('year')
uploadTimestr = uploadYear + uploadMon + uploadDay
return merge_dicts({
'id': video_id,
'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
'uploader': uploader,
'upload_date': uploadTimestr
}, media_info)
# This extractor is for the primary url (used for sharing, and appears in the
# location bar) Since this page loads the DOM via JS, yt-dl can't find any
# video information here. So let's redirect to a compatibility version of
# the site, which does contain the <video>-element by itself, without requiring
# js to be ran.
class Pr0grammIE(InfoExtractor): class Pr0grammIE(InfoExtractor):
# Possible urls: _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
# https://pr0gramm.com/new/546637 _TESTS = [{
# https://pr0gramm.com/new/video/546637 # Tags require account
# https://pr0gramm.com/top/546637
# https://pr0gramm.com/top/video/546637
# https://pr0gramm.com/user/g11st/uploads/5466437
# https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
# https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
# https://pr0gramm.com/user/froschler/1elf/5232030
# https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
# https://pr0gramm.com/top/fruher war alles damals/5498175
_VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
_TEST = {
'url': 'https://pr0gramm.com/new/video/5466437', 'url': 'https://pr0gramm.com/new/video/5466437',
'info_dict': { 'info_dict': {
'id': '5466437', 'id': '5466437',
'ext': 'mp4', 'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st', 'title': 'pr0gramm-5466437 by g11st',
'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'],
'uploader': 'g11st', 'uploader': 'g11st',
'uploader_id': 394718,
'upload_timestamp': 1671590240,
'upload_date': '20221221', 'upload_date': '20221221',
} 'like_count': int,
} 'dislike_count': int,
'age_limit': 0,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
},
}, {
# Tags require account
'url': 'https://pr0gramm.com/new/3052805:comment28391322',
'info_dict': {
'id': '3052805',
'ext': 'mp4',
'title': 'pr0gramm-3052805 by Hansking1',
'tags': 'count:15',
'uploader': 'Hansking1',
'uploader_id': 385563,
'upload_timestamp': 1552930408,
'upload_date': '20190318',
'like_count': int,
'dislike_count': int,
'age_limit': 0,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
},
}, {
# Requires verified account
'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332',
'info_dict': {
'id': '5848332',
'ext': 'mp4',
'title': 'pr0gramm-5848332 by erd0pfel',
'tags': 'count:18',
'uploader': 'erd0pfel',
'uploader_id': 349094,
'upload_timestamp': 1694489652,
'upload_date': '20230912',
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
},
}, {
'url': 'https://pr0gramm.com/static/5466437',
'only_matching': True,
}, {
'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805',
'only_matching': True,
}, {
'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290',
'only_matching': True,
}]
def _generic_title(): BASE_URL = 'https://pr0gramm.com'
return "oof"
@functools.cached_property
def _is_logged_in(self):
return 'pp' in self._get_cookies(self.BASE_URL)
@functools.cached_property
def _maximum_flags(self):
# We need to guess the flags for the content otherwise the api will raise an error
# We can guess the maximum allowed flags for the account from the cookies
# Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
flags = 0b0001
if self._is_logged_in:
flags |= 0b1000
cookies = self._get_cookies(self.BASE_URL)
if 'me' not in cookies:
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
flags |= 0b0110
return flags
def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'):
data = self._download_json(
f'https://pr0gramm.com/api/items/{endpoint}',
video_id, note, query=query, expected_status=403)
error = traverse_obj(data, ('error', {str}))
if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'):
if not self._is_logged_in:
self.raise_login_required()
raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True)
elif error:
message = traverse_obj(data, ('msg', {str})) or error
raise ExtractorError(f'API returned error: {message}', expected=True)
return data
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_info = traverse_obj(
self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
('items', 0, {dict}))
return self.url_result( source = urljoin('https://img.pr0gramm.com', video_info.get('image'))
'https://pr0gramm.com/static/' + video_id, if not source or not source.endswith('mp4'):
video_id=video_id, self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
ie=Pr0grammStaticIE.ie_key())
tags = None
if self._is_logged_in:
metadata = self._call_api('info', video_id, {'itemId': video_id})
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
# Sorted by "confidence", higher confidence = earlier in list
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
if confidences:
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
return {
'id': video_id,
'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
'formats': [{
'url': source,
'ext': 'mp4',
**traverse_obj(video_info, {
'width': ('width', {int}),
'height': ('height', {int}),
}),
}],
'tags': tags,
'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
'_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
**traverse_obj(video_info, {
'uploader': ('user', {str}),
'uploader_id': ('userId', {int}),
'like_count': ('up', {int}),
'dislike_count': ('down', {int}),
'upload_timestamp': ('created', {int}),
'upload_date': ('created', {int}, {date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
}),
}
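
pr0gramm encodes content ratings as a 4-bit mask (msb-first: nsfp, nsfl, nsfw, sfw), and `_maximum_flags` guesses the widest mask the current session may request from its cookies. The flag arithmetic in isolation, with the cookie checks replaced by plain booleans:

```python
SFW, NSFW, NSFL, NSFP = 0b0001, 0b0010, 0b0100, 0b1000

def maximum_flags(logged_in, verified):
    flags = SFW  # anonymous sessions may only request SFW items
    if logged_in:
        flags |= NSFP
        if verified:
            flags |= NSFW | NSFL
    return flags

assert maximum_flags(False, False) == 0b0001
assert maximum_flags(True, False) == 0b1001
assert maximum_flags(True, True) == 0b1111
# The same mask drives the age_limit check: flags & (NSFW | NSFL), i.e. 0b110
assert bool(0b0110 & (NSFW | NSFL))
```
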
@ -1,7 +1,18 @@
import itertools
import re import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_duration, unified_strdate from ..utils import (
int_or_none,
join_nonempty,
js_to_json,
parse_duration,
strftime_or_none,
traverse_obj,
unified_strdate,
urljoin,
)
class RadioFranceIE(InfoExtractor): class RadioFranceIE(InfoExtractor):
@ -56,8 +67,32 @@ def _real_extract(self, url):
} }
class FranceCultureIE(InfoExtractor): class RadioFranceBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])' _VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr'
_STATIONS_RE = '|'.join(map(re.escape, (
'franceculture',
'franceinfo',
'franceinter',
'francemusique',
'fip',
'mouv',
)))
def _extract_data_from_webpage(self, webpage, display_id, key):
return traverse_obj(self._search_json(
r'\bconst\s+data\s*=', webpage, key, display_id,
contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json),
(..., 'data', key, {dict}), get_all=False) or {}
class FranceCultureIE(RadioFranceBaseIE):
_VALID_URL = rf'''(?x)
{RadioFranceBaseIE._VALID_URL_BASE}
/(?:{RadioFranceBaseIE._STATIONS_RE})
/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#])
'''
_TESTS = [ _TESTS = [
{ {
'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487', 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
@ -67,14 +102,30 @@ class FranceCultureIE(InfoExtractor):
'ext': 'mp3', 'ext': 'mp3',
'title': 'La physique dEinstein aiderait-elle à comprendre le cerveau ?', 'title': 'La physique dEinstein aiderait-elle à comprendre le cerveau ?',
'description': 'Existerait-il un pont conceptuel entre la physique de lespace-temps et les neurosciences ?', 'description': 'Existerait-il un pont conceptuel entre la physique de lespace-temps et les neurosciences ?',
'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'upload_date': '20220514', 'upload_date': '20220514',
'duration': 2750, 'duration': 2750,
}, },
}, },
{
'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675',
'info_dict': {
'id': '2107675',
'display_id': 'le-7-9-30-du-vendredi-10-mars-2023',
'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot',
'description': 'md5:36ee74351ede77a314fdebb94026b916',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'upload_date': '20230310',
'duration': 8977,
'ext': 'mp3',
},
},
{ {
'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507', 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200',
'only_matching': True,
} }
] ]
@ -89,7 +140,6 @@ def _real_extract(self, url):
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': video_data['contentUrl'], 'url': video_data['contentUrl'],
'ext': video_data.get('encodingFormat'),
'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None, 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
'duration': parse_duration(video_data.get('duration')), 'duration': parse_duration(video_data.get('duration')),
'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>', 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
@ -102,3 +152,322 @@ def _real_extract(self, url):
'upload_date': unified_strdate(self._search_regex( 'upload_date': unified_strdate(self._search_regex(
r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False)) r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
} }
class RadioFranceLiveIE(RadioFranceBaseIE):
_VALID_URL = rf'''(?x)
https?://(?:www\.)?radiofrance\.fr
/(?P<id>{RadioFranceBaseIE._STATIONS_RE})
/?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$)
'''
_TESTS = [{
'url': 'https://www.radiofrance.fr/franceinter/',
'info_dict': {
'id': 'franceinter',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/franceculture',
'info_dict': {
'id': 'franceculture',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family',
'info_dict': {
'id': 'mouv-radio-musique-kids-family',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul',
'info_dict': {
'id': 'mouv-radio-rnb-soul',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix',
'info_dict': {
'id': 'mouv-radio-musique-mix',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/fip/radio-rock',
'info_dict': {
'id': 'fip-radio-rock',
'title': str,
'live_status': 'is_live',
'ext': 'aac',
},
'params': {
'skip_download': 'Livestream',
},
}, {
'url': 'https://www.radiofrance.fr/mouv',
'only_matching': True,
}]
def _real_extract(self, url):
station_id, substation_id = self._match_valid_url(url).group('id', 'substation_id')
if substation_id:
webpage = self._download_webpage(url, station_id)
api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData')
else:
api_response = self._download_json(
f'https://www.radiofrance.fr/{station_id}/api/live', station_id)
formats, subtitles = [], {}
for media_source in traverse_obj(api_response, (('now', None), 'media', 'sources', lambda _, v: v['url'])):
if media_source.get('format') == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(media_source['url'], station_id, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'url': media_source['url'],
'abr': media_source.get('bitrate'),
})
return {
'id': join_nonempty(station_id, substation_id),
'title': traverse_obj(api_response, ('visual', 'legend')) or join_nonempty(
('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'), from_dict=api_response, delim=' - '),
'formats': formats,
'subtitles': subtitles,
'is_live': True,
}
class RadioFrancePlaylistBase(RadioFranceBaseIE):
"""Subclasses must set _METADATA_KEY"""
def _call_api(self, content_id, cursor, page_num):
raise NotImplementedError('This method must be implemented by subclasses')
def _generate_playlist_entries(self, content_id, content_response):
for page_num in itertools.count(2):
for entry in content_response['items']:
yield self.url_result(
f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, {
'title': 'title',
'description': 'standFirst',
'timestamp': ('publishedDate', {int_or_none}),
'thumbnail': ('visual', 'src'),
}))
next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False)
if not next_cursor:
break
content_response = self._call_api(content_id, next_cursor, page_num)
def _real_extract(self, url):
display_id = self._match_id(url)
metadata = self._download_json(
'https://www.radiofrance.fr/api/v2.1/path', display_id,
query={'value': urllib.parse.urlparse(url).path})['content']
content_id = metadata['id']
return self.playlist_result(
self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id,
display_id=display_id, **{**traverse_obj(metadata, {
'title': 'title',
'description': 'standFirst',
'thumbnail': ('visual', 'src'),
}), **traverse_obj(metadata, {
'title': 'name',
'description': 'role',
})})
class RadioFrancePodcastIE(RadioFrancePlaylistBase):
_VALID_URL = rf'''(?x)
{RadioFranceBaseIE._VALID_URL_BASE}
/(?:{RadioFranceBaseIE._STATIONS_RE})
/podcasts/(?P<id>[\w-]+)/?(?:[?#]|$)
'''
_TESTS = [{
'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert',
'info_dict': {
'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17',
'display_id': 'le-billet-vert',
'title': 'Le billet sciences',
'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_mincount': 11,
}, {
'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale',
'info_dict': {
'id': '566fd524-3074-4fbc-ac69-8696f2152a54',
'display_id': 'jean-marie-le-pen-l-obsession-nationale',
'title': 'Jean-Marie Le Pen, l\'obsession nationale',
'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_count': 7,
}, {
'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine',
'info_dict': {
'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d',
'display_id': 'serie-thomas-grjebine',
'title': 'Thomas Grjebine',
},
'playlist_count': 1,
}, {
'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip',
'info_dict': {
'id': '143dff38-e956-4a5d-8576-1c0b7242b99e',
'display_id': 'certains-l-aiment-fip',
'title': 'Certains laiment Fip',
'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_mincount': 321,
}, {
'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9',
'only_matching': True,
}, {
'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix',
'only_matching': True,
}]
_METADATA_KEY = 'expressions'
def _call_api(self, podcast_id, cursor, page_num):
return self._download_json(
f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id,
note=f'Downloading page {page_num}', query={'pageCursor': cursor})
class RadioFranceProfileIE(RadioFrancePlaylistBase):
_VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
'info_dict': {
'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
'display_id': 'thomas-pesquet',
'title': 'Thomas Pesquet',
'description': 'Astronaute à l\'agence spatiale européenne',
},
'playlist_mincount': 212,
}, {
'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie',
'info_dict': {
'id': '9593050b-0183-4972-a0b5-d8f699079e02',
'display_id': 'eugenie-bastie',
'title': 'Eugénie Bastié',
'description': 'Journaliste et essayiste',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
},
'playlist_mincount': 39,
}, {
'url': 'https://www.radiofrance.fr/personnes/lea-salame',
'only_matching': True,
}]
_METADATA_KEY = 'documents'
def _call_api(self, profile_id, cursor, page_num):
resp = self._download_json(
f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id,
note=f'Downloading page {page_num}', query={
'relation': 'personality',
'cursor': cursor,
})
resp['next'] = traverse_obj(resp, ('pagination', 'next'))
return resp
class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
_VALID_URL = rf'''(?x)
{RadioFranceBaseIE._VALID_URL_BASE}
/(?P<station>{RadioFranceBaseIE._STATIONS_RE})
/grille-programmes(?:\?date=(?P<date>[\d-]+))?
'''
_TESTS = [{
'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023',
'info_dict': {
'id': 'franceinter-program-20230217',
'upload_date': '20230217',
},
'playlist_count': 25,
}, {
'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023',
'info_dict': {
'id': 'franceculture-program-20230201',
'upload_date': '20230201',
},
'playlist_count': 25,
}, {
'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023',
'info_dict': {
'id': 'mouv-program-20230319',
'upload_date': '20230319',
},
'playlist_count': 3,
}, {
'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023',
'info_dict': {
'id': 'francemusique-program-20230318',
'upload_date': '20230318',
},
'playlist_count': 15,
}, {
'url': 'https://www.radiofrance.fr/franceculture/grille-programmes',
'only_matching': True,
}]
def _generate_playlist_entries(self, webpage_url, api_response):
for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])):
yield self.url_result(
urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE,
url_transparent=True, **traverse_obj(entry, {
'title': ('expression', 'title'),
'thumbnail': ('expression', 'visual', 'src'),
'timestamp': ('startTime', {int_or_none}),
'series_id': ('concept', 'id'),
'series': ('concept', 'title'),
}))
def _real_extract(self, url):
station, date = self._match_valid_url(url).group('station', 'date')
webpage = self._download_webpage(url, station)
grid_data = self._extract_data_from_webpage(webpage, station, 'grid')
upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d')
return self.playlist_result(
self._generate_playlist_entries(url, grid_data),
join_nonempty(station, 'program', upload_date), upload_date=upload_date)
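
Both RadioFrance playlist classes share a cursor-paginated generator: yield the current page's entries, then follow `pagination.next` until the API stops returning a cursor; `itertools.count(2)` only numbers the follow-up pages for download notes. A stubbed-out skeleton of that control flow:

```python
import itertools

PAGES = {  # hypothetical two-page API, keyed by cursor
    None: {'items': ['ep1', 'ep2'], 'next': 'cursor-2'},
    'cursor-2': {'items': ['ep3'], 'next': None},
}

def call_api(cursor, page_num):
    # Stands in for _call_api's 'Downloading page N' request
    return PAGES[cursor]

def generate_entries(response):
    for page_num in itertools.count(2):
        yield from response['items']
        next_cursor = response.get('next')
        if not next_cursor:
            break
        response = call_api(next_cursor, page_num)

assert list(generate_entries(PAGES[None])) == ['ep1', 'ep2', 'ep3']
```
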
@ -1,10 +1,11 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError
class RbgTumIE(InfoExtractor): class RbgTumIE(InfoExtractor):
_VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)' _VALID_URL = r'https://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
_TESTS = [{ _TESTS = [{
# Combined view # Combined view
'url': 'https://live.rbg.tum.de/w/cpp/22128', 'url': 'https://live.rbg.tum.de/w/cpp/22128',
@ -35,16 +36,18 @@ class RbgTumIE(InfoExtractor):
'title': 'Fachschaftsvollversammlung', 'title': 'Fachschaftsvollversammlung',
'series': 'Fachschaftsvollversammlung Informatik', 'series': 'Fachschaftsvollversammlung Informatik',
} }
}, {
'url': 'https://tum.live/w/linalginfo/27102',
'only_matching': True,
}, ] }, ]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8') m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title') lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
lecture_series_title = self._html_search_regex( lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
@ -57,9 +60,9 @@ def _real_extract(self, url):
class RbgTumCourseIE(InfoExtractor): class RbgTumCourseIE(InfoExtractor):
_VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)' _VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
_TESTS = [{ _TESTS = [{
'url': 'https://live.rbg.tum.de/course/2022/S/fpv', 'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
'info_dict': { 'info_dict': {
'title': 'Funktionale Programmierung und Verifikation (IN0003)', 'title': 'Funktionale Programmierung und Verifikation (IN0003)',
'id': '2022/S/fpv', 'id': '2022/S/fpv',
@ -69,7 +72,7 @@ class RbgTumCourseIE(InfoExtractor):
}, },
'playlist_count': 13, 'playlist_count': 13,
}, { }, {
'url': 'https://live.rbg.tum.de/course/2022/W/set', 'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
'info_dict': { 'info_dict': {
'title': 'SET FSMPIC', 'title': 'SET FSMPIC',
'id': '2022/W/set', 'id': '2022/W/set',
@ -78,16 +81,62 @@ class RbgTumCourseIE(InfoExtractor):
'noplaylist': False, 'noplaylist': False,
}, },
'playlist_count': 6, 'playlist_count': 6,
}, {
'url': 'https://tum.live/old/course/2023/S/linalginfo',
'only_matching': True,
}, ] }, ]
def _real_extract(self, url): def _real_extract(self, url):
course_id = self._match_id(url) course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
webpage = self._download_webpage(url, course_id) meta = self._download_json(
f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
query={'year': year, 'term': term}) or {}
lecture_series_title = meta.get('Name')
lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]
lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title') if not lectures:
webpage = self._download_webpage(url, course_id)
lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]
lecture_urls = [] return self.playlist_result(lectures, course_id, lecture_series_title)
for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
return self.playlist_result(lecture_urls, course_id, lecture_series_title)
class RbgTumNewCourseIE(InfoExtractor):
_VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
_TESTS = [{
'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
'info_dict': {
'title': 'Funktionale Programmierung und Verifikation (IN0003)',
'id': '2022/S/fpv',
},
'params': {
'noplaylist': False,
},
'playlist_count': 13,
}, {
'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
'info_dict': {
'title': 'SET FSMPIC',
'id': '2022/W/set',
},
'params': {
'noplaylist': False,
},
'playlist_count': 6,
}, {
'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
'only_matching': True,
}]
def _real_extract(self, url):
query = parse_qs(url)
errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
if errors:
raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}')
year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
hostname = self._match_valid_url(url).group('hostname')
return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
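
New-style tum.live course links carry the course key purely in the query string, so `RbgTumNewCourseIE` validates `year`/`term`/`slug` and bounces to the canonical `/old/course/...` URL handled above. The normalization on its own, using yt-dlp's `parse_qs`:

```python
from yt_dlp.utils import ExtractorError, parse_qs

def canonical_course_url(url, hostname='live.rbg.tum.de'):
    query = parse_qs(url)
    missing = [key for key in ('year', 'term', 'slug') if not query.get(key)]
    if missing:
        raise ExtractorError(f'Input URL is missing query parameters: {", ".join(missing)}')
    year, term, slug = (query[k][0] for k in ('year', 'term', 'slug'))
    return f'https://{hostname}/old/course/{year}/{term}/{slug}'

assert (canonical_course_url('https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3')
        == 'https://live.rbg.tum.de/old/course/2022/S/fpv')
```
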
@ -319,16 +319,20 @@ def add_thumbnail(src):
'format_id': 'fallback', 'format_id': 'fallback',
'format_note': 'DASH video, mp4_dash', 'format_note': 'DASH video, mp4_dash',
}] }]
formats.extend(self._extract_m3u8_formats( hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)) hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(self._extract_mpd_formats( formats.extend(hls_fmts)
dash_playlist_url, display_id, mpd_id='dash', fatal=False)) dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles(
dash_playlist_url, display_id, mpd_id='dash', fatal=False)
formats.extend(dash_fmts)
self._merge_subtitles(dash_subs, target=subtitles)
return { return {
**info, **info,
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
'duration': int_or_none(reddit_video.get('duration')), 'duration': int_or_none(reddit_video.get('duration')),
} }
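
Switching Reddit to the `_and_subtitles` extractor variants yields one subtitles dict per manifest, and `_merge_subtitles` folds the DASH captions into the HLS ones so a single dict reaches the info dict. At its core the merge is per-language concatenation that skips tracks already present; a minimal stand-in (the real helper dedupes on `(url, data)` pairs, so treat this as a sketch):

```python
def merge_subtitles(target, extra):
    # Essence of InfoExtractor._merge_subtitles: per-language concatenation,
    # skipping tracks whose URL is already known for that language
    for lang, tracks in (extra or {}).items():
        known = {t.get('url') for t in target.get(lang, [])}
        target.setdefault(lang, []).extend(
            t for t in tracks if t.get('url') not in known)
    return target

subtitles = {'en': [{'url': 'https://v.redd.it/x/hls_en.m3u8', 'ext': 'vtt'}]}
dash_subs = {'en': [{'url': 'https://v.redd.it/x/dash_en.vtt', 'ext': 'vtt'}],
             'de': [{'url': 'https://v.redd.it/x/dash_de.vtt', 'ext': 'vtt'}]}
merge_subtitles(subtitles, dash_subs)
assert len(subtitles['en']) == 2 and 'de' in subtitles
```
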
@@ -1,6 +1,7 @@
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    int_or_none,
     parse_duration,
     traverse_obj,
     unified_timestamp,
@@ -25,7 +26,7 @@ class RTVSLOIE(InfoExtractor):
         'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
         'info_dict': {
             'id': '174842550',
-            'ext': 'flv',
+            'ext': 'mp4',
             'release_timestamp': 1643140032,
             'upload_date': '20220125',
             'series': 'Dnevnik',
@@ -69,7 +70,21 @@ class RTVSLOIE(InfoExtractor):
             'tbr': 128000,
             'release_date': '20220201',
         },
+    }, {
+        'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750',
+        'info_dict': {
+            'id': '148350750',
+            'ext': 'mp4',
+            'title': 'Prvi šolski dan, mozaična oddaja za mlade',
+            'series': 'Razred zase',
+            'series_id': '148185730',
+            'duration': 1481,
+            'upload_date': '20121019',
+            'timestamp': 1350672122,
+            'release_date': '20121019',
+            'release_timestamp': 1350672122,
+            'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg',
+        },
     }, {
         'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
         'only_matching': True
@@ -98,13 +113,14 @@ def _real_extract(self, url):
         media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']

         formats = []
+        skip_protocols = ['smil', 'f4m', 'dash']
         adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
         if adaptive_url:
-            formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil'])
+            formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols)

         adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
         if adaptive_url:
-            for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']):
+            for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols):
                 formats.append({
                     **f,
                     'format_id': 'sign-' + f['format_id'],
@@ -114,19 +130,19 @@ def _real_extract(self, url):
                         else f.get('language'))
                 })

-        formats.extend(
-            {
-                'url': f['streams'][strm],
-                'ext': traverse_obj(f, 'mediaType', expected_type=str.lower),
-                'width': f.get('width'),
-                'height': f.get('height'),
-                'tbr': f.get('bitrate'),
-                'filesize': f.get('filesize'),
-            }
-            for strm in ('http', 'https')
-            for f in media.get('mediaFiles') or []
-            if traverse_obj(f, ('streams', strm))
-        )
+        for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))):
+            formats.append(traverse_obj(mediafile, {
+                'url': ('streams', 'https'),
+                'ext': ('mediaType', {str.lower}),
+                'width': ('width', {int_or_none}),
+                'height': ('height', {int_or_none}),
+                'tbr': ('bitrate', {int_or_none}),
+                'filesize': ('filesize', {int_or_none}),
+            }))
+
+        for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['hls_sec']))):
+            formats.extend(self._extract_wowza_formats(
+                mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols))

         if any('intermission.mp4' in x['url'] for x in formats):
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
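
The rewrite above uses traverse_obj's dict-mapping form: a mapping of output key to traversal path (with {callable} transforms) turns a raw API object into format fields in one call. An illustration with a made-up media object:

    from yt_dlp.utils import int_or_none, traverse_obj

    mediafile = {'streams': {'https': 'https://example.com/v.mp4'},
                 'mediaType': 'MP4', 'width': '1280', 'height': '720', 'bitrate': '1800'}
    print(traverse_obj(mediafile, {
        'url': ('streams', 'https'),
        'ext': ('mediaType', {str.lower}),
        'width': ('width', {int_or_none}),
        'height': ('height', {int_or_none}),
        'tbr': ('bitrate', {int_or_none}),
    }))
    # {'url': 'https://example.com/v.mp4', 'ext': 'mp4', 'width': 1280, 'height': 720, 'tbr': 1800}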


@@ -1,6 +1,6 @@
 import re

-from ..utils import parse_duration
+from ..utils import parse_duration, unescapeHTML
 from .common import InfoExtractor
@@ -16,7 +16,8 @@ class Rule34VideoIE(InfoExtractor):
             'title': 'Shot It-(mmd hmv)',
             'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
             'duration': 347.0,
-            'age_limit': 18
+            'age_limit': 18,
+            'tags': 'count:14'
         }
     },
     {
@@ -28,7 +29,8 @@ class Rule34VideoIE(InfoExtractor):
             'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
             'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
             'duration': 938.0,
-            'age_limit': 18
+            'age_limit': 18,
+            'tags': 'count:50'
         }
     },
 ]
@@ -57,5 +59,7 @@ def _real_extract(self, url):
             'title': title,
             'thumbnail': thumbnail,
             'duration': parse_duration(duration),
-            'age_limit': 18
+            'age_limit': 18,
+            'tags': list(map(unescapeHTML, re.findall(
+                r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
         }
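
The new tag extraction pairs re.findall with unescapeHTML so HTML entities in tag names are decoded. In isolation (sample markup made up):

    import re

    from yt_dlp.utils import unescapeHTML

    webpage = '<a class="tag_item" href="https://rule34video.com/tags/123/">tag &amp; name</a>'
    tags = list(map(unescapeHTML, re.findall(
        r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage)))
    print(tags)  # ['tag & name']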


@@ -33,7 +33,7 @@ class RumbleEmbedIE(InfoExtractor):
             'upload_date': '20191020',
             'channel_url': 'https://rumble.com/c/WMAR',
             'channel': 'WMAR',
-            'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
+            'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg',
             'duration': 234,
             'uploader': 'WMAR',
             'live_status': 'not_live',
@@ -84,7 +84,7 @@ class RumbleEmbedIE(InfoExtractor):
         'info_dict': {
             'id': 'v1essrt',
             'ext': 'mp4',
-            'title': 'startswith:lofi hip hop radio - beats to relax/study',
+            'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to',
             'timestamp': 1661519399,
             'upload_date': '20220826',
             'channel_url': 'https://rumble.com/c/LofiGirl',
@@ -99,7 +99,7 @@ class RumbleEmbedIE(InfoExtractor):
         'url': 'https://rumble.com/embed/v1amumr',
         'info_dict': {
             'id': 'v1amumr',
-            'ext': 'webm',
+            'ext': 'mp4',
             'fps': 60,
             'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live',
             'timestamp': 1658518457,
@@ -129,7 +129,7 @@ class RumbleEmbedIE(InfoExtractor):
             'duration': 92,
             'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
             'channel_url': 'https://rumble.com/c/RichSementa',
-            'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
+            'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg',
             'timestamp': 1654892716,
             'uploader': 'Mr Producer Media',
             'upload_date': '20220610',
@@ -144,7 +144,7 @@ def _extract_embed_urls(cls, url, webpage):
         if embeds:
             return embeds
         return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
-            r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
+            r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]

     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -236,7 +236,9 @@ def _real_extract(self, url):

 class RumbleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
-    _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>']
+    _EMBED_REGEX = [
+        r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>',
+        r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>']
     _TESTS = [{
         'add_ie': ['RumbleEmbed'],
         'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
@@ -254,6 +256,7 @@ class RumbleIE(InfoExtractor):
             'thumbnail': r're:https://.+\.jpg',
             'duration': 103,
             'like_count': int,
+            'dislike_count': int,
             'view_count': int,
             'live_status': 'not_live',
         }
@@ -278,6 +281,9 @@ class RumbleIE(InfoExtractor):
             'channel_url': 'https://rumble.com/c/Redacted',
             'live_status': 'not_live',
             'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
+            'like_count': int,
+            'dislike_count': int,
+            'view_count': int,
         },
     }, {
         'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
@@ -296,12 +302,15 @@ class RumbleIE(InfoExtractor):
             'channel_url': 'https://rumble.com/c/KimIversen',
             'channel': 'Kim Iversen',
             'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
+            'like_count': int,
+            'dislike_count': int,
+            'view_count': int,
         },
     }]

     _WEBPAGE_TESTS = [{
         'url': 'https://rumble.com/videos?page=2',
-        'playlist_count': 25,
+        'playlist_mincount': 24,
         'info_dict': {
             'id': 'videos?page=2',
             'title': 'All videos',
@@ -309,17 +318,16 @@ class RumbleIE(InfoExtractor):
             'age_limit': 0,
         },
     }, {
-        'url': 'https://rumble.com/live-videos',
-        'playlist_mincount': 19,
+        'url': 'https://rumble.com/browse/live',
+        'playlist_mincount': 25,
         'info_dict': {
-            'id': 'live-videos',
-            'title': 'Live Videos',
-            'description': 'Live videos on Rumble.com',
+            'id': 'live',
+            'title': 'Browse',
             'age_limit': 0,
         },
     }, {
         'url': 'https://rumble.com/search/video?q=rumble&sort=views',
-        'playlist_count': 24,
+        'playlist_mincount': 24,
         'info_dict': {
             'id': 'video?q=rumble&sort=views',
             'title': 'Search results for: rumble',
@@ -334,19 +342,20 @@ def _real_extract(self, url):
         if not url_info:
             raise UnsupportedError(url)

-        release_ts_str = self._search_regex(
-            r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)',
-            webpage, 'release date', fatal=False, default=None)
-        view_count_str = self._search_regex(r'<span class="media-heading-info">([\d,]+) Views',
-                                            webpage, 'view count', fatal=False, default=None)
-
-        return self.url_result(
-            url_info['url'], ie_key=url_info['ie_key'], url_transparent=True,
-            view_count=parse_count(view_count_str),
-            release_timestamp=parse_iso8601(release_ts_str),
-            like_count=parse_count(get_element_by_class('rumbles-count', webpage)),
-            description=clean_html(get_element_by_class('media-description', webpage)),
-        )
+        return {
+            '_type': 'url_transparent',
+            'ie_key': url_info['ie_key'],
+            'url': url_info['url'],
+            'release_timestamp': parse_iso8601(self._search_regex(
+                r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)),
+            'view_count': int_or_none(self._search_regex(
+                r'"userInteractionCount"\s*:\s*(\d+)', webpage, 'view count', default=None)),
+            'like_count': parse_count(self._search_regex(
+                r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)),
+            'dislike_count': parse_count(self._search_regex(
+                r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)),
+            'description': clean_html(get_element_by_class('media-description', webpage))
+        }


 class RumbleChannelIE(InfoExtractor):
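
The reworked metadata scraping leans on parse_count, which normalizes abbreviated counter strings like the rumbles_up_votes spans, while the raw "userInteractionCount" digits go through int_or_none. Quick behavior check (values illustrative):

    from yt_dlp.utils import int_or_none, parse_count

    print(parse_count('1.2K'))   # 1200
    print(parse_count('3,456'))  # 3456
    print(int_or_none('42'))     # 42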


@@ -1,5 +1,5 @@
 from .common import InfoExtractor
-from ..utils import traverse_obj
+from ..utils import traverse_obj, url_or_none


 class S4CIE(InfoExtractor):
@@ -11,7 +11,8 @@ class S4CIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Y Swn',
             'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
-            'duration': 5340
+            'duration': 5340,
+            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg'
         },
     }, {
         'url': 'https://www.s4c.cymru/clic/programme/856636948',
@@ -21,6 +22,7 @@ class S4CIE(InfoExtractor):
             'title': 'Am Dro',
             'duration': 2880,
             'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
+            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg'
         },
     }]
@@ -30,7 +32,7 @@ def _real_extract(self, url):
             f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}',
             video_id, fatal=False)

-        filename = self._download_json(
+        player_config = self._download_json(
             'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
                 'programme_id': video_id,
                 'signed': '0',
@@ -38,7 +40,13 @@ def _real_extract(self, url):
                 'mode': 'od',
                 'appId': 'clic',
                 'streamName': '',
-            }, note='Downloading player config JSON')['filename']
+            }, note='Downloading player config JSON')
+        subtitles = {}
+        for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
+            subtitles.setdefault(sub.get('3', 'en'), []).append({
+                'url': sub['0'],
+                'name': sub.get('1'),
+            })
         m3u8_url = self._download_json(
             'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
                 'mode': 'od',
@@ -46,17 +54,52 @@ def _real_extract(self, url):
                 'region': 'WW',
                 'extra': 'false',
                 'thirdParty': 'false',
-                'filename': filename,
+                'filename': player_config['filename'],
             }, note='Downloading streaming urls JSON')['hls']
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')

         return {
             'id': video_id,
-            'formats': formats,
+            'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'),
             'subtitles': subtitles,
+            'thumbnail': url_or_none(player_config.get('poster')),
             **traverse_obj(details, ('full_prog_details', 0, {
                 'title': (('programme_title', 'series_title'), {str}),
                 'description': ('full_billing', {str.strip}),
                 'duration': ('duration', {lambda x: int(x) * 60}),
             }), get_all=False),
         }
+
+
+class S4CSeriesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.s4c.cymru/clic/series/864982911',
+        'playlist_mincount': 6,
+        'info_dict': {
+            'id': '864982911',
+            'title': 'Iaith ar Daith',
+            'description': 'md5:e878ebf660dce89bd2ef521d7ce06397'
+        },
+    }, {
+        'url': 'https://www.s4c.cymru/clic/series/866852587',
+        'playlist_mincount': 8,
+        'info_dict': {
+            'id': '866852587',
+            'title': 'FFIT Cymru',
+            'description': 'md5:abcb3c129cb68dbb6cd304fd33b07e96'
+        },
+    }]
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+        series_details = self._download_json(
+            'https://www.s4c.cymru/df/series_details', series_id, query={
+                'lang': 'e',
+                'series_id': series_id,
+                'show_prog_in_series': 'Y'
+            }, note='Downloading series details JSON')
+
+        return self.playlist_result(
+            [self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id)
+             for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))],
+            series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str})))
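
Judging by the code above, the S4C player config apparently stores each subtitle track under positional string keys ('0' = URL, '1' = display name, '3' = language code); the setdefault pattern groups the tracks per language. A standalone sketch with made-up data:

    subtitles = {}
    for sub in [{'0': 'https://example.com/en.vtt', '1': 'English', '3': 'en'},
                {'0': 'https://example.com/cy.vtt', '1': 'Cymraeg', '3': 'cy'}]:
        subtitles.setdefault(sub.get('3', 'en'), []).append({
            'url': sub['0'],
            'name': sub.get('1'),
        })
    print(subtitles)  # one list of tracks per language code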


@@ -1,3 +1,4 @@
+import base64
 import re

 from .common import InfoExtractor
@@ -8,7 +9,12 @@
 from ..utils import (
     ExtractorError,
     int_or_none,
+    float_or_none,
+    url_or_none,
+    unified_timestamp,
     try_get,
+    urljoin,
+    traverse_obj,
 )
@@ -31,13 +37,20 @@ class SohuIE(InfoExtractor):
             'id': '409385080',
             'ext': 'mp4',
             'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
-        }
+        },
+        'skip': 'no longer available',
     }, {
         'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
         'info_dict': {
             'id': '78693464',
             'ext': 'mp4',
             'title': '【爱范品】第31期MWC见不到的奇葩手机',
+            'uploader': '爱范儿视频',
+            'duration': 213,
+            'timestamp': 1425519600,
+            'upload_date': '20150305',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
+            'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
         }
     }, {
         'note': 'Multipart video',
@@ -45,6 +58,12 @@ class SohuIE(InfoExtractor):
         'info_dict': {
             'id': '78910339',
             'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            'uploader': '小苍cany',
+            'duration': 744.0,
+            'timestamp': 1426269360,
+            'upload_date': '20150313',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
+            'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
         },
         'playlist': [{
             'info_dict': {
@@ -75,6 +94,11 @@ class SohuIE(InfoExtractor):
             'id': '78932792',
             'ext': 'mp4',
             'title': 'youtube-dl testing video',
+            'duration': 360,
+            'timestamp': 1426348620,
+            'upload_date': '20150314',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M02/8A/00/MTAuMTAuODguNzk=/6_14cee1be192g102SysCutcloud_78932792_7_7b.jpg',
+            'tags': [],
         },
         'params': {
             'skip_download': True
@@ -100,7 +124,7 @@ def _fetch_data(vid_id, mytv=False):

         webpage = self._download_webpage(url, video_id)

-        title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
+        title = re.sub(r'( - 高清正版在线观看)? - 搜狐视频$', '', self._og_search_title(webpage))

         vid = self._html_search_regex(
             r'var vid ?= ?["\'](\d+)["\']',
@@ -132,7 +156,9 @@ def _fetch_data(vid_id, mytv=False):
             allot = format_data['allot']
             data = format_data['data']

-            clips_url = data['clipsURL']
+            clip_url = traverse_obj(data, (('clipsURL', 'mp4PlayUrl'), i, {url_or_none}), get_all=False)
+            if not clip_url:
+                raise ExtractorError(f'Unable to extract url for clip {i}')
             su = data['su']

             video_url = 'newflv.sohu.ccgslb.net'
@@ -142,9 +168,9 @@ def _fetch_data(vid_id, mytv=False):
             while 'newflv.sohu.ccgslb.net' in video_url:
                 params = {
                     'prot': 9,
-                    'file': clips_url[i],
+                    'file': clip_url,
                     'new': su[i],
-                    'prod': 'flash',
+                    'prod': 'h5n',
                     'rb': 1,
                 }
@@ -193,6 +219,75 @@ def _fetch_data(vid_id, mytv=False):
             'entries': playlist,
             'id': video_id,
             'title': title,
+            'duration': traverse_obj(vid_data, ('data', 'totalDuration', {float_or_none})),
         }

-        return info
+        if mytv:
+            publish_time = unified_timestamp(self._search_regex(
+                r'publishTime:\s*["\'](\d+-\d+-\d+ \d+:\d+)["\']', webpage, 'publish time', fatal=False))
+        else:
+            publish_time = traverse_obj(vid_data, ('tv_application_time', {unified_timestamp}))
+        return {
+            'timestamp': publish_time - 8 * 3600 if publish_time else None,
+            **traverse_obj(vid_data, {
+                'alt_title': ('data', 'subName', {str}),
+                'uploader': ('wm_data', 'wm_username', {str}),
+                'thumbnail': ('data', 'coverImg', {url_or_none}),
+                'tags': ('data', 'tag', {str.split}),
+            }),
+            **info,
+        }
+
+
+class SohuVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.sohu\.com/v/(?P<id>[\w=-]+)\.html(?:$|[#?])'
+
+    _TESTS = [{
+        'note': 'Multipart video',
+        'url': 'https://tv.sohu.com/v/MjAyMzA2MTQvbjYwMTMxNTE5Mi5zaHRtbA==.html',
+        'info_dict': {
+            'id': '601315192',
+            'title': '《淬火丹心》第1集',
+            'alt_title': '“点天灯”发生事故',
+            'duration': 2701.692,
+            'timestamp': 1686758040,
+            'upload_date': '20230614',
+            'thumbnail': 'http://photocdn.tv.sohu.com/img/20230614/vrsa_hor_1686738763256_454010551.jpg',
+        },
+        'playlist_mincount': 9,
+        'skip': 'Only available in China',
+    }, {
+        'url': 'https://tv.sohu.com/v/dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s.html',
+        'info_dict': {
+            'id': '78693464',
+            'ext': 'mp4',
+            'title': '【爱范品】第31期MWC见不到的奇葩手机',
+            'uploader': '爱范儿视频',
+            'duration': 213,
+            'timestamp': 1425519600,
+            'upload_date': '20150305',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
+            'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
+        }
+    }, {
+        'note': 'Multipart video',
+        'url': 'https://tv.sohu.com/v/dXMvMjQyNTYyMTYzLzc4OTEwMzM5LnNodG1s.html?src=pl',
+        'info_dict': {
+            'id': '78910339',
+            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            'uploader': '小苍cany',
+            'duration': 744.0,
+            'timestamp': 1426269360,
+            'upload_date': '20150313',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
+            'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
+        },
+        'playlist_mincount': 3,
+    }]
+
+    def _real_extract(self, url):
+        encoded_id = self._match_id(url)
+        path = base64.urlsafe_b64decode(encoded_id).decode()
+        subdomain = 'tv' if re.match(r'\d+/n\d+\.shtml', path) else 'my.tv'
+        return self.url_result(urljoin(f'http://{subdomain}.sohu.com/', path), SohuIE)
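
Two details worth noting above: the `- 8 * 3600` adjustment apparently converts the site's UTC+8 publish times into UTC epoch values, and the new SohuVIE URLs embed the legacy path as URL-safe base64. The decode step in isolation:

    import base64

    encoded_id = 'dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s'  # from the test URL above
    print(base64.urlsafe_b64decode(encoded_id).decode())  # us/232799889/78693464.shtml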


@@ -15,7 +15,6 @@
     UserNotLive,
     determine_ext,
     format_field,
-    get_element_by_id,
     get_first,
     int_or_none,
     join_nonempty,
@@ -50,8 +49,9 @@ def _create_url(user_id, video_id):
         return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'

     def _get_sigi_state(self, webpage, display_id):
-        return self._parse_json(get_element_by_id(
-            'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
+        return self._search_json(
+            r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>', webpage,
+            'sigi state', display_id, end_pattern=r'</script>')

     def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
                        note='Downloading API JSON', errnote='Unable to download API page'):
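
For readers unfamiliar with _search_json, the replacement call behaves roughly like this plain re + json sketch (simplified; the real helper is more careful about nested and partial JSON):

    import json
    import re

    webpage = '<script id="SIGI_STATE">{"AppContext": {"lang": "en"}}</script>'  # dummy page
    sigi_state = json.loads(re.search(
        r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>(.+?)</script>', webpage).group(1))
    print(sigi_state['AppContext']['lang'])  # en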


@@ -1,10 +1,14 @@
+import urllib.parse
+
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     extract_attributes,
     int_or_none,
     parse_duration,
+    traverse_obj,
     try_get,
+    url_or_none,
 )
@@ -12,6 +16,36 @@ class TV5MondePlusIE(InfoExtractor):
     IE_DESC = 'TV5MONDE+'
     _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
     _TESTS = [{
+        # movie
+        'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
+        'md5': 'c86f60bf8b75436455b1b205f9745955',
+        'info_dict': {
+            'id': 'ZX0ipMyFQq_6D4BA7b',
+            'display_id': 'les-novices',
+            'ext': 'mp4',
+            'title': 'Les novices',
+            'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
+            'upload_date': '20230821',
+            'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
+            'duration': 5177,
+            'episode': 'Les novices',
+        },
+    }, {
+        # series episode
+        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
+        'info_dict': {
+            'id': 'wJ0eeEPozr_6D4BA7b',
+            'display_id': 'opj-les-dents-de-la-terre-2',
+            'ext': 'mp4',
+            'title': "OPJ - Les dents de la Terre (2)",
+            'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
+            'upload_date': '20230823',
+            'series': 'OPJ',
+            'episode': 'Les dents de la Terre (2)',
+            'duration': 2877,
+            'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
+        },
+    }, {
         # movie
         'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
         'md5': '32fa0cde16a4480d1251502a66856d5f',
@@ -23,6 +57,7 @@ class TV5MondePlusIE(InfoExtractor):
             'description': 'md5:570e8bb688036ace873b2d50d24c026d',
             'upload_date': '20210819',
         },
+        'skip': 'no longer available',
     }, {
         # series episode
         'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
@@ -39,6 +74,7 @@ class TV5MondePlusIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+        'skip': 'no longer available',
     }, {
         'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
         'only_matching': True,
@@ -63,20 +99,45 @@ def _real_extract(self, url):
         video_files = self._parse_json(
             vpl_data['data-broadcast'], display_id)
         formats = []
-        for video_file in video_files:
-            v_url = video_file.get('url')
-            if not v_url:
-                continue
-            video_format = video_file.get('format') or determine_ext(v_url)
-            if video_format == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    v_url, display_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
-            else:
-                formats.append({
-                    'url': v_url,
-                    'format_id': video_format,
-                })
+        video_id = None
+
+        def process_video_files(v):
+            nonlocal video_id
+            for video_file in v:
+                v_url = video_file.get('url')
+                if not v_url:
+                    continue
+
+                if video_file.get('type') == 'application/deferred':
+                    d_param = urllib.parse.quote(v_url)
+                    token = video_file.get('token')
+                    if not token:
+                        continue
+                    deferred_json = self._download_json(
+                        f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id,
+                        note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False)
+                    v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
+                    if not v_url:
+                        continue
+                    # data-guid from the webpage isn't stable, use the material id from the json urls
+                    video_id = self._search_regex(
+                        r'materials/([\da-zA-Z]{10}_[\da-fA-F]{7})/', v_url, 'video id', default=None)
+                    process_video_files(deferred_json)
+
+                video_format = video_file.get('format') or determine_ext(v_url)
+                if video_format == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        v_url, display_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+                elif video_format == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        v_url, display_id, fatal=False))
+                else:
+                    formats.append({
+                        'url': v_url,
+                        'format_id': video_format,
+                    })
+
+        process_video_files(video_files)

         metadata = self._parse_json(
             vpl_data['data-metadata'], display_id)
@@ -100,10 +161,11 @@ def _real_extract(self, url):
         if upload_date:
             upload_date = upload_date.replace('_', '')

-        video_id = self._search_regex(
-            (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
-             r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
-            default=display_id)
+        if not video_id:
+            video_id = self._search_regex(
+                (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+                 r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
+                default=display_id)

         return {
             'id': video_id,
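
The refactor turns the format loop into a closure that can recurse into deferred assets, reporting the discovered id back through nonlocal rather than a return value. The pattern in miniature:

    def outer():
        video_id = None

        def inner():
            nonlocal video_id  # rebind the enclosing variable instead of shadowing it
            video_id = 'ZX0ipMyFQq_6D4BA7b'

        inner()
        return video_id

    print(outer())  # ZX0ipMyFQq_6D4BA7b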


@@ -22,7 +22,7 @@


 class TwitCastingIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/(?:movie|twplayer)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<uploader_id>[^/?#]+)/(?:movie|twplayer)/(?P<id>\d+)'
     _M3U8_HEADERS = {
         'Origin': 'https://twitcasting.tv',
         'Referer': 'https://twitcasting.tv/',
@@ -231,7 +231,7 @@ def find_dmu(x):


 class TwitCastingLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/?(?:[#?]|$)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/?(?:[#?]|$)'
     _TESTS = [{
         'url': 'https://twitcasting.tv/ivetesangalo',
         'only_matching': True,
@@ -265,8 +265,15 @@ def _real_extract(self, url):


 class TwitCastingUserIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/show/?(?:[#?]|$)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
     _TESTS = [{
+        'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
+        'info_dict': {
+            'id': 'natsuiromatsuri',
+            'title': 'natsuiromatsuri - Live History',
+        },
+        'playlist_mincount': 235,
+    }, {
         'url': 'https://twitcasting.tv/noriyukicas/show',
         'only_matching': True,
     }]
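
The `[^/]` to `[^/?#]` tightening matters because an id captured with `[^/]+` happily swallows a trailing query string; stopping at `?` and `#` keeps the id clean:

    import re

    print(re.match(r'(?P<id>[^/]+)', 'noriyukicas?page=2').group('id'))    # noriyukicas?page=2
    print(re.match(r'(?P<id>[^/?#]+)', 'noriyukicas?page=2').group('id'))  # noriyukicas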


@@ -1,9 +1,10 @@
+import functools
 import json
+import random
 import re

 from .common import InfoExtractor
 from .periscope import PeriscopeBaseIE, PeriscopeIE
-from ..compat import functools  # isort: split
 from ..compat import (
     compat_parse_qs,
     compat_urllib_parse_unquote,
@@ -147,10 +148,14 @@ def _search_dimensions_in_video_url(a_format, video_url):
     def is_logged_in(self):
         return bool(self._get_cookies(self._API_BASE).get('auth_token'))

+    @functools.cached_property
+    def _selected_api(self):
+        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
+
     def _fetch_guest_token(self, display_id):
         guest_token = traverse_obj(self._download_json(
             f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
-            headers=self._set_base_headers(legacy=display_id and self._configuration_arg('legacy_api'))),
+            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
             ('guest_token', {str}))
         if not guest_token:
             raise ExtractorError('Could not retrieve guest token')
@@ -295,7 +300,7 @@ def input_dict(subtask_id, text):
         self.report_login()

     def _call_api(self, path, video_id, query={}, graphql=False):
-        headers = self._set_base_headers(legacy=not graphql and self._configuration_arg('legacy_api'))
+        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
         headers.update({
             'x-twitter-auth-type': 'OAuth2Session',
             'x-twitter-client-language': 'en',
@@ -707,6 +712,7 @@ class TwitterIE(TwitterBaseIE):
             'tags': [],
             'age_limit': 0,
         },
+        'skip': 'This Tweet is unavailable',
     }, {
         # not available in Periscope
         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
@@ -721,6 +727,7 @@ class TwitterIE(TwitterBaseIE):
             'view_count': int,
         },
         'add_ie': ['TwitterBroadcast'],
+        'skip': 'Broadcast no longer exists',
     }, {
         # unified card
         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
@@ -773,9 +780,9 @@ class TwitterIE(TwitterBaseIE):
         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
         'info_dict': {
             'id': '1577719286659006464',
-            'title': 'Ultima📛 | #вʟм - Test',
+            'title': 'Ultima📛| New Era - Test',
             'description': 'Test https://t.co/Y3KEZD7Dad',
-            'uploader': 'Ultima📛 | #вʟм',
+            'uploader': 'Ultima📛| New Era',
             'uploader_id': 'UltimaShadowX',
             'uploader_url': 'https://twitter.com/UltimaShadowX',
             'upload_date': '20221005',
@@ -811,7 +818,7 @@ class TwitterIE(TwitterBaseIE):
             'age_limit': 0,
         },
     }, {
-        # Adult content, fails if not logged in (GraphQL)
+        # Adult content, fails if not logged in
         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
         'info_dict': {
             'id': '1575199163847000068',
@@ -831,9 +838,10 @@ class TwitterIE(TwitterBaseIE):
             'age_limit': 18,
             'tags': []
         },
+        'params': {'skip_download': 'The media could not be played'},
         'skip': 'Requires authentication',
     }, {
-        # Playlist result only with auth
+        # Playlist result only with graphql API
         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
         'playlist_mincount': 2,
         'info_dict': {
@@ -898,7 +906,7 @@ class TwitterIE(TwitterBaseIE):
             'uploader_id': 'MoniqueCamarra',
             'live_status': 'was_live',
             'release_timestamp': 1658417414,
-            'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
+            'description': 'md5:acce559345fd49f129c20dbcda3f1201',
             'timestamp': 1658407771,
             'release_date': '20220721',
             'upload_date': '20220721',
@@ -1007,10 +1015,10 @@ class TwitterIE(TwitterBaseIE):
             'view_count': int,
             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
             'age_limit': 0,
-            'uploader': 'Mün The Friend Of YWAP',
+            'uploader': 'Mün',
             'repost_count': int,
             'upload_date': '20221206',
-            'title': 'Mün The Friend Of YWAP - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
+            'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
             'comment_count': int,
             'like_count': int,
             'tags': [],
@@ -1019,7 +1027,7 @@ class TwitterIE(TwitterBaseIE):
             'timestamp': 1670306984.0,
         },
     }, {
-        # url to retweet id w/ legacy api
+        # retweeted_status (private)
         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
         'info_dict': {
             'id': '1623274794488659969',
@@ -1039,32 +1047,84 @@ class TwitterIE(TwitterBaseIE):
             'like_count': int,
             'repost_count': int,
         },
-        'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
         'skip': 'Protected tweet',
     }, {
-        # orig tweet w/ graphql
-        'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
-        'info_dict': {
-            'id': '1623274794488659969',
-            'display_id': '1623739803874349067',
-            'ext': 'mp4',
-            'title': '@selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people: Whoopsie-daisy',
-            'description': 'md5:9258bdbb54793bdc124fe1cd47e96c6a',
-            'uploader': '@selfisekai@hackerspace.pl 🐀',
-            'uploader_id': 'liberdalau',
-            'uploader_url': 'https://twitter.com/liberdalau',
-            'age_limit': 0,
-            'tags': [],
-            'duration': 8.033,
-            'timestamp': 1675964711.0,
-            'upload_date': '20230209',
-            'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
-            'like_count': int,
-            'view_count': int,
-            'repost_count': int,
-            'comment_count': int,
-        },
-        'skip': 'Protected tweet',
+        # retweeted_status
+        'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
+        'info_dict': {
+            'id': '1694928337846538240',
+            'ext': 'mp4',
+            'display_id': '1695424220702888009',
+            'title': 'md5:e8daa9527bc2b947121395494f786d9d',
+            'description': 'md5:004f2d37fd58737724ec75bc7e679938',
+            'uploader': 'Benny Johnson',
+            'uploader_id': 'bennyjohnson',
+            'uploader_url': 'https://twitter.com/bennyjohnson',
+            'age_limit': 0,
+            'tags': [],
+            'duration': 45.001,
+            'timestamp': 1692962814.0,
+            'upload_date': '20230825',
+            'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+            'like_count': int,
+            'repost_count': int,
+            'view_count': int,
+            'comment_count': int,
+        },
+    }, {
+        # retweeted_status w/ legacy API
+        'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
+        'info_dict': {
+            'id': '1694928337846538240',
+            'ext': 'mp4',
+            'display_id': '1695424220702888009',
+            'title': 'md5:e8daa9527bc2b947121395494f786d9d',
+            'description': 'md5:004f2d37fd58737724ec75bc7e679938',
+            'uploader': 'Benny Johnson',
+            'uploader_id': 'bennyjohnson',
+            'uploader_url': 'https://twitter.com/bennyjohnson',
+            'age_limit': 0,
+            'tags': [],
+            'duration': 45.001,
+            'timestamp': 1692962814.0,
+            'upload_date': '20230825',
+            'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+            'like_count': int,
+            'repost_count': int,
+        },
+        'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
+    }, {
+        # Broadcast embedded in tweet
+        'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
+        'info_dict': {
+            'id': '1yNGaNLjEblJj',
+            'ext': 'mp4',
+            'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
+            'uploader': 'Jessica Dobson',
+            'uploader_id': '1DZEoDwDovRQa',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'view_count': int,
+        },
+        'add_ie': ['TwitterBroadcast'],
+    }, {
+        # Animated gif and quote tweet video, with syndication API
+        'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
+        'playlist_mincount': 2,
+        'info_dict': {
+            'id': '1696256659889565950',
+            'title': 'BAKOON - https://t.co/zom968d0a0',
+            'description': 'https://t.co/zom968d0a0',
+            'tags': [],
+            'uploader': 'BAKOON',
+            'uploader_id': 'BAKKOOONN',
+            'uploader_url': 'https://twitter.com/BAKKOOONN',
+            'age_limit': 18,
+            'timestamp': 1693254077.0,
+            'upload_date': '20230828',
+            'like_count': int,
+        },
+        'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
+        'expected_warnings': ['Not all metadata'],
     }, {
         # onion route
         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@@ -1103,6 +1163,14 @@ class TwitterIE(TwitterBaseIE):
         'only_matching': True,
     }]

+    _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
+
+    @property
+    def _GRAPHQL_ENDPOINT(self):
+        if self.is_logged_in:
+            return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
+        return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
+
     def _graphql_to_legacy(self, data, twid):
         result = traverse_obj(data, (
             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
@@ -1130,9 +1198,14 @@ def _graphql_to_legacy(self, data, twid):
             'user': ('core', 'user_results', 'result', 'legacy'),
             'card': ('card', 'legacy'),
             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
+            'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
         }, expected_type=dict, default={}))

-        # extra transformation is needed since result does not match legacy format
+        # extra transformations needed since result does not match legacy format
+        if status.get('retweeted_status'):
+            status['retweeted_status']['user'] = traverse_obj(status, (
+                'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
+
         binding_values = {
             binding_value.get('key'): binding_value.get('value')
             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
@@ -1208,33 +1281,42 @@ def _build_graphql_query(self, media_id):
         }

     def _extract_status(self, twid):
-        if self.is_logged_in:
-            return self._graphql_to_legacy(
-                self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)
-
-        try:
-            if not self._configuration_arg('legacy_api'):
-                return self._graphql_to_legacy(
-                    self._call_graphql_api('2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId', twid), twid)
-            return traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
-                'cards_platform': 'Web-12',
-                'include_cards': 1,
-                'include_reply_count': 1,
-                'include_user_entities': 0,
-                'tweet_mode': 'extended',
-            }), 'retweeted_status', None)
-        except ExtractorError as e:
-            if e.expected:
-                raise
-            self.report_warning(
-                f'{e.orig_msg}. Falling back to syndication endpoint; some metadata may be missing', twid)
-
-        status = self._download_json(
-            'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
-            headers={'User-Agent': 'Googlebot'}, query={'id': twid})
-        status['extended_entities'] = {'media': status.get('mediaDetails')}
-        return status
+        if self.is_logged_in or self._selected_api == 'graphql':
+            status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
+
+        elif self._selected_api == 'legacy':
+            status = self._call_api(f'statuses/show/{twid}.json', twid, {
+                'cards_platform': 'Web-12',
+                'include_cards': 1,
+                'include_reply_count': 1,
+                'include_user_entities': 0,
+                'tweet_mode': 'extended',
+            })
+
+        elif self._selected_api == 'syndication':
+            self.report_warning(
+                'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
+            status = self._download_json(
+                'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
+                headers={'User-Agent': 'Googlebot'}, query={
+                    'id': twid,
+                    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
+                    'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
+                })
+            if not status:
+                raise ExtractorError('Syndication endpoint returned empty JSON response')
+            # Transform the result so its structure matches that of legacy/graphql
+            media = []
+            for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
+                detail['id_str'] = traverse_obj(detail, (
+                    'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
+                media.append(detail)
+            status['extended_entities'] = {'media': media}
+
+        else:
+            raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
+
+        return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}

     def _real_extract(self, url):
         twid, selected_index = self._match_valid_url(url).group('id', 'index')
@@ -1266,10 +1348,7 @@ def _real_extract(self, url):
         }

         def extract_from_video_info(media):
-            media_id = traverse_obj(media, 'id_str', 'id', (
-                'video_info', 'variants', ..., 'url',
-                {functools.partial(re.search, r'_video/(\d+)/')}, 1
-            ), get_all=False, expected_type=str_or_none) or twid
+            media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
             self.write_debug(f'Extracting from video info: {media_id}')

             formats = []
@@ -1503,6 +1582,8 @@ def _real_extract(self, url):
         broadcast = self._call_api(
             'broadcasts/show.json', broadcast_id,
             {'ids': broadcast_id})['broadcasts'][broadcast_id]
+        if not broadcast:
+            raise ExtractorError('Broadcast no longer exists', expected=True)
         info = self._parse_broadcast_data(broadcast, broadcast_id)
         media_key = broadcast['media_key']
         source = self._call_api(
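
The TODO in the syndication branch records the token formula the web client computes. A rough Python transliteration follows, purely for illustration; JavaScript's Number.prototype.toString(36) emits a value-dependent number of fractional digits, which is presumably why the extractor settles for a random token instead:

    import math

    def syndication_token(twid: str) -> str:
        # ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
        value = int(twid) / 1e15 * math.pi
        digits = '0123456789abcdefghijklmnopqrstuvwxyz'
        integer = int(value)
        frac = value - integer
        out = ''
        while integer:
            integer, rem = divmod(integer, 36)
            out = digits[rem] + out
        out = (out or '0') + '.'
        for _ in range(11):  # approximates the JS fractional digit count
            frac *= 36
            digit = int(frac)
            frac -= digit
            out += digits[digit]
        return out.replace('.', '').replace('0', '')

    print(syndication_token('1696256659889565950'))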


@@ -38,6 +38,7 @@ class VideaIE(InfoExtractor):
             'title': 'Az őrült kígyász 285 kígyót enged szabadon',
             'thumbnail': r're:^https?://.*',
             'duration': 21,
+            'age_limit': 0,
         },
     }, {
         'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
@@ -48,6 +49,7 @@ class VideaIE(InfoExtractor):
             'title': 'Supercars előzés',
             'thumbnail': r're:^https?://.*',
             'duration': 64,
+            'age_limit': 0,
         },
     }, {
         'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
@@ -58,6 +60,7 @@ class VideaIE(InfoExtractor):
             'title': 'Az őrült kígyász 285 kígyót enged szabadon',
             'thumbnail': r're:^https?://.*',
             'duration': 21,
+            'age_limit': 0,
         },
     }, {
         'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
@@ -124,7 +127,7 @@ def _real_extract(self, url):
             query['_t'] = result[:16]

         b64_info, handle = self._download_webpage_handle(
-            'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
+            'http://videa.hu/player/xml', video_id, query=query)
         if b64_info.startswith('<?xml'):
             info = self._parse_xml(b64_info, video_id)
         else:


@@ -173,6 +173,7 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
         'skip': 'HTTP Error 404: Not Found',
     },
     {
+        # FIXME: Asset JSON is directly embedded in webpage
         'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
         'info_dict': {
             'id': 'mdb-2296252',
@@ -221,6 +222,8 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
             'id': 'mdb-869971',
             'ext': 'mp4',
             'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'alt_title': 'COSMO Livestream',
+            'live_status': 'is_live',
             'upload_date': '20160101',
         },
         'params': {
@@ -248,6 +251,16 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
         'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
         'only_matching': True,
     },
+    {
+        'url': 'https://www1.wdr.de/mediathek/video/sendungen/rockpalast/video-baroness---freak-valley-festival--100.html',
+        'info_dict': {
+            'id': 'mdb-2741028',
+            'ext': 'mp4',
+            'title': 'Baroness - Freak Valley Festival 2022',
+            'alt_title': 'Rockpalast',
+            'upload_date': '20220725',
+        },
+    }
 ]

 def _real_extract(self, url):
@@ -259,7 +272,7 @@ def _real_extract(self, url):

         # Article with several videos
-        # for wdr.de the data-extension is in a tag with the class "mediaLink"
+        # for wdr.de the data-extension-ard is in a tag with the class "mediaLink"
         # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
         # for wdrmaus, in a tag with the class "videoButton" (previously a link
         # to the page in a multiline "videoLink"-tag)
@@ -268,7 +281,7 @@ def _real_extract(self, url):
             (?:
                 (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
                 (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
-            )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+            )data-extension(?:-ard)?=(["\'])(?P<data>(?:(?!\3).)+)\3
             ''', webpage):
         media_link_obj = self._parse_json(
             mobj.group('data'), display_id, transform_source=js_to_json,
@@ -295,7 +308,7 @@ def _real_extract(self, url):
                 compat_urlparse.urljoin(url, mobj.group('href')),
                 ie=WDRPageIE.ie_key())
             for mobj in re.finditer(
-                r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension=',
+                r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension(?:-ard)?=',
                 webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
         ]
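
The `(?:-ard)?` additions accept both attribute spellings that WDR pages now use; a two-line check:

    import re

    for snippet in ('<a href="/video.html" data-extension="{}">', '<a href="/video.html" data-extension-ard="{}">'):
        print(bool(re.search(r'\bdata-extension(?:-ard)?=', snippet)))  # True / True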


@@ -1,134 +1,241 @@
 import json
 import random
-import re
+import itertools
+import urllib.parse

 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_str,
-)
 from ..utils import (
-    js_to_json,
+    int_or_none,
+    make_archive_id,
+    mimetype2ext,
+    parse_resolution,
+    str_or_none,
     strip_jsonp,
+    traverse_obj,
+    url_or_none,
     urlencode_postdata,
+    urljoin,
 )


-class WeiboIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
-    _TEST = {
-        'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
-        'info_dict': {
-            'id': 'Fp6RGfbff',
-            'ext': 'mp4',
-            'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        # to get Referer url for genvisitor
-        webpage, urlh = self._download_webpage_handle(url, video_id)
-
-        visitor_url = urlh.url
-
-        if 'passport.weibo.com' in visitor_url:
-            # first visit
-            visitor_data = self._download_json(
-                'https://passport.weibo.com/visitor/genvisitor', video_id,
-                note='Generating first-visit data',
-                transform_source=strip_jsonp,
-                headers={'Referer': visitor_url},
-                data=urlencode_postdata({
-                    'cb': 'gen_callback',
-                    'fp': json.dumps({
-                        'os': '2',
-                        'browser': 'Gecko57,0,0,0',
-                        'fonts': 'undefined',
-                        'screenInfo': '1440*900*24',
-                        'plugins': '',
-                    }),
-                }))
-
-            tid = visitor_data['data']['tid']
-            cnfd = '%03d' % visitor_data['data']['confidence']
-
-            self._download_webpage(
-                'https://passport.weibo.com/visitor/visitor', video_id,
-                note='Running first-visit callback',
-                query={
-                    'a': 'incarnate',
-                    't': tid,
-                    'w': 2,
-                    'c': cnfd,
-                    'cb': 'cross_domain',
-                    'from': 'weibo',
-                    '_rand': random.random(),
-                })
-
-            webpage = self._download_webpage(
-                url, video_id, note='Revisiting webpage')
-
-        title = self._html_extract_title(webpage)
-
-        video_formats = compat_parse_qs(self._search_regex(
-            r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))
-
-        formats = []
-        supported_resolutions = (480, 720)
-        for res in supported_resolutions:
-            vid_urls = video_formats.get(compat_str(res))
-            if not vid_urls or not isinstance(vid_urls, list):
-                continue
-
-            vid_url = vid_urls[0]
-            formats.append({
-                'url': vid_url,
-                'height': res,
-            })
-
-        uploader = self._og_search_property(
-            'nick-name', webpage, 'uploader', default=None)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'uploader': uploader,
-            'formats': formats
-        }
-
-
-class WeiboMobileIE(InfoExtractor):
-    _VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
-    _TEST = {
-        'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
-        'info_dict': {
-            'id': '4189191225395228',
-            'ext': 'mp4',
-            'title': '午睡当然是要甜甜蜜蜜的啦',
-            'uploader': '柴犬柴犬'
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        # to get Referer url for genvisitor
-        webpage = self._download_webpage(url, video_id, note='visit the page')
-
-        weibo_info = self._parse_json(self._search_regex(
-            r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
-            webpage, 'js_code', flags=re.DOTALL),
-            video_id, transform_source=js_to_json)
-
-        status_data = weibo_info.get('status', {})
-        page_info = status_data.get('page_info')
-        title = status_data['status_title']
-        uploader = status_data.get('user', {}).get('screen_name')
-
-        return {
-            'id': video_id,
-            'title': title,
-            'uploader': uploader,
-            'url': page_info['media_info']['stream_url']
-        }
+class WeiboBaseIE(InfoExtractor):
+    def _update_visitor_cookies(self, video_id):
+        visitor_data = self._download_json(
+            'https://passport.weibo.com/visitor/genvisitor', video_id,
+            note='Generating first-visit guest request',
+            transform_source=strip_jsonp,
+            data=urlencode_postdata({
+                'cb': 'gen_callback',
+                'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
+            }))
+
+        self._download_webpage(
+            'https://passport.weibo.com/visitor/visitor', video_id,
+            note='Running first-visit callback to get guest cookies',
+            query={
+                'a': 'incarnate',
+                't': visitor_data['data']['tid'],
+                'w': 2,
+                'c': '%03d' % visitor_data['data']['confidence'],
+                'cb': 'cross_domain',
+                'from': 'weibo',
+                '_rand': random.random(),
+            })
+
+    def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
+        webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
+        if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
+            self._update_visitor_cookies(video_id)
+            webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
+        return self._parse_json(webpage, video_id, fatal=fatal)
+
+    def _extract_formats(self, video_info):
+        media_info = traverse_obj(video_info, ('page_info', 'media_info'))
+        formats = traverse_obj(media_info, (
+            'playback_list', lambda _, v: url_or_none(v['play_info']['url']), 'play_info', {
+                'url': 'url',
+                'format': ('quality_desc', {str}),
+                'format_id': ('label', {str}),
+                'ext': ('mime', {mimetype2ext}),
+                'tbr': ('bitrate', {int_or_none}, {lambda x: x or None}),
+                'vcodec': ('video_codecs', {str}),
+                'fps': ('fps', {int_or_none}),
+                'width': ('width', {int_or_none}),
+                'height': ('height', {int_or_none}),
+                'filesize': ('size', {int_or_none}),
+                'acodec': ('audio_codecs', {str}),
+                'asr': ('audio_sample_rate', {int_or_none}),
+                'audio_channels': ('audio_channels', {int_or_none}),
+            }))
+        if not formats:  # fallback, should be barely used
+            for url in set(traverse_obj(media_info, (..., {url_or_none}))):
+                if 'label=' in url:  # filter out non-video urls
+                    format_id, resolution = self._search_regex(
+                        r'label=(\w+)&template=(\d+x\d+)', url, 'format info',
+                        group=(1, 2), default=(None, None))
+                    formats.append({
+                        'url': url,
+                        'format_id': format_id,
+                        **parse_resolution(resolution),
+                        **traverse_obj(media_info, (
+                            'video_details', lambda _, v: v['label'].startswith(format_id), {
+                                'size': ('size', {int_or_none}),
+                                'tbr': ('bitrate', {int_or_none}),
+                            }
+                        ), get_all=False),
+                    })
+        return formats
+
+    def _parse_video_info(self, video_info, video_id=None):
+        return {
+            'id': video_id,
+            'extractor_key': WeiboIE.ie_key(),
+            'extractor': WeiboIE.IE_NAME,
+            'formats': self._extract_formats(video_info),
+            'http_headers': {'Referer': 'https://weibo.com/'},
+            '_old_archive_ids': [make_archive_id('WeiboMobile', video_id)],
+            **traverse_obj(video_info, {
+                'id': (('id', 'id_str', 'mid'), {str_or_none}),
+                'display_id': ('mblogid', {str_or_none}),
+                'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, {lambda x: x or None}),
+                'description': ('text_raw', {str}),
+                'duration': ('page_info', 'media_info', 'duration', {int_or_none}),
+                'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}),
+                'thumbnail': ('page_info', 'page_pic', {url_or_none}),
+                'uploader': ('user', 'screen_name', {str}),
+                'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
+                'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
+                'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
+                'like_count': ('attitudes_count', {int_or_none}),
+                'repost_count': ('reposts_count', {int_or_none}),
+            }, get_all=False),
+            'tags': traverse_obj(video_info, ('topic_struct', ..., 'topic_title', {str})) or None,
+        }
+
+
+class WeiboIE(WeiboBaseIE):
+    _VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
+    _TESTS = [{
+        'url': 'https://weibo.com/7827771738/N4xlMvjhI',
+        'info_dict': {
+            'id': '4910815147462302',
+            'ext': 'mp4',
+            'display_id': 'N4xlMvjhI',
+            'title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】',
+            'description': 'md5:e2637a7673980d68694ea7c43cf12a5f',
+            'duration': 918,
+            'timestamp': 1686312819,
+            'upload_date': '20230609',
+            'thumbnail': r're:https://.*\.jpg',
+            'uploader': '睡前视频基地',
+            'uploader_id': '7827771738',
+            'uploader_url': 'https://weibo.com/u/7827771738',
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
+            'tags': ['泰国大选远进党获胜', '睡前消息', '暑期版'],
+        },
+    }, {
+        'url': 'https://m.weibo.cn/status/4189191225395228',
+        'info_dict': {
+            'id': '4189191225395228',
+            'ext': 'mp4',
+            'display_id': 'FBqgOmDxO',
+            'title': '柴犬柴犬的秒拍视频',
+            'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f',
+            'duration': 53,
+            'timestamp': 1514264429,
+            'upload_date': '20171226',
+            'thumbnail': r're:https://.*\.jpg',
+            'uploader': '柴犬柴犬',
+            'uploader_id': '5926682210',
+            'uploader_url': 'https://weibo.com/u/5926682210',
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
+        }
+    }, {
+        'url': 'https://weibo.com/0/4224132150961381',
+        'note': 'no playback_list example',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        return self._parse_video_info(self._weibo_download_json(
+            f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id))
+
+
+class WeiboVideoIE(WeiboBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)'
+    _TESTS = [{
+        'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
+        'info_dict': {
+            'id': '4797700463137878',
+            'ext': 'mp4',
+            'display_id': 'LEZDodaiW',
+            'title': '稍微了解了一下靡烟miya感觉这东西也太二了',
+            'description': '稍微了解了一下靡烟miya感觉这东西也太二了 http://t.cn/A6aerGsM ',
+            'duration': 76,
+            'timestamp': 1659344278,
+            'upload_date': '20220801',
+            'thumbnail': r're:https://.*\.jpg',
+            'uploader': '君子爱财陈平安',
+            'uploader_id': '3905382233',
+            'uploader_url': 'https://weibo.com/u/3905382233',
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        post_data = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode()
+        video_info = self._weibo_download_json(
+            f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}',
+            video_id, headers={'Referer': url}, data=post_data)['data']['Component_Play_Playinfo']
+        return self.url_result(f'https://weibo.com/0/{video_info["mid"]}', WeiboIE)
+
+
+class WeiboUserIE(WeiboBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?weibo\.com/u/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://weibo.com/u/2066652961?tabtype=video',
+        'info_dict': {
+            'id': '2066652961',
+            'title': '萧影殿下的视频',
+            'description': '萧影殿下的全部视频',
+            'uploader': '萧影殿下',
+        },
+        'playlist_mincount': 195,
}]
def _fetch_page(self, uid, cursor=0, page=1):
return self._weibo_download_json(
'https://weibo.com/ajax/profile/getWaterFallContent',
uid, note=f'Downloading videos page {page}',
query={'uid': uid, 'cursor': cursor})['data']
def _entries(self, uid, first_page):
cursor = 0
for page in itertools.count(1):
response = first_page if page == 1 else self._fetch_page(uid, cursor, page)
for video_info in traverse_obj(response, ('list', ..., {dict})):
yield self._parse_video_info(video_info)
cursor = response.get('next_cursor')
if (int_or_none(cursor) or -1) < 0:
break
def _real_extract(self, url):
uid = self._match_id(url)
first_page = self._fetch_page(uid)
uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
metainfo = {
'title': f'{uploader}的视频',
'description': f'{uploader}的全部视频',
'uploader': uploader,
} if uploader else {}
return self.playlist_result(self._entries(uid, first_page), uid, **metainfo)
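
A note on the traverse_obj pattern that drives _extract_formats and _parse_video_info above: a function inside the path filters branched values, and a trailing dict acts as a per-value template whose None results are dropped. A minimal sketch on a made-up payload (all field values below are illustrative, not real Weibo API data):

    from yt_dlp.utils import int_or_none, traverse_obj

    media_info = {'playback_list': [
        {'play_info': {'url': 'https://example.com/v.mp4', 'label': 'mp4_1080p', 'bitrate': 2437}},
        {'play_info': {'url': None, 'label': 'entry without url, dropped by the filter'}},
    ]}
    formats = traverse_obj(media_info, (
        'playback_list', lambda _, v: v['play_info']['url'], 'play_info', {
            'url': 'url',
            'format_id': ('label', {str}),
            'tbr': ('bitrate', {int_or_none}),
        }))
    assert formats == [{'url': 'https://example.com/v.mp4', 'format_id': 'mp4_1080p', 'tbr': 2437}]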

View file

@@ -9,6 +9,7 @@
     traverse_obj,
     try_call,
     unescapeHTML,
+    url_basename,
     url_or_none,
 )
@@ -45,12 +46,14 @@ class ZaikoIE(ZaikoBaseIE):
             'uploader_id': '454',
             'uploader': 'ZAIKO ZERO',
             'release_timestamp': 1583809200,
-            'thumbnail': r're:https://[a-z0-9]+.cloudfront.net/[a-z0-9_]+/[a-z0-9_]+',
+            'thumbnail': r're:^https://[\w.-]+/\w+/\w+',
+            'thumbnails': 'maxcount:2',
             'release_date': '20200310',
             'categories': ['Tech House'],
             'live_status': 'was_live',
         },
         'params': {'skip_download': 'm3u8'},
+        'skip': 'Your account does not have tickets to this event',
     }]
 
     def _real_extract(self, url):
@@ -83,6 +86,12 @@ def _real_extract(self, url):
         if not formats:
             self.raise_no_formats(msg, expected=expected)
 
+        thumbnail_urls = [
+            traverse_obj(player_meta, ('initial_event_info', 'poster_url')),
+            self._og_search_thumbnail(self._download_webpage(
+                f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''),
+        ]
+
         return {
             'id': video_id,
             'formats': formats,
@@ -96,8 +105,8 @@ def _real_extract(self, url):
             }),
             **traverse_obj(player_meta, ('initial_event_info', {
                 'alt_title': ('title', {str}),
-                'thumbnail': ('poster_url', {url_or_none}),
             })),
+            'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)]
         }
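
For illustration, the thumbnails list assembled above keeps only URL-like entries and derives each thumbnail id from the URL basename; a toy run (the URL is made up):

    from yt_dlp.utils import url_basename, url_or_none

    thumbnail_urls = ['https://media.zaiko.io/posters/abc123', None]
    thumbnails = [{'url': url, 'id': url_basename(url)}
                  for url in thumbnail_urls if url_or_none(url)]
    assert thumbnails == [{'url': 'https://media.zaiko.io/posters/abc123', 'id': 'abc123'}]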

View file

@@ -127,6 +127,7 @@ def _real_extract(self, url):
         return {
             'id': video_id,
             'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
+            'duration': int_or_none(data.get('duration')),
             'subtitles': subtitles,
             'formats': formats,
             'http_headers': {

View file

@@ -2,6 +2,7 @@
 import contextlib
 import functools
+import socket
 import ssl
 import sys
 import typing
@@ -206,3 +207,59 @@ def wrapper(self, *args, **kwargs):
             e.handler = self
             raise
     return wrapper
+
+
+def _socket_connect(ip_addr, timeout, source_address):
+    af, socktype, proto, canonname, sa = ip_addr
+    sock = socket.socket(af, socktype, proto)
+    try:
+        if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+            sock.settimeout(timeout)
+        if source_address:
+            sock.bind(source_address)
+        sock.connect(sa)
+        return sock
+    except socket.error:
+        sock.close()
+        raise
+
+
+def create_connection(
+    address,
+    timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+    source_address=None,
+    *,
+    _create_socket_func=_socket_connect
+):
+    # Work around socket.create_connection() which tries all addresses from getaddrinfo() including IPv6.
+    # This filters the addresses based on the given source_address.
+    # Based on: https://github.com/python/cpython/blob/main/Lib/socket.py#L810
+    host, port = address
+    ip_addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+    if not ip_addrs:
+        raise socket.error('getaddrinfo returns an empty list')
+
+    if source_address is not None:
+        af = socket.AF_INET if ':' not in source_address[0] else socket.AF_INET6
+        ip_addrs = [addr for addr in ip_addrs if addr[0] == af]
+        if not ip_addrs:
+            raise OSError(
+                f'No remote IPv{4 if af == socket.AF_INET else 6} addresses available for connect. '
+                f'Can\'t use "{source_address[0]}" as source address')
+
+    err = None
+    for ip_addr in ip_addrs:
+        try:
+            sock = _create_socket_func(ip_addr, timeout, source_address)
+            # Explicitly break __traceback__ reference cycle
+            # https://bugs.python.org/issue36820
+            err = None
+            return sock
+        except socket.error as e:
+            err = e
+
+    try:
+        raise err
+    finally:
+        # Explicitly break __traceback__ reference cycle
+        # https://bugs.python.org/issue36820
+        err = None
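
A usage sketch of the new helper, assuming a dual-stack host: binding to an IPv4 source address (the addresses below are illustrative) filters the getaddrinfo() candidates so no doomed IPv6 connect() is ever attempted:

    # 192.0.2.x is a documentation-range address, used purely for illustration
    sock = create_connection(('example.com', 443), timeout=10,
                             source_address=('192.0.2.10', 0))
    sock.close()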

View file

@@ -23,6 +23,7 @@
 from ._helper import (
     InstanceStoreMixin,
     add_accept_encoding_header,
+    create_connection,
     get_redirect_method,
     make_socks_proxy_opts,
     select_proxy,
@@ -54,44 +55,10 @@
 def _create_http_connection(http_class, source_address, *args, **kwargs):
     hc = http_class(*args, **kwargs)
 
+    if hasattr(hc, '_create_connection'):
+        hc._create_connection = create_connection
+
     if source_address is not None:
-        # This is to workaround _create_connection() from socket where it will try all
-        # address data from getaddrinfo() including IPv6. This filters the result from
-        # getaddrinfo() based on the source_address value.
-        # This is based on the cpython socket.create_connection() function.
-        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
-        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
-            host, port = address
-            err = None
-            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
-            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
-            ip_addrs = [addr for addr in addrs if addr[0] == af]
-            if addrs and not ip_addrs:
-                ip_version = 'v4' if af == socket.AF_INET else 'v6'
-                raise OSError(
-                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
-                    % (ip_version, source_address[0]))
-            for res in ip_addrs:
-                af, socktype, proto, canonname, sa = res
-                sock = None
-                try:
-                    sock = socket.socket(af, socktype, proto)
-                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
-                        sock.settimeout(timeout)
-                    sock.bind(source_address)
-                    sock.connect(sa)
-                    err = None  # Explicitly break reference cycle
-                    return sock
-                except OSError as _:
-                    err = _
-                    if sock is not None:
-                        sock.close()
-            if err is not None:
-                raise err
-            else:
-                raise OSError('getaddrinfo returns an empty list')
-
-        if hasattr(hc, '_create_connection'):
-            hc._create_connection = _create_connection
         hc.source_address = (source_address, 0)
 
     return hc
@@ -220,13 +187,28 @@ def make_socks_conn_class(base_class, socks_proxy):
     proxy_args = make_socks_proxy_opts(socks_proxy)
 
     class SocksConnection(base_class):
-        def connect(self):
-            self.sock = sockssocket()
-            self.sock.setproxy(**proxy_args)
-            if type(self.timeout) in (int, float):  # noqa: E721
-                self.sock.settimeout(self.timeout)
-            self.sock.connect((self.host, self.port))
+        _create_connection = create_connection
+
+        def connect(self):
+            def sock_socket_connect(ip_addr, timeout, source_address):
+                af, socktype, proto, canonname, sa = ip_addr
+                sock = sockssocket(af, socktype, proto)
+                try:
+                    connect_proxy_args = proxy_args.copy()
+                    connect_proxy_args.update({'addr': sa[0], 'port': sa[1]})
+                    sock.setproxy(**connect_proxy_args)
+                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:  # noqa: E721
+                        sock.settimeout(timeout)
+                    if source_address:
+                        sock.bind(source_address)
+                    sock.connect((self.host, self.port))
+                    return sock
+                except socket.error:
+                    sock.close()
+                    raise
+
+            self.sock = create_connection(
+                (proxy_args['addr'], proxy_args['port']), timeout=self.timeout,
+                source_address=self.source_address, _create_socket_func=sock_socket_connect)
 
             if isinstance(self, http.client.HTTPSConnection):
                 self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
@@ -429,7 +411,7 @@ def _send(self, request):
         except urllib.error.HTTPError as e:
             if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
                 # Prevent file object from being closed when urllib.error.HTTPError is destroyed.
-                e._closer.file = None
+                e._closer.close_called = True
                 raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
             raise  # unexpected
         except urllib.error.URLError as e:
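
For context, a hedged sketch of how the rewritten SocksConnection is meant to be composed, from within this module's namespace (the proxy URL and target host are placeholders):

    import http.client

    # Placeholder SOCKS5 proxy URL; make_socks_proxy_opts() parses it
    SocksHTTPSConnection = make_socks_conn_class(
        http.client.HTTPSConnection, 'socks5://127.0.0.1:1080')
    conn = SocksHTTPSConnection('example.com', 443, timeout=10)
    conn.connect()  # connects to the proxy via create_connection(), then TLS-wraps the tunnel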

View file

@@ -115,7 +115,7 @@ def __init__(self, http_error: HTTPError):
             hdrs=http_error.response.headers,
             fp=http_error.response
         )
-        self._closer.file = None  # Disable auto close
+        self._closer.close_called = True  # Disable auto close
         self._http_error = http_error
 
         HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)

View file

@@ -134,26 +134,31 @@ def _check_response_version(self, expected_version, got_version):
             self.close()
             raise InvalidVersionError(expected_version, got_version)
 
-    def _resolve_address(self, destaddr, default, use_remote_dns):
-        try:
-            return socket.inet_aton(destaddr)
-        except OSError:
-            if use_remote_dns and self._proxy.remote_dns:
-                return default
-            else:
-                return socket.inet_aton(socket.gethostbyname(destaddr))
+    def _resolve_address(self, destaddr, default, use_remote_dns, family=None):
+        for f in (family,) if family else (socket.AF_INET, socket.AF_INET6):
+            try:
+                return f, socket.inet_pton(f, destaddr)
+            except OSError:
+                continue
+
+        if use_remote_dns and self._proxy.remote_dns:
+            return 0, default
+        else:
+            res = socket.getaddrinfo(destaddr, None, family=family or 0)
+            f, _, _, _, ipaddr = res[0]
+            return f, socket.inet_pton(f, ipaddr[0])
 
     def _setup_socks4(self, address, is_4a=False):
         destaddr, port = address
 
-        ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
+        _, ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a, family=socket.AF_INET)
 
         packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
 
         username = (self._proxy.username or '').encode()
         packet += username + b'\x00'
 
-        if is_4a and self._proxy.remote_dns:
+        if is_4a and self._proxy.remote_dns and ipaddr == SOCKS4_DEFAULT_DSTIP:
             packet += destaddr.encode() + b'\x00'
 
         self.sendall(packet)
@@ -210,7 +215,7 @@ def _socks5_auth(self):
     def _setup_socks5(self, address):
         destaddr, port = address
 
-        ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
+        family, ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
 
         self._socks5_auth()
@@ -220,8 +225,10 @@ def _setup_socks5(self, address):
             destaddr = destaddr.encode()
             packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
             packet += self._len_and_data(destaddr)
-        else:
+        elif family == socket.AF_INET:
             packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
+        elif family == socket.AF_INET6:
+            packet += struct.pack('!B', Socks5AddressType.ATYP_IPV6) + ipaddr
         packet += struct.pack('!H', port)
 
         self.sendall(packet)
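
The rewritten _resolve_address probes both address families with socket.inet_pton(), which is what lets SOCKS5 requests carry ATYP_IPV6 payloads. The probe is easy to verify standalone (the addresses are illustrative):

    import socket

    for dest in ('127.0.0.1', '2001:db8::1'):
        for family in (socket.AF_INET, socket.AF_INET6):
            try:
                packed = socket.inet_pton(family, dest)
            except OSError:
                continue
            print(dest, family, len(packed))  # 4 packed bytes for IPv4, 16 for IPv6
            break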

View file

@@ -669,6 +669,7 @@ def replace_insane(char):
 
 def sanitize_path(s, force=False):
     """Sanitizes and normalizes path on Windows"""
+    # XXX: this handles drive relative paths (c:sth) incorrectly
     if sys.platform == 'win32':
         force = False
         drive_or_unc, _ = os.path.splitdrive(s)
@@ -687,7 +688,10 @@ def sanitize_path(s, force=False):
         sanitized_path.insert(0, drive_or_unc + os.path.sep)
     elif force and s and s[0] == os.path.sep:
         sanitized_path.insert(0, os.path.sep)
-    return os.path.join(*sanitized_path)
+    # TODO: Fix behavioral differences <3.12
+    # The workaround using `normpath` only superficially passes tests
+    # Ref: https://github.com/python/cpython/pull/100351
+    return os.path.normpath(os.path.join(*sanitized_path))
 
 
 def sanitize_url(url, *, scheme='http'):
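
The added normpath() matters because joining the sanitized segments can leave '.' and '..' components in place; a quick illustration on POSIX:

    import os

    parts = ['foo', '..', 'bar', '.', 'baz#']
    print(os.path.join(*parts))                    # foo/../bar/./baz#
    print(os.path.normpath(os.path.join(*parts)))  # bar/baz#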
@@ -1256,7 +1260,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
     if precision == 'auto':
         auto_precision = True
         precision = 'microsecond'
-    today = datetime_round(datetime.datetime.utcnow(), precision)
+    today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
     if date_str in ('now', 'today'):
         return today
     if date_str == 'yesterday':
@@ -1319,8 +1323,8 @@ def datetime_round(dt, precision='day'):
         'second': 1,
     }
     roundto = lambda x, n: ((x + n / 2) // n) * n
-    timestamp = calendar.timegm(dt.timetuple())
-    return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
+    timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
+    return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
 
 
 def hyphenate_date(date_str):
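
The datetime_round() change folds the rounding into the timestamp and returns an aware UTC datetime instead of a naive one; the arithmetic itself is unchanged. A worked example with precision='hour' (the timestamp is arbitrary):

    import calendar
    import datetime

    dt = datetime.datetime(2023, 9, 21, 13, 40, tzinfo=datetime.timezone.utc)
    unit = 3600  # unit_seconds['hour']
    timestamp = ((calendar.timegm(dt.timetuple()) + unit / 2) // unit) * unit
    print(datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc))
    # 2023-09-21 14:00:00+00:00 -- 13:40 rounds up to the nearest hour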
@@ -2847,6 +2851,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
         'quicktime': 'mov',
         'webm': 'webm',
         'vp9': 'vp9',
+        'video/ogg': 'ogv',
         'x-flv': 'flv',
         'x-m4v': 'm4v',
         'x-matroska': 'mkv',