Merge remote-tracking branch 'upstream/master'

2023-09-21 22:58:17 +02:00 · 2023-09-21 22:58:17 +02:00 · d8d31be98e
parent 86d98dab3b 2269065ad6
commit d8d31be98e
68 changed files with 2986 additions and 1077 deletions
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@ -13,13 +13,16 @@ jobs:
      matrix:
        os: [ubuntu-latest]
        # CPython 3.11 is in quick-test
-        python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8]
+        python-version: ['3.8', '3.9', '3.10', '3.12-dev', pypy-3.7, pypy-3.8, pypy-3.10]
        run-tests-ext: [sh]
        include:
        # at least one of each CPython/PyPy tests must be in windows
        - os: windows-latest
          python-version: '3.7'
          run-tests-ext: bat
+        - os: windows-latest
+          python-version: '3.12-dev'
+          run-tests-ext: bat
        - os: windows-latest
          python-version: pypy-3.9
          run-tests-ext: bat
--- a/README.md
+++ b/README.md
@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t

 # NEW FEATURES

-* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
+* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))

 * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

@ -1854,7 +1854,7 @@ The following extractors use this feature:
 * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`

 #### twitter
-* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
+* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in

 #### stacommu, wrestleuniverse
 * `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@ -68,6 +68,25 @@
    {
        "action": "change",
        "when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
-        "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b"
+        "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b",
+        "authors": ["pukkandan"]
+    },
+    {
+        "action": "change",
+        "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f",
+        "short": "[test] Add tests for socks proxies (#7908)",
+        "authors": ["coletdjnz"]
+    },
+    {
+        "action": "change",
+        "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76",
+        "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)",
+        "authors": ["coletdjnz"]
+    },
+    {
+        "action": "change",
+        "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85",
+        "short": "[rh:urllib] Simplify gzip decoding (#7611)",
+        "authors": ["Grub4K"]
    }
 ]
--- a/devscripts/make_changelog.py
+++ b/devscripts/make_changelog.py
@ -31,35 +31,27 @@ class CommitGroup(enum.Enum):
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
+    NETWORKING = 'Networking'
    MISC = 'Misc.'

-    @classmethod
-    @property
-    def ignorable_prefixes(cls):
-        return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')
-
    @classmethod
    @lru_cache
-    def commit_lookup(cls):
+    def subgroup_lookup(cls):
        return {
            name: group
            for group, names in {
-                cls.PRIORITY: {'priority'},
                cls.CORE: {
                    'aes',
                    'cache',
                    'compat_utils',
                    'compat',
                    'cookies',
-                    'core',
                    'dependencies',
                    'formats',
                    'jsinterp',
-                    'networking',
                    'outtmpl',
                    'plugins',
                    'update',
-                    'upstream',
                    'utils',
                },
                cls.MISC: {
@ -67,23 +59,40 @@ class CommitGroup(enum.Enum):
                    'cleanup',
                    'devscripts',
                    'docs',
-                    'misc',
                    'test',
                },
-                cls.EXTRACTOR: {'extractor', 'ie'},
-                cls.DOWNLOADER: {'downloader', 'fd'},
-                cls.POSTPROCESSOR: {'postprocessor', 'pp'},
+                cls.NETWORKING: {
+                    'rh',
+                },
            }.items()
            for name in names
        }

    @classmethod
-    def get(cls, value):
-        result = cls.commit_lookup().get(value)
-        if result:
-            logger.debug(f'Mapped {value!r} => {result.name}')
+    @lru_cache
+    def group_lookup(cls):
+        result = {
+            'fd': cls.DOWNLOADER,
+            'ie': cls.EXTRACTOR,
+            'pp': cls.POSTPROCESSOR,
+            'upstream': cls.CORE,
+        }
+        result.update({item.name.lower(): item for item in iter(cls)})
        return result

+    @classmethod
+    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
+        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
+
+        result = cls.group_lookup().get(group)
+        if not result:
+            if subgroup:
+                return None, value
+            subgroup = group
+            result = cls.subgroup_lookup().get(subgroup)
+
+        return result, subgroup or None
+

@dataclass
 class Commit:
@ -198,19 +207,23 @@ class Changelog:
        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
-                    self._format_message_link(None, info.commit.hash).strip()
+                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

-    def format_single_change(self, info):
-        message = self._format_message_link(info.message, info.commit.hash)
+    def format_single_change(self, info: CommitInfo):
+        message, sep, rest = info.message.partition('\n')
+        if '[' not in message:
+            # If the message doesn't already contain markdown links, try to add a link to the commit
+            message = self._format_message_link(message, info.commit.hash)
+
        if info.issues:
-            message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)
+            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
-            message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)
+            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
@ -219,16 +232,14 @@ class Changelog:
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

-            message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)
+            message = f'{message} (With fixes in {fix_message})'

-        return message[:-1]
+        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, hash):
        assert message or hash, 'Improperly defined commit message or override'
        message = message if message else hash[:HASH_LENGTH]
-        if not hash:
-            return f'{message}\n'
-        return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)
+        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

    def _format_issues(self, issues):
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
@ -318,7 +329,7 @@ class CommitRange:
        for commitish, revert_commit in reverts.items():
            reverted = commits.pop(commitish, None)
            if reverted:
-                logger.debug(f'{commit} fully reverted {reverted}')
+                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                commits[revert_commit.hash] = revert_commit

@ -337,7 +348,7 @@ class CommitRange:
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
-                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
+                logger.debug(f'Ignored {when!r} override')
                continue

            override_hash = override.get('hash') or when
@ -365,7 +376,7 @@ class CommitRange:
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
-                commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'
+                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
@ -410,25 +421,20 @@ class CommitRange:
        if not prefix:
            return CommitGroup.CORE, None, ()

-        prefix, _, details = prefix.partition('/')
-        prefix = prefix.strip()
-        details = details.strip()
+        prefix, *sub_details = prefix.split(':')

-        group = CommitGroup.get(prefix.lower())
-        if group is CommitGroup.PRIORITY:
-            prefix, _, details = details.partition('/')
+        group, details = CommitGroup.get(prefix)
+        if group is CommitGroup.PRIORITY and details:
+            details = details.partition('/')[2].strip()

-        if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
-            logger.debug(f'Replaced details with {prefix!r}')
-            details = prefix or None
+        if details and '/' in details:
+            logger.error(f'Prefix is overnested, using first part: {prefix}')
+            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None
-
-        if details:
-            details, *sub_details = details.split(':')
-        else:
-            sub_details = []
+        elif group is CommitGroup.NETWORKING and details == 'rh':
+            details = 'Request Handler'

        return group, details, sub_details

--- a/devscripts/update-version.py
+++ b/devscripts/update-version.py
@ -10,14 +10,14 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import argparse
 import contextlib
 import sys
-from datetime import datetime
+from datetime import datetime, timezone

 from devscripts.utils import read_version, run_process, write_file


 def get_new_version(version, revision):
    if not version:
-        version = datetime.utcnow().strftime('%Y.%m.%d')
+        version = datetime.now(timezone.utc).strftime('%Y.%m.%d')

    if revision:
        assert revision.isdigit(), 'Revision must be a number'
--- a/test/test_socks.py
+++ b/test/test_socks.py
@ -281,17 +281,13 @@ class TestSocks4Proxy:
                    rh, proxies={'all': f'socks4://user:@{server_address}'})
                assert response['version'] == 4

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='socks4a implementation currently broken when destination is not a domain name'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks4a_ipv4_target(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert response['version'] == 4
-                assert response['ipv4_address'] == '127.0.0.1'
-                assert response['domain_address'] is None
+                assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks4a_domain_target(self, handler, ctx):
@ -302,10 +298,7 @@ class TestSocks4Proxy:
                assert response['ipv4_address'] is None
                assert response['domain_address'] == 'localhost'

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='source_address is not yet supported for socks4 proxies'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
@ -327,10 +320,7 @@ class TestSocks4Proxy:
                with pytest.raises(ProxyError):
                    ctx.socks_info_request(rh)

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='IPv6 socks4 proxies are not yet supported'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv6_socks4_proxy(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
            with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
@ -342,7 +332,7 @@ class TestSocks4Proxy:
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_timeout(self, handler, ctx):
        with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
-            with handler(proxies={'all': f'socks4://{server_address}'}, timeout=1) as rh:
+            with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
                with pytest.raises(TransportError):
                    ctx.socks_info_request(rh)

@ -383,7 +373,7 @@ class TestSocks5Proxy:
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='localhost')
-                assert response['ipv4_address'] == '127.0.0.1'
+                assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
                assert response['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
@ -404,22 +394,15 @@ class TestSocks5Proxy:
                assert response['domain_address'] is None
                assert response['version'] == 5

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='IPv6 destination addresses are not yet supported'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5_ipv6_destination(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                response = ctx.socks_info_request(rh, target_domain='[::1]')
                assert response['ipv6_address'] == '::1'
-                assert response['port'] == 80
                assert response['version'] == 5

-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='IPv6 socks5 proxies are not yet supported'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv6_socks5_proxy(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@ -430,10 +413,7 @@ class TestSocks5Proxy:

    # XXX: is there any feasible way of testing IPv6 source addresses?
    # Same would go for non-proxy source_address test...
-    @pytest.mark.parametrize('handler,ctx', [
-        pytest.param('Urllib', 'http', marks=pytest.mark.xfail(
-            reason='source_address is not yet supported for socks5 proxies'))
-    ], indirect=True)
+    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -2591,7 +2591,7 @@ class YoutubeDL:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                with contextlib.suppress(ValueError, OverflowError, OSError):
-                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')

        live_keys = ('is_live', 'was_live')
--- a/yt_dlp/compat/compat_utils.py
+++ b/yt_dlp/compat/compat_utils.py
@ -15,7 +15,7 @@ def get_package_info(module):
        name=getattr(module, '_yt_dlp__identifier', module.__name__),
        version=str(next(filter(None, (
            getattr(module, attr, None)
-            for attr in ('__version__', 'version_string', 'version')
+            for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
        )), None)))


--- a/yt_dlp/dependencies/__init__.py
+++ b/yt_dlp/dependencies/__init__.py
@ -43,6 +43,8 @@ except Exception as _err:

 try:
    import sqlite3
+    # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
+    sqlite3._yt_dlp__version = sqlite3.sqlite_version
 except ImportError:
    # although sqlite3 is part of the standard library, it is possible to compile python without
    # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -122,7 +122,6 @@ from .applepodcasts import ApplePodcastsIE
 from .archiveorg import (
    ArchiveOrgIE,
    YoutubeWebArchiveIE,
-    VLiveWebArchiveIE,
 )
 from .arcpublishing import ArcPublishingIE
 from .arkena import ArkenaIE
@ -165,6 +164,7 @@ from .awaan import (
    AWAANLiveIE,
    AWAANSeasonIE,
 )
+from .axs import AxsIE
 from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
 from .banbye import (
@ -223,7 +223,11 @@ from .bilibili import (
    BiliBiliPlayerIE,
    BilibiliSpaceVideoIE,
    BilibiliSpaceAudioIE,
-    BilibiliSpacePlaylistIE,
+    BilibiliCollectionListIE,
+    BilibiliSeriesListIE,
+    BilibiliFavoritesListIE,
+    BilibiliWatchlaterIE,
+    BilibiliPlaylistIE,
    BiliIntlIE,
    BiliIntlSeriesIE,
    BiliLiveIE,
@ -292,9 +296,11 @@ from .cammodels import CamModelsIE
 from .camsoda import CamsodaIE
 from .camtasia import CamtasiaEmbedIE
 from .camwithher import CamWithHerIE
+from .canal1 import Canal1IE
 from .canalalpha import CanalAlphaIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
+from .caracoltv import CaracolTvPlayIE
 from .carambatv import (
    CarambaTVIE,
    CarambaTVPageIE,
@ -561,6 +567,7 @@ from .epicon import (
    EpiconIE,
    EpiconSeriesIE,
 )
+from .eplus import EplusIbIE
 from .epoch import EpochIE
 from .eporner import EpornerIE
 from .eroprofile import (
@ -1501,6 +1508,7 @@ from .polskieradio import (
 from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
 from .porn91 import Porn91IE
+from .pornbox import PornboxIE
 from .porncom import PornComIE
 from .pornflip import PornFlipIE
 from .pornhd import PornHdIE
@ -1519,7 +1527,7 @@ from .puhutv import (
    PuhuTVIE,
    PuhuTVSerieIE,
 )
-from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
+from .pr0gramm import Pr0grammIE
 from .prankcast import PrankCastIE
 from .premiershiprugby import PremiershipRugbyIE
 from .presstv import PressTVIE
@ -1555,7 +1563,14 @@ from .radiocanada import (
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
-from .radiofrance import FranceCultureIE, RadioFranceIE
+from .radiofrance import (
+    FranceCultureIE,
+    RadioFranceIE,
+    RadioFranceLiveIE,
+    RadioFrancePodcastIE,
+    RadioFranceProfileIE,
+    RadioFranceProgramScheduleIE,
+)
 from .radiozet import RadioZetPodcastIE
 from .radiokapital import (
    RadioKapitalIE,
@ -1586,6 +1601,7 @@ from .rbmaradio import RBMARadioIE
 from .rbgtum import (
    RbgTumIE,
    RbgTumCourseIE,
+    RbgTumNewCourseIE,
 )
 from .rcs import (
    RCSIE,
@ -1710,7 +1726,10 @@ from .ruv import (
    RuvIE,
    RuvSpilaIE
 )
-from .s4c import S4CIE
+from .s4c import (
+    S4CIE,
+    S4CSeriesIE
+)
 from .safari import (
    SafariIE,
    SafariApiIE,
@ -1791,7 +1810,10 @@ from .slideslive import SlidesLiveIE
 from .slutload import SlutloadIE
 from .smotrim import SmotrimIE
 from .snotr import SnotrIE
-from .sohu import SohuIE
+from .sohu import (
+    SohuIE,
+    SohuVIE,
+)
 from .sonyliv import (
    SonyLIVIE,
    SonyLIVSeriesIE,
@ -2354,7 +2376,8 @@ from .webofstories import (
 )
 from .weibo import (
    WeiboIE,
-    WeiboMobileIE
+    WeiboVideoIE,
+    WeiboUserIE,
 )
 from .weiqitv import WeiqiTVIE
 from .weverse import (
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@ -12,7 +12,7 @@ import urllib.parse
 import urllib.request
 import urllib.response
 import uuid
-
+from ..utils.networking import clean_proxies
 from .common import InfoExtractor
 from ..aes import aes_ecb_decrypt
 from ..utils import (
@ -35,7 +35,10 @@ def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    rh = ydl._request_director.handlers['Urllib']
    if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
        return
-    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
+    headers = ydl.params['http_headers'].copy()
+    proxies = ydl.proxies.copy()
+    clean_proxies(proxies, headers)
+    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
    assert isinstance(opener, urllib.request.OpenerDirector)
    opener.add_handler(handler)
    rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
--- a/yt_dlp/extractor/amazonminitv.py
+++ b/yt_dlp/extractor/amazonminitv.py
@ -22,8 +22,11 @@ class AmazonMiniTVBaseIE(InfoExtractor):

        resp = self._download_json(
            f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
-            asin, note=note, headers={'Content-Type': 'application/json'},
-            data=json.dumps(data).encode() if data else None,
+            asin, note=note, headers={
+                'Content-Type': 'application/json',
+                'currentpageurl': '/',
+                'currentplatform': 'dWeb'
+            }, data=json.dumps(data).encode() if data else None,
            query=None if data else {
                'deviceType': 'A1WMMUXPCUJL4N',
                'contentId': asin,
@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
            'ext': 'mp4',
            'title': 'May I Kiss You?',
            'language': 'Hindi',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'description': 'md5:a549bfc747973e04feb707833474e59d',
            'release_timestamp': 1644710400,
            'release_date': '20220213',
@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
            'ext': 'mp4',
            'title': 'Jahaan',
            'language': 'Hindi',
-            'thumbnail': r're:^https?://.*\.jpg',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'description': 'md5:05eb765a77bf703f322f120ec6867339',
            'release_timestamp': 1647475200,
            'release_date': '20220317',
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@ -3,7 +3,6 @@ import re
 import urllib.parse

 from .common import InfoExtractor
-from .naver import NaverBaseIE
 from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
 from ..compat import compat_urllib_parse_unquote
 from ..networking import HEADRequest
@ -947,237 +946,3 @@ class YoutubeWebArchiveIE(InfoExtractor):
        if not info.get('title'):
            info['title'] = video_id
        return info
-
-
-class VLiveWebArchiveIE(InfoExtractor):
-    IE_NAME = 'web.archive:vlive'
-    IE_DESC = 'web.archive.org saved vlive videos'
-    _VALID_URL = r'''(?x)
-            (?:https?://)?web\.archive\.org/
-            (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
-            (?:https?(?::|%3[Aa])//)?(?:
-                (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+)  # VLive URL
-            )
-        '''
-    _TESTS = [{
-        'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
-        'md5': 'cc7314812855ce56de70a06a27314983',
-        'info_dict': {
-            'id': '1326',
-            'ext': 'mp4',
-            'title': "Girl's Day's Broadcast",
-            'creator': "Girl's Day",
-            'view_count': int,
-            'uploader_id': 'muploader_a',
-            'uploader_url': None,
-            'uploader': None,
-            'upload_date': '20150817',
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': 1439816449,
-            'like_count': int,
-            'channel': 'Girl\'s Day',
-            'channel_id': 'FDF27',
-            'comment_count': int,
-            'release_timestamp': 1439818140,
-            'release_date': '20150817',
-            'duration': 1014,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
-        'info_dict': {
-            'id': '16937',
-            'ext': 'mp4',
-            'title': '첸백시 걍방',
-            'creator': 'EXO',
-            'view_count': int,
-            'subtitles': 'mincount:12',
-            'uploader_id': 'muploader_j',
-            'uploader_url': 'http://vlive.tv',
-            'uploader': None,
-            'upload_date': '20161112',
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': 1478923074,
-            'like_count': int,
-            'channel': 'EXO',
-            'channel_id': 'F94BD',
-            'comment_count': int,
-            'release_timestamp': 1478924280,
-            'release_date': '20161112',
-            'duration': 906,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
-        'info_dict': {
-            'id': '101870',
-            'ext': 'mp4',
-            'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
-            'creator': 'Dispatch',
-            'view_count': int,
-            'subtitles': 'mincount:6',
-            'uploader_id': 'V__FRA08071',
-            'uploader_url': 'http://vlive.tv',
-            'uploader': None,
-            'upload_date': '20181130',
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': 1543601327,
-            'like_count': int,
-            'channel': 'Dispatch',
-            'channel_id': 'C796F3',
-            'comment_count': int,
-            'release_timestamp': 1543601040,
-            'release_date': '20181130',
-            'duration': 279,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
-    # The wayback machine has special timestamp and "mode" values:
-    # timestamp:
-    #   1 = the first capture
-    #   2 = the last capture
-    # mode:
-    #   id_ = Identity - perform no alterations of the original resource, return it as it was archived.
-    _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
-
-    def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
-        for retry in self.RetryManager():
-            try:
-                return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
-            except ExtractorError as e:
-                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
-                    raise ExtractorError('Page was not archived', expected=True)
-                retry.error = e
-                continue
-
-    def _download_archived_json(self, url, video_id, **kwargs):
-        page = self._download_archived_page(url, video_id, **kwargs)
-        if not page:
-            raise ExtractorError('Page was not archived', expected=True)
-        else:
-            return self._parse_json(page, video_id)
-
-    def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
-        m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
-        if not m3u8_doc:
-            return
-
-        # M3U8 document should be changed to archive domain
-        m3u8_doc = m3u8_doc.splitlines()
-        url_base = m3u8_url.rsplit('/', 1)[0]
-        first_segment = None
-        for i, line in enumerate(m3u8_doc):
-            if not line.startswith('#'):
-                m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
-                first_segment = first_segment or m3u8_doc[i]
-
-        # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
-        urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
-                                     fatal=False, note='Check first segment availablity')
-        if urlh:
-            formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
-            if subtitles:
-                self._report_ignoring_subs('m3u8')
-            return formats
-
-    # Closely follows the logic of the ArchiveTeam grab script
-    # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
-    def _real_extract(self, url):
-        video_id, url_date = self._match_valid_url(url).group('id', 'date')
-
-        webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)
-
-        player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
-        user_country = traverse_obj(player_info, ('common', 'userCountry'))
-
-        main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
-        main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
-        app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')
-
-        inkey = self._download_archived_json(
-            f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
-                'appId': app_id,
-                'platformType': 'PC',
-                'gcc': user_country,
-                'locale': 'en_US',
-            }, fatal=False)
-
-        vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))
-
-        vod_data = self._download_archived_json(
-            f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
-                'key': inkey.get('inkey'),
-                'pid': 'rmcPlayer_16692457559726800',  # partially unix time and partially random. Fixed value used by archiveteam project
-                'sid': '2024',
-                'ver': '2.0',
-                'devt': 'html5_pc',
-                'doct': 'json',
-                'ptc': 'https',
-                'sptc': 'https',
-                'cpt': 'vtt',
-                'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
-                'pv': '4.26.9',
-                'dr': '1920x1080',
-                'cpl': 'en_US',
-                'lc': 'en_US',
-                'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
-                'adu': '%2F',
-                'videoId': vod_id,
-                'cc': user_country,
-            })
-
-        formats = []
-
-        streams = traverse_obj(vod_data, ('streams', ...))
-        if len(streams) > 1:
-            self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
-        stream = streams[0]
-
-        max_stream = max(
-            stream.get('videos') or [],
-            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
-        if max_stream is not None:
-            params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
-            formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []
-
-        # For parts of the project MP4 files were archived
-        max_video = max(
-            traverse_obj(vod_data, ('videos', 'list', ...)),
-            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
-        if max_video is not None:
-            video_url = self._WAYBACK_BASE_URL + max_video.get('source')
-            urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
-                                         fatal=False, note='Check video availablity')
-            if urlh:
-                formats.append({'url': video_url})
-
-        return {
-            'id': video_id,
-            'formats': formats,
-            **traverse_obj(player_info, ('postDetail', 'post', {
-                'title': ('officialVideo', 'title', {str}),
-                'creator': ('author', 'nickname', {str}),
-                'channel': ('channel', 'channelName', {str}),
-                'channel_id': ('channel', 'channelCode', {str}),
-                'duration': ('officialVideo', 'playTime', {int_or_none}),
-                'view_count': ('officialVideo', 'playCount', {int_or_none}),
-                'like_count': ('officialVideo', 'likeCount', {int_or_none}),
-                'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
-                'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
-                'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
-            })),
-            **traverse_obj(vod_data, ('meta', {
-                'uploader_id': ('user', 'id', {str}),
-                'uploader': ('user', 'name', {str}),
-                'uploader_url': ('user', 'url', {url_or_none}),
-                'thumbnail': ('cover', 'source', {url_or_none}),
-            }), expected_type=lambda x: x or None),
-            **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
-        }
--- a/yt_dlp/extractor/aws.py
+++ b/yt_dlp/extractor/aws.py
@ -12,7 +12,7 @@ class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with

    def _aws_execute_api(self, aws_dict, video_id, query=None):
        query = query or {}
-        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+        amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
        date = amz_date[:8]
        headers = {
            'Accept': 'application/json',
--- a/yt_dlp/extractor/axs.py
+++ b/yt_dlp/extractor/axs.py
@ -0,0 +1,87 @@
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    js_to_json,
+    parse_iso8601,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class AxsIE(InfoExtractor):
+    IE_NAME = 'axs.tv'
+    _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'
+
+    _TESTS = [{
+        'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
+        'md5': '8d97736ae8e50c64df528e5e676778cf',
+        'info_dict': {
+            'id': '5f4dc776b70e4f1c194f22ef',
+            'title': 'Small Town',
+            'ext': 'mp4',
+            'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
+            'upload_date': '20230602',
+            'timestamp': 1685729564,
+            'duration': 1284.216,
+            'series': 'Rock & Roll Road Trip with Sammy Hagar',
+            'season': 2,
+            'episode': '3',
+            'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
+        },
+    }, {
+        'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
+        'md5': '300ae795cd8f9984652c0949734ffbdc',
+        'info_dict': {
+            'id': '5f488148b70e4f392572977c',
+            'display_id': 'daryl-hall',
+            'title': 'Daryl Hall',
+            'ext': 'mp4',
+            'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
+            'upload_date': '20230214',
+            'timestamp': 1676403615,
+            'duration': 2570.668,
+            'series': 'The Big Interview with Dan Rather',
+            'season': 3,
+            'episode': '5',
+            'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        webpage_json_data = self._search_json(
+            r'mountObj\s*=', webpage, 'video ID data', display_id,
+            transform_source=js_to_json)
+        video_id = webpage_json_data['video_id']
+        company_id = webpage_json_data['company_id']
+
+        meta = self._download_json(
+            f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
+            video_id, query={'device_type': 'desktop_web'})['video']
+
+        formats = self._extract_m3u8_formats(
+            meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')
+
+        subtitles = {}
+        for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
+            subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
+                {'ext': cc.get('srtExt'), 'url': cc['srtPath']})
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            **traverse_obj(meta, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'series': ('seriestitle', {str}),
+                'season': ('season', {int}),
+                'episode': ('episode', {str}),
+                'duration': ('duration', {float_or_none}),
+                'timestamp': ('updated_at', {parse_iso8601}),
+                'thumbnail': ('thumb', {url_or_none}),
+            }),
+            'subtitles': subtitles,
+        }
--- a/yt_dlp/extractor/banbye.py
+++ b/yt_dlp/extractor/banbye.py
@ -31,7 +31,7 @@ class BanByeBaseIE(InfoExtractor):


 class BanByeIE(BanByeBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
        'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE):
            'title': 'Krzysztof Karoń',
            'id': 'p_Ld82N6gBw_OJ',
        },
-        'playlist_count': 9,
+        'playlist_mincount': 9,
+    }, {
+        'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
+        'info_dict': {
+            'id': 'v_kb6_o1Kyq-CD',
+            'ext': 'mp4',
+            'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
+            'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
+            'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
+            'channel_id': 'ch_QgWnHvDG2fo5',
+            'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
+            'duration': 597,
+            'timestamp': 1688642656,
+            'upload_date': '20230706',
+            'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
+            'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
+            'like_count': int,
+            'dislike_count': int,
+            'view_count': int,
+            'comment_count': int,
+        },
    }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@ -15,11 +15,13 @@ from ..utils import (
    float_or_none,
    get_element_by_class,
    int_or_none,
+    join_nonempty,
    js_to_json,
    parse_duration,
    parse_iso8601,
    parse_qs,
    strip_or_none,
+    traverse_obj,
    try_get,
    unescapeHTML,
    unified_timestamp,
@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor):
                            iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
                            music/(?:clips|audiovideo/popular)[/#]|
                            radio/player/|
-                            sounds/play/|
                            events/[^/]+/play/[^/]+/
                        )
                        (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
                # rtmp download
                'skip_download': True,
            },
-        }, {
-            'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
-            'note': 'Audio',
-            'info_dict': {
-                'id': 'm0007jz9',
-                'ext': 'mp4',
-                'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
-                'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
-                'duration': 9840,
-            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            }
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
            'only_matching': True,
@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'upload_date': '20190604',
            'categories': ['Psychology'],
        },
+    }, {
+        # BBC Sounds
+        'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+        'info_dict': {
+            'id': 'm001q789',
+            'ext': 'mp4',
+            'title': 'The Night Tracks Mix - Music for the darkling hour',
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
+            'chapters': 'count:8',
+            'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
+            'uploader': 'Radio 3',
+            'duration': 1800,
+            'uploader_id': 'bbc_radio_three',
+        },
    }, {  # onion routes
        'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
        'only_matching': True,
@ -1128,6 +1129,13 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
                    'uploader_id': network.get('id'),
                    'formats': formats,
                    'subtitles': subtitles,
+                    'chapters': traverse_obj(preload_state, (
+                        'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
+                            'title': ('titles', {lambda x: join_nonempty(
+                                'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+                            'start_time': ('offset', 'start', {float_or_none}),
+                            'end_time': ('offset', 'end', {float_or_none}),
+                        })) or None,
                }

        bbc3_config = self._parse_json(
--- a/yt_dlp/extractor/bild.py
+++ b/yt_dlp/extractor/bild.py
@ -1,6 +1,7 @@
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
+    traverse_obj,
    unescapeHTML,
 )

@ -8,7 +9,8 @@ from ..utils import (
 class BildIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
    IE_DESC = 'Bild.de'
-    _TEST = {
+    _TESTS = [{
+        'note': 'static MP4 only',
        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
        'md5': 'dd495cbd99f2413502a1713a1156ac8a',
        'info_dict': {
@ -19,7 +21,19 @@ class BildIE(InfoExtractor):
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 196,
        }
-    }
+    }, {
+        'note': 'static MP4 and HLS',
+        'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
+        'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
+        'info_dict': {
+            'id': '85158620',
+            'ext': 'mp4',
+            'title': 'Der Sprungturm-Skandal',
+            'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 69,
+        }
+    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -27,11 +41,23 @@ class BildIE(InfoExtractor):
        video_data = self._download_json(
            url.split('.bild.html')[0] + ',view=json.bild.html', video_id)

+        formats = []
+        for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
+            src_type = src.get('type')
+            if src_type == 'application/x-mpegURL':
+                formats.extend(
+                    self._extract_m3u8_formats(
+                        src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif src_type == 'video/mp4':
+                formats.append({'url': src['src'], 'format_id': 'http-mp4'})
+            else:
+                self.report_warning(f'Skipping unsupported format type: "{src_type}"')
+
        return {
            'id': video_id,
            'title': unescapeHTML(video_data['title']).strip(),
            'description': unescapeHTML(video_data.get('description')),
-            'url': video_data['clipList'][0]['srces'][0]['src'],
+            'formats': formats,
            'thumbnail': video_data.get('poster'),
            'duration': int_or_none(video_data.get('durationSec')),
        }
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@ -3,6 +3,7 @@ import functools
 import hashlib
 import itertools
 import math
+import re
 import time
 import urllib.parse

@ -14,6 +15,7 @@ from ..utils import (
    GeoRestrictedError,
    InAdvancePagedList,
    OnDemandPagedList,
+    bool_or_none,
    filter_dict,
    float_or_none,
    format_field,
@ -34,27 +36,31 @@ from ..utils import (
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
+    variadic,
 )


 class BilibiliBaseIE(InfoExtractor):
+    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
+
    def extract_formats(self, play_info):
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

-        audios = traverse_obj(play_info, ('dash', 'audio', ...))
+        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
-            'acodec': audio.get('codecs'),
+            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
-            'filesize': int_or_none(audio.get('size'))
+            'filesize': int_or_none(audio.get('size')),
+            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
@ -65,9 +71,13 @@ class BilibiliBaseIE(InfoExtractor):
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            'acodec': 'none' if audios else None,
+            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
+            'format_id': traverse_obj(
+                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
+                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

@ -149,7 +159,7 @@ class BilibiliBaseIE(InfoExtractor):


 class BiliBiliIE(BilibiliBaseIE):
-    _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
@ -245,7 +255,7 @@ class BiliBiliIE(BilibiliBaseIE):
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
-            'uploader': '小夫Tech',
+            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
@ -502,7 +512,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):


 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
-    _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
@ -521,7 +531,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):


 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
-    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
+    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
@ -672,13 +682,35 @@ class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
        return self.playlist_result(paged_list, playlist_id)


-class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
-    _VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
+class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
+    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
+        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
+            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
+
+    def _get_uploader(self, uid, playlist_id):
+        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
+        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
+
+    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
+        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
+        metadata.pop('page_count', None)
+        metadata.pop('page_size', None)
+        return metadata, page_list
+
+
+class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
+    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
-            'title': '《底特律 变人》'
+            'title': '【完结】《底特律 变人》全结局流程解说',
+            'description': '',
+            'uploader': '老戴在此',
+            'uploader_id': '2142762',
+            'timestamp': int,
+            'upload_date': str,
+            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]
@ -699,22 +731,251 @@ class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
-                'title': traverse_obj(page_data, ('meta', 'name'))
+                'uploader': self._get_uploader(mid, playlist_id),
+                **traverse_obj(page_data, {
+                    'title': ('meta', 'name', {str}),
+                    'description': ('meta', 'description', {str}),
+                    'uploader_id': ('meta', 'mid', {str_or_none}),
+                    'timestamp': ('meta', 'ptime', {int_or_none}),
+                    'thumbnail': ('meta', 'cover', {url_or_none}),
+                })
            }

        def get_entries(page_data):
-            for entry in page_data.get('archives', []):
-                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
-                                      BiliBiliIE, entry['bvid'])
+            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
-        return self.playlist_result(paged_list, playlist_id, metadata['title'])
+        return self.playlist_result(paged_list, playlist_id, **metadata)
+
+
+class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
+    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
+    _TESTS = [{
+        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
+        'info_dict': {
+            'id': '1958703906_547718',
+            'title': '直播回放',
+            'description': '直播回放',
+            'uploader': '靡烟miya',
+            'uploader_id': '1958703906',
+            'timestamp': 1637985853,
+            'upload_date': '20211127',
+            'modified_timestamp': int,
+            'modified_date': str,
+        },
+        'playlist_mincount': 513,
+    }]
+
+    def _real_extract(self, url):
+        mid, sid = self._match_valid_url(url).group('mid', 'sid')
+        playlist_id = f'{mid}_{sid}'
+        playlist_meta = traverse_obj(self._download_json(
+            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
+        ), {
+            'title': ('data', 'meta', 'name', {str}),
+            'description': ('data', 'meta', 'description', {str}),
+            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
+            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
+            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
+        })
+
+        def fetch_page(page_idx):
+            return self._download_json(
+                'https://api.bilibili.com/x/series/archives',
+                playlist_id, note=f'Downloading page {page_idx}',
+                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
+
+        def get_metadata(page_data):
+            page_size = page_data['page']['size']
+            entry_count = page_data['page']['total']
+            return {
+                'page_count': math.ceil(entry_count / page_size),
+                'page_size': page_size,
+                'uploader': self._get_uploader(mid, playlist_id),
+                **playlist_meta
+            }
+
+        def get_entries(page_data):
+            return self._get_entries(page_data, 'archives')
+
+        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
+        return self.playlist_result(paged_list, playlist_id, **metadata)
+
+
+class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
+    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
+        'info_dict': {
+            'id': '1103407912',
+            'title': '【V2】（旧）',
+            'description': '',
+            'uploader': '晓月春日',
+            'uploader_id': '84912',
+            'timestamp': 1604905176,
+            'upload_date': '20201109',
+            'modified_timestamp': int,
+            'modified_date': str,
+            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
+            'view_count': int,
+            'like_count': int,
+        },
+        'playlist_mincount': 22,
+    }, {
+        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        fid = self._match_id(url)
+
+        list_info = self._download_json(
+            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
+            fid, note='Downloading favlist metadata')
+        if list_info['code'] == -403:
+            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
+
+        entries = self._get_entries(self._download_json(
+            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
+            fid, note='Download favlist entries'), 'data')
+
+        return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
+            'title': ('title', {str}),
+            'description': ('intro', {str}),
+            'uploader': ('upper', 'name', {str}),
+            'uploader_id': ('upper', 'mid', {str_or_none}),
+            'timestamp': ('ctime', {int_or_none}),
+            'modified_timestamp': ('mtime', {int_or_none}),
+            'thumbnail': ('cover', {url_or_none}),
+            'view_count': ('cnt_info', 'play', {int_or_none}),
+            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
+        })))
+
+
+class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://www.bilibili.com/watchlater/#/list',
+        'info_dict': {'id': 'watchlater'},
+        'playlist_mincount': 0,
+        'skip': 'login required',
+    }]
+
+    def _real_extract(self, url):
+        list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
+        watchlater_info = self._download_json(
+            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
+        if watchlater_info['code'] == -101:
+            self.raise_login_required(msg='You need to login to access your watchlater list')
+        entries = self._get_entries(watchlater_info, ('data', 'list'))
+        return self.playlist_result(entries, id=list_id, title='稍后再看')
+
+
+class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
+        'info_dict': {
+            'id': '5_547718',
+            'title': '直播回放',
+            'uploader': '靡烟miya',
+            'uploader_id': '1958703906',
+            'timestamp': 1637985853,
+            'upload_date': '20211127',
+        },
+        'playlist_mincount': 513,
+    }, {
+        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
+        'info_dict': {
+            'id': '5_547718',
+        },
+        'playlist_mincount': 513,
+        'skip': 'redirect url',
+    }, {
+        'url': 'https://www.bilibili.com/list/ml1103407912',
+        'info_dict': {
+            'id': '3_1103407912',
+            'title': '【V2】（旧）',
+            'uploader': '晓月春日',
+            'uploader_id': '84912',
+            'timestamp': 1604905176,
+            'upload_date': '20201109',
+            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
+        },
+        'playlist_mincount': 22,
+    }, {
+        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
+        'info_dict': {
+            'id': '3_1103407912',
+        },
+        'playlist_mincount': 22,
+        'skip': 'redirect url',
+    }, {
+        'url': 'https://www.bilibili.com/list/watchlater',
+        'info_dict': {'id': 'watchlater'},
+        'playlist_mincount': 0,
+        'skip': 'login required',
+    }, {
+        'url': 'https://www.bilibili.com/medialist/play/watchlater',
+        'info_dict': {'id': 'watchlater'},
+        'playlist_mincount': 0,
+        'skip': 'login required',
+    }]
+
+    def _extract_medialist(self, query, list_id):
+        for page_num in itertools.count(1):
+            page_data = self._download_json(
+                'https://api.bilibili.com/x/v2/medialist/resource/list',
+                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
+            )['data']
+            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
+            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
+            if not page_data.get('has_more', False):
+                break
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        webpage = self._download_webpage(url, list_id)
+        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
+        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
+            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
+            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
+            if error_code == -400 and list_id == 'watchlater':
+                self.raise_login_required('You need to login to access your watchlater playlist')
+            elif error_code == -403:
+                self.raise_login_required('This is a private playlist. You need to login as its owner')
+            elif error_code == 11010:
+                raise ExtractorError('Playlist is no longer available', expected=True)
+            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
+
+        query = {
+            'ps': 20,
+            'with_current': False,
+            **traverse_obj(initial_state, {
+                'type': ('playlist', 'type', {int_or_none}),
+                'biz_id': ('playlist', 'id', {int_or_none}),
+                'tid': ('tid', {int_or_none}),
+                'sort_field': ('sortFiled', {int_or_none}),
+                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
+            })
+        }
+        metadata = {
+            'id': f'{query["type"]}_{query["biz_id"]}',
+            **traverse_obj(initial_state, ('mediaListInfo', {
+                'title': ('title', {str}),
+                'uploader': ('upper', 'name', {str}),
+                'uploader_id': ('upper', 'mid', {str_or_none}),
+                'timestamp': ('ctime', {int_or_none}),
+                'thumbnail': ('cover', {url_or_none}),
+            })),
+        }
+        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)


 class BilibiliCategoryIE(InfoExtractor):
    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
-    _VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
@ -1399,7 +1660,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):


 class BiliLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
+    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
--- a/yt_dlp/extractor/bpb.py
+++ b/yt_dlp/extractor/bpb.py
@ -1,56 +1,170 @@
+import functools
 import re

 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
+    extract_attributes,
+    get_element_text_and_html_by_tag,
+    get_elements_by_class,
+    join_nonempty,
    js_to_json,
-    determine_ext,
+    mimetype2ext,
+    unified_strdate,
+    url_or_none,
+    urljoin,
+    variadic,
 )
+from ..utils.traversal import traverse_obj
+
+
+def html_get_element(tag=None, cls=None):
+    """Build a one-argument callable that picks a single element out of an HTML string.
+
+    When `cls` is given, the lookup is done by `get_elements_by_class` (with `tag`
+    forwarded as a keyword argument); otherwise `get_element_text_and_html_by_tag`
+    is used with `tag`. The returned callable takes the HTML and returns item 0
+    of the helper's result.
+    """
+    assert tag or cls, 'One of tag or class is required'
+
+    if cls:
+        func = functools.partial(get_elements_by_class, cls, tag=tag)
+    else:
+        func = functools.partial(get_element_text_and_html_by_tag, tag)
+
+    def html_get_element_wrapper(html):
+        # variadic() normalises the helper's return value so that [0] can pick its first item
+        # NOTE(review): presumably an empty result list makes [0] raise IndexError; the
+        # callers in this file run the wrapper inside traverse_obj - confirm that is tolerated
+        return variadic(func(html))[0]
+
+    return html_get_element_wrapper


 class BpbIE(InfoExtractor):
    IE_DESC = 'Bundeszentrale für politische Bildung'
-    _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
-        'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
        'info_dict': {
            'id': '297',
            'ext': 'mp4',
+            'creator': 'Kooperative Berlin',
+            'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
+            'release_date': '20160115',
+            'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
+            'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
+            'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
            'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
-            'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
+            'uploader': 'Bundeszentrale für politische Bildung',
+        },
+    }, {
+        'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
+        'info_dict': {
+            'id': '522184',
+            'ext': 'mp4',
+            'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
+            'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
+            'release_date': '20230621',
+            'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
+            'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
+            'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
+            'uploader': 'Bundeszentrale für politische Bildung',
+        },
+    }, {
+        'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
+        'info_dict': {
+            'id': '518789',
+            'ext': 'mp4',
+            'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
+            'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
+            'release_date': '20230302',
+            'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
+            'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
+            'title': 'md5:3e956f264bb501f6383f10495a401da4',
+            'uploader': 'Bundeszentrale für politische Bildung',
+        },
+    }, {
+        'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
+        'info_dict': {
+            'id': '315813',
+            'ext': 'mp3',
+            'creator': 'Axel Schröder',
+            'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
+            'release_date': '20200921',
+            'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
+            'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
+            'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
+            'title': 'Folge 1: Eine Einführung',
+            'uploader': 'Bundeszentrale für politische Bildung',
+        },
+    }, {
+        'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
+        'info_dict': {
+            'id': '517806',
+            'ext': 'mp3',
+            'creator': 'Bundeszentrale für politische Bildung',
+            'description': 'md5:594689600e919912aade0b2871cc3fed',
+            'release_date': '20230127',
+            'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
+            'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
+            'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
+            'title': 'Die Weltanschauung der "Neuen Rechten"',
+            'uploader': 'Bundeszentrale für politische Bildung',
+        },
+    }, {
+        'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
+        'only_matching': True,
+    }]
+
+    _TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
+
+    def _parse_vue_attributes(self, name, string, video_id):
+        """Return the attributes of a `<name ...>` opening tag found in `string`.
+
+        Attributes whose key starts with ':' are decoded further: the value is run
+        through js_to_json and parsed as JSON (non-fatally), and the parsed result
+        replaces the raw string in the returned mapping.
+        """
+        attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name))
+
+        # Only existing keys are reassigned below, so the mapping keeps its size while iterated
+        for key, value in attributes.items():
+            # presumably ':' marks Vue's bound-prop shorthand, whose value is a JS expression - TODO confirm
+            if key.startswith(':'):
+                attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False)
+
+        return attributes
+
+    @staticmethod
+    def _process_source(source):
+        url = url_or_none(source['src'])
+        if not url:
+            return None
+
+        source_type = source.get('type', '')
+        extension = mimetype2ext(source_type)
+        is_video = source_type.startswith('video')
+        note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
+
+        return {
+            'url': url,
+            'ext': extension,
+            'vcodec': None if is_video else 'none',
+            'quality': 10 if note == 'high' else 0,
+            'format_note': note,
+            'format_id': join_nonempty(extension, note),
        }
-    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        title = self._html_search_regex(
-            r'<h2 class="white">(.*?)</h2>', webpage, 'title')
-        video_info_dicts = re.findall(
-            r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
-
-        formats = []
-        for video_info in video_info_dicts:
-            video_info = self._parse_json(
-                video_info, video_id, transform_source=js_to_json, fatal=False)
-            if not video_info:
-                continue
-            video_url = video_info.get('src')
-            if not video_url:
-                continue
-            quality = 'high' if '_high' in video_url else 'low'
-            formats.append({
-                'url': video_url,
-                'quality': 10 if quality == 'high' else 0,
-                'format_note': quality,
-                'format_id': '%s-%s' % (quality, determine_ext(video_url)),
-            })
+        title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
+        json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))

        return {
            'id': video_id,
-            'formats': formats,
-            'title': title,
-            'description': self._og_search_description(webpage),
+            'title': traverse_obj(title_result, ('title', {str.strip})) or None,
+            # This metadata could be interpreted otherwise, but it fits "series" the most
+            'series': traverse_obj(title_result, ('series', {str.strip})) or None,
+            'description': join_nonempty(*traverse_obj(webpage, [(
+                {html_get_element(cls='opening-intro')},
+                [{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
+            ), {clean_html}]), delim='\n\n') or None,
+            'creator': self._html_search_meta('author', webpage),
+            'uploader': self._html_search_meta('publisher', webpage),
+            'release_date': unified_strdate(self._html_search_meta('date', webpage)),
+            'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
+            **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
+                'formats': (':sources', ..., {self._process_source}),
+                'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
+            }),
        }
--- a/yt_dlp/extractor/canal1.py
+++ b/yt_dlp/extractor/canal1.py
@ -0,0 +1,39 @@
+from .common import InfoExtractor
+
+
+class Canal1IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'
+
+    _TESTS = [{
+        'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
+        'info_dict': {
+            'id': '63b39f6b354977084b85ab54',
+            'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
+            'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
+            'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
+            'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
+            'ext': 'mp4',
+        },
+    }, {
+        'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
+        'info_dict': {
+            'id': '63b39e93f5fd223aa32250fb',
+            'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
+            'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
+            'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
+            'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
+            'ext': 'mp4',
+        },
+    }, {
+        # Geo-restricted to Colombia
+        'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Delegate extraction to the URL found in the page's "embedUrl" field.
+
+        The slug matched from `url` is used for the page download and passed on as
+        display_id of the url_transparent result.
+        """
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # The search is fatal by default: a page without "embedUrl" aborts extraction here
+        return self.url_result(
+            self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
+            display_id=display_id, url_transparent=True)
--- a/yt_dlp/extractor/caracoltv.py
+++ b/yt_dlp/extractor/caracoltv.py
@ -0,0 +1,136 @@
+import base64
+import json
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    traverse_obj,
+    urljoin,
+)
+
+
+class CaracolTvPlayIE(InfoExtractor):
+    _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
+    _NETRC_MACHINE = 'caracoltv-play'
+
+    _TESTS = [{
+        'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
+        'info_dict': {
+            'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
+            'title': 'La teoría del promedio',
+            'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
+        },
+        'playlist_count': 6,
+    }, {
+        'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
+        'info_dict': {
+            'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
+            'title': 'Ella',
+            'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
+        'info_dict': {
+            'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
+            'title': 'La vuelta al mundo en 80 risas 2022',
+            'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
+        },
+        'playlist_count': 17,
+    }, {
+        'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
+        'only_matching': True,
+    }]
+
+    _USER_TOKEN = None
+
+    def _extract_app_token(self, webpage):
+        """Return the base64 "key:secret" credential used for HTTP Basic auth.
+
+        The pair is taken from the `live` entry of the `mediation` object in the
+        site's coreConfig.js when that script can be located in `webpage`,
+        downloaded and parsed; each value that cannot be read falls back to the
+        hard-coded default below.
+
+        `webpage` may be falsy (e.g. when the caller's non-fatal page download
+        failed); it is then treated as an empty document.
+        """
+        # '\.' so that only a literal "coreConfig.js" matches; `or ''` because the
+        # regex helper needs text even when the page could not be fetched
+        config_js_path = self._search_regex(
+            r'<script[^>]+src\s*=\s*"([^"]+coreConfig\.js[^"]+)', webpage or '', 'config js url', fatal=False)
+
+        mediation_config = {}
+        if config_js_path:
+            config_js = self._download_webpage(
+                urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config')
+            # Skip the JSON search when this best-effort download came back empty
+            if config_js:
+                mediation_config = self._search_json(
+                    r'mediation\s*:', config_js, 'mediation_config', None, transform_source=js_to_json, fatal=False)
+
+        key = traverse_obj(
+            mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
+        secret = traverse_obj(
+            mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'
+
+        return base64.b64encode(f'{key}:{secret}'.encode()).decode()
+
+    def _perform_login(self, email, password):
+        """Log in against the inmobly gateway and remember the resulting user token.
+
+        Steps: derive the application credential from the landing page, exchange
+        it for a bearer token at /applications/oauth, then send `email` and
+        `password` to /user/login. The 'user_token' field of that response is
+        stored in self._USER_TOKEN.
+        """
+        # Non-fatal download: the page is only handed to _extract_app_token
+        webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
+        app_token = self._extract_app_token(webpage)
+
+        # data=b'' sends an empty request body (presumably to force a POST - TODO confirm)
+        bearer_token = self._download_json(
+            'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
+            headers={'Authorization': f'Basic {app_token}'})['token']
+
+        # A fresh random UUID is generated as device_id on every login
+        self._USER_TOKEN = self._download_json(
+            'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
+                'Content-Type': 'application/json',
+                'Authorization': f'Bearer {bearer_token}',
+            }, data=json.dumps({
+                'device_data': {
+                    'device_id': str(uuid.uuid4()),
+                    'device_token': '',
+                    'device_type': 'web'
+                },
+                'login_data': {
+                    'enabled': True,
+                    'email': email,
+                    'password': password,
+                }
+            }).encode())['user_token']
+
+    def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
+        """Convert one feed item into an info dict.
+
+        @param video_data     feed item; 'stream_url' and 'id' are required keys,
+                              the remaining fields are read with .get()
+        @param series_id      id of the enclosing series, if any; also passed as
+                              the id argument of the m3u8 download
+        @param season_id      id of the enclosing season, if any
+        @param season_number  season ordering value, coerced with int_or_none
+        @returns              dict with id, title, description, formats, subtitles,
+                              thumbnails and series/season/episode fields
+        """
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')
+
+        return {
+            'id': video_data['id'],
+            'title': video_data.get('name'),
+            'description': video_data.get('description'),
+            'formats': formats,
+            'subtitles': subtitles,
+            # Each 'extra_thumbs' entry is remapped to url/height/width
+            'thumbnails': traverse_obj(
+                video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
+            'series_id': series_id,
+            'season_id': season_id,
+            'season_number': int_or_none(season_number),
+            'episode_number': int_or_none(video_data.get('item_order')),
+            # NOTE(review): entry_type 3 is treated as live; the meaning of other values is not visible here
+            'is_live': video_data.get('entry_type') == 3,
+        }
+
+    def _extract_series_seasons(self, seasons, series_id):
+        """Yield an info dict for every episode of every season in `seasons`.
+
+        One feed request is made per season (filtered by the season's 'id' and
+        authorised with self._USER_TOKEN); each entry of the response's 'items'
+        is converted with _extract_video. As a generator, the requests are only
+        issued while the result is being consumed.
+        """
+        for season in seasons:
+            api_response = self._download_json(
+                'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
+                headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
+
+            # 'order' is optional on the season and becomes the season number of its episodes
+            season_number = season.get('order')
+            for episode in api_response['items']:
+                yield self._extract_video(episode, series_id, season['id'], season_number)
+
+    def _real_extract(self, url):
+        """Extract a single video, or a playlist of all episodes when the item has seasons."""
+        series_id = self._match_id(url)
+
+        # No token stored yet: log in with the hard-coded credentials below
+        # NOTE(review): presumably a shared guest account, used when no user login has set _USER_TOKEN - confirm
+        if self._USER_TOKEN is None:
+            self._perform_login('guest@inmobly.com', 'Test@gus1')
+
+        # Only the first entry of 'items' is used
+        api_response = self._download_json(
+            'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
+            headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
+
+        # Items without seasons are returned as a single video
+        if not api_response.get('seasons'):
+            return self._extract_video(api_response)
+
+        return self.playlist_result(
+            self._extract_series_seasons(api_response['seasons'], series_id),
+            series_id, **traverse_obj(api_response, {
+                'title': 'name',
+                'description': 'description',
+            }))
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@ -339,12 +339,12 @@ class CBCGemIE(InfoExtractor):
        data = json.dumps({'jwt': sig}).encode()
        headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
        resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
-                                   None, data=data, headers=headers)
+                                   None, data=data, headers=headers, expected_status=426)
        cbc_access_token = resp['accessToken']

        headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
        resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
-                                   None, headers=headers)
+                                   None, headers=headers, expected_status=426)
        return resp['claimsToken']

    def _get_claims_token_expiry(self):
--- a/yt_dlp/extractor/ccc.py
+++ b/yt_dlp/extractor/ccc.py
@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor):
            'id': '30c3',
        },
        'playlist_count': 135,
+    }, {
+        'url': 'https://media.ccc.de/c/DS2023',
+        'info_dict': {
+            'title': 'Datenspuren 2023',
+            'id': 'DS2023',
+        },
+        'playlist_count': 37
    }]

    def _real_extract(self, url):
-        playlist_id = self._match_id(url).lower()
+        playlist_id = self._match_id(url)

        conf = self._download_json(
            'https://media.ccc.de/public/conferences/' + playlist_id,
--- a/yt_dlp/extractor/douyutv.py
+++ b/yt_dlp/extractor/douyutv.py
@ -1,31 +1,72 @@
 import time
 import hashlib
-import re
 import urllib
+import uuid

 from .common import InfoExtractor
+from .openload import PhantomJSwrapper
 from ..utils import (
    ExtractorError,
+    UserNotLive,
+    determine_ext,
+    int_or_none,
+    js_to_json,
+    parse_resolution,
+    str_or_none,
+    traverse_obj,
    unescapeHTML,
-    unified_strdate,
+    url_or_none,
+    urlencode_postdata,
    urljoin,
 )


-class DouyuTVIE(InfoExtractor):
-    IE_DESC = '斗鱼'
+class DouyuBaseIE(InfoExtractor):
+    """Shared helpers for computing Douyu request signatures with the site's JS signing function."""
+
+    def _download_cryptojs_md5(self, video_id):
+        """Download crypto-js' md5 rollup, cache it and return its source.
+
+        The URLs are tried in order; the first non-empty download is stored in
+        the 'douyu' cache section. Raises ExtractorError when every URL fails.
+        """
+        for url in [
+            'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
+            'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
+        ]:
+            js_code = self._download_webpage(
+                url, video_id, note='Downloading signing dependency', fatal=False)
+            if js_code:
+                self.cache.store('douyu', 'crypto-js-md5', js_code)
+                return js_code
+        raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
+
+    def _get_cryptojs_md5(self, video_id):
+        """Return the md5 library source from the cache, downloading it when the cache has none."""
+        return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
+
+    def _calc_sign(self, sign_func, video_id, a):
+        """Run the site's signing function in PhantomJS and return its output as a dict.
+
+        @param sign_func  JS source expected to define `ub98484234`
+        @param video_id   id passed to the download/execute helpers
+        @param a          first argument of the signing call (callers in this
+                          file pass a room id or a point id)
+        @returns          dict built from the script's printed query string,
+                          keeping the first value of each key
+        """
+        # Second and third arguments: a random hex id and the current Unix time in whole seconds
+        b = uuid.uuid4().hex
+        c = round(time.time())
+        js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
+        phantom = PhantomJSwrapper(self)
+        result = phantom.execute(js_script, video_id,
+                                 note='Executing JS signing script').strip()
+        return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
+
+    def _search_js_sign_func(self, webpage, fatal=True):
+        """Return the body of an inline <script> in `webpage` that mentions `ub98484234`."""
+        # The optional greedy prefix `(?:<script.*)?` first consumes as much as it can, so
+        # backtracking settles on the last <script> tag (within reach of `.`) that still matches
+        return self._search_regex(
+            r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
+
+
+class DouyuTVIE(DouyuBaseIE):
+    IE_DESC = '斗鱼直播'
    _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
-        'url': 'http://www.douyutv.com/iseven',
+        'url': 'https://www.douyu.com/pigff',
        'info_dict': {
-            'id': '17732',
-            'display_id': 'iseven',
-            'ext': 'flv',
-            'title': 're:^清晨醒脑！根本停不下来！ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': r're:.*m7show@163\.com.*',
-            'thumbnail': r're:^https?://.*\.png',
-            'uploader': '7师傅',
+            'id': '24422',
+            'display_id': 'pigff',
+            'ext': 'mp4',
+            'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
+            'thumbnail': str,
+            'uploader': 'pigff',
            'is_live': True,
+            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
        'only_matching': True,
    }]

+    def _get_sign_func(self, room_id, video_id):
+        """Fetch the signing script for `room_id` from the homeH5Enc endpoint.
+
+        Returns the value stored under data -> 'room<room_id>' of the JSON
+        response; a missing key raises, since both lookups are plain subscripts.
+        """
+        return self._download_json(
+            f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
+            note='Getting signing script')['data'][f'room{room_id}']
+
+    def _extract_stream_formats(self, stream_formats):
+        """Build the formats list from a list of getH5Play responses.
+
+        For the 'data' object of each response, the stream URL is 'rtmp_url'
+        joined with 'rtmp_live'; responses without a usable URL are skipped.
+        The human-readable name and bitrate come from the 'multirates' entry
+        whose 'rate' equals the response's own 'rate'.
+        """
+        formats = []
+        for stream_info in traverse_obj(stream_formats, (..., 'data')):
+            stream_url = urljoin(
+                traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
+            if stream_url:
+                rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
+                rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
+                ext = determine_ext(stream_url)
+                formats.append({
+                    'url': stream_url,
+                    'format_id': str_or_none(rate_id),
+                    'ext': 'mp4' if ext == 'm3u8' else ext,
+                    'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
+                    # Modulo with a negative divisor maps rate 0 to 0 and a rate r in 1..9999 to
+                    # r - 10000, so rate 0 ranks highest and larger numbered rates rank above smaller ones
+                    # NOTE(review): presumably rate 0 is the original-quality stream - confirm
+                    'quality': rate_id % -10000 if rate_id is not None else None,
+                    **traverse_obj(rate_info, {
+                        'format': ('name', {str_or_none}),
+                        'tbr': ('bit', {int_or_none}),
+                    }),
+                })
+        return formats
+
    def _real_extract(self, url):
        video_id = self._match_id(url)

-        if video_id.isdigit():
-            room_id = video_id
-        else:
-            page = self._download_webpage(url, video_id)
-            room_id = self._html_search_regex(
-                r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
+        webpage = self._download_webpage(url, video_id)
+        room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
+
+        if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
+            raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
+        if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
+            raise UserNotLive(video_id=video_id)

        # Grab metadata from API
        params = {
@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor):
            'time': int(time.time()),
        }
        params['auth'] = hashlib.md5(
-            f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
-        room = self._download_json(
+            f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
+        room = traverse_obj(self._download_json(
            f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
-            note='Downloading room info', query=params)['data']
+            note='Downloading room info', query=params, fatal=False), 'data')

        # 1 = live, 2 = offline
-        if room.get('show_status') == '2':
-            raise ExtractorError('Live stream is offline', expected=True)
+        if traverse_obj(room, 'show_status') == '2':
+            raise UserNotLive(video_id=video_id)

-        video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
-        formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
+        js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
+        form_data = {
+            'rate': 0,
+            **self._calc_sign(js_sign_func, video_id, room_id),
+        }
+        stream_formats = [self._download_json(
+            f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
+            video_id, note="Downloading livestream format",
+            data=urlencode_postdata(form_data))]

-        title = unescapeHTML(room['room_name'])
-        description = room.get('show_details')
-        thumbnail = room.get('room_src')
-        uploader = room.get('nickname')
+        for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
+            if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
+                form_data['rate'] = rate_id
+                stream_formats.append(self._download_json(
+                    f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
+                    video_id, note=f'Downloading livestream format {rate_id}',
+                    data=urlencode_postdata(form_data)))

        return {
            'id': room_id,
-            'display_id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'uploader': uploader,
+            'formats': self._extract_stream_formats(stream_formats),
            'is_live': True,
-            'subtitles': subs,
-            'formats': formats,
+            **traverse_obj(room, {
+                'display_id': ('url', {str}, {lambda i: i[1:]}),
+                'title': ('room_name', {unescapeHTML}),
+                'description': ('show_details', {str}),
+                'uploader': ('nickname', {str}),
+                'thumbnail': ('room_src', {url_or_none}),
+            })
        }


-class DouyuShowIE(InfoExtractor):
+class DouyuShowIE(DouyuBaseIE):
    _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'

    _TESTS = [{
-        'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
-        'md5': '0c2cfd068ee2afe657801269b2d86214',
+        'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
        'info_dict': {
-            'id': 'rjNBdvnVXNzvE2yw',
+            'id': 'mPyq7oVNe5Yv1gLY',
            'ext': 'mp4',
-            'title': '陈一发儿：砒霜 我有个室友系列！04-01 22点场',
-            'duration': 7150.08,
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': '陈一发儿',
-            'uploader_id': 'XrZwYelr5wbK',
-            'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
-            'upload_date': '20170402',
+            'title': '四川人小时候的味道“蒜苗回锅肉”，传统菜不能丢，要常做来吃',
+            'duration': 633,
+            'thumbnail': str,
+            'uploader': '美食作家王刚V',
+            'uploader_id': 'OVAO4NVx1m7Q',
+            'timestamp': 1661850002,
+            'upload_date': '20220830',
+            'view_count': int,
+            'tags': ['美食', '美食综合'],
        },
    }, {
        'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
        'only_matching': True,
    }]

+    _FORMATS = {
+        'super': '原画',
+        'high': '超清',
+        'normal': '高清',
+    }
+
+    _QUALITIES = {
+        'super': -1,
+        'high': -2,
+        'normal': -3,
+    }
+
+    _RESOLUTIONS = {
+        'super': '1920x1080',
+        'high': '1280x720',
+        'normal': '852x480',
+    }
+
    def _real_extract(self, url):
        url = url.replace('vmobile.', 'v.')
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

-        room_info = self._parse_json(self._search_regex(
-            r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
+        video_info = self._search_json(
+            r'<script>\s*window\.\$DATA\s*=', webpage,
+            'video info', video_id, transform_source=js_to_json)

-        video_info = None
+        js_sign_func = self._search_js_sign_func(webpage)
+        form_data = {
+            'vid': video_id,
+            **self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
+        }
+        url_info = self._download_json(
+            'https://v.douyu.com/api/stream/getStreamUrl', video_id,
+            data=urlencode_postdata(form_data), note="Downloading video formats")

-        for trial in range(5):
-            # Sometimes Douyu rejects our request. Let's try it more times
-            try:
-                video_info = self._download_json(
-                    'https://vmobile.douyu.com/video/getInfo', video_id,
-                    query={'vid': video_id},
-                    headers={
-                        'Referer': url,
-                        'x-requested-with': 'XMLHttpRequest',
-                    })
-                break
-            except ExtractorError:
-                self._sleep(1, video_id)
-
-        if not video_info:
-            raise ExtractorError('Can\'t fetch video info')
-
-        formats = self._extract_m3u8_formats(
-            video_info['data']['video_url'], video_id,
-            entry_protocol='m3u8_native', ext='mp4')
-
-        upload_date = unified_strdate(self._html_search_regex(
-            r'<em>上传时间：</em><span>([^<]+)</span>', webpage,
-            'upload date', fatal=False))
-
-        uploader = uploader_id = uploader_url = None
-        mobj = re.search(
-            r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
-            webpage)
-        if mobj:
-            uploader_id, uploader = mobj.groups()
-            uploader_url = urljoin(url, '/author/' + uploader_id)
+        formats = []
+        for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
+            video_url = traverse_obj(url, ('url', {url_or_none}))
+            if video_url:
+                ext = determine_ext(video_url)
+                formats.append({
+                    'format': self._FORMATS.get(name),
+                    'format_id': name,
+                    'url': video_url,
+                    'quality': self._QUALITIES.get(name),
+                    'ext': 'mp4' if ext == 'm3u8' else ext,
+                    'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
+                    **parse_resolution(self._RESOLUTIONS.get(name))
+                })
+            else:
+                self.to_screen(
+                    f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')

        return {
            'id': video_id,
-            'title': room_info['name'],
            'formats': formats,
-            'duration': room_info.get('duration'),
-            'thumbnail': room_info.get('pic'),
-            'upload_date': upload_date,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'uploader_url': uploader_url,
+            **traverse_obj(video_info, ('DATA', {
+                'title': ('content', 'title', {str}),
+                'uploader': ('content', 'author', {str}),
+                'uploader_id': ('content', 'up_id', {str_or_none}),
+                'duration': ('content', 'video_duration', {int_or_none}),
+                'thumbnail': ('content', 'video_pic', {url_or_none}),
+                'timestamp': ('content', 'create_time', {int_or_none}),
+                'view_count': ('content', 'view_num', {int_or_none}),
+                'tags': ('videoTag', ..., 'tagName', {str}),
+            }))
        }
--- a/yt_dlp/extractor/eplus.py
+++ b/yt_dlp/extractor/eplus.py
@ -0,0 +1,96 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    try_call,
+    unified_timestamp,
+)
+
+
+class EplusIbIE(InfoExtractor):  # Extractor for live.eplus.jp player pages addressed by an `ib` query token
+    IE_NAME = 'eplus:inbound'
+    IE_DESC = 'e+ (イープラス) overseas'
+    _VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'  # id = 86 token chars + "%3D%3D"; looks like URL-encoded base64 - TODO confirm
+    _TESTS = [{
+        'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
+        'info_dict': {
+            'id': '354502-0001-002',
+            'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022～LIVE with a smile!～【Streaming+(配信)】',
+            'live_status': 'was_live',
+            'release_date': '20211231',
+            'release_timestamp': 1640952000,
+            'description': str,
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+        'expected_warnings': [
+            'Could not find the playlist URL. This event may not be accessible',
+            'No video formats found!',
+            'Requested format is not available',
+        ],
+    }]
+
+    def _real_extract(self, url):  # Builds the info dict from the two inline JS variables (`app`, `listChannels`) of the player page
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)  # JSON object assigned to `var app` in an inline <script>
+
+        delivery_status = data_json.get('delivery_status')
+        archive_mode = data_json.get('archive_mode')
+        release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)  # minus 9 h (32400 s); presumably event_datetime is JST wall-clock time - confirm
+        release_timestamp_str = data_json.get('event_datetime_text')  # human-readable start time in JST; only used in the messages below
+
+        self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')
+
+        if delivery_status == 'PREPARING':  # translate the site's delivery_status into a live_status value
+            live_status = 'is_upcoming'
+        elif delivery_status == 'STARTED':
+            live_status = 'is_live'
+        elif delivery_status == 'STOPPED':  # stream ended; extraction only continues when an archive is enabled
+            if archive_mode != 'ON':
+                raise ExtractorError(
+                    'This event has ended and there is no archive for this event', expected=True)
+            live_status = 'post_live'  # reached only when archive_mode == 'ON'
+        elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
+            live_status = 'post_live'
+        elif delivery_status == 'CONFIRMED_ARCHIVE':
+            live_status = 'was_live'
+        else:  # unrecognised status: warn and fall back to treating the event as live
+            self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
+            live_status = 'is_live'
+
+        formats = []
+
+        m3u8_playlist_urls = self._search_json(
+            r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
+        if not m3u8_playlist_urls:  # no `listChannels` array in the page (default=[] above)
+            if live_status == 'is_upcoming':
+                self.raise_no_formats(
+                    f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
+            else:
+                self.raise_no_formats(
+                    'Could not find the playlist URL. This event may not be accessible', expected=True)
+        elif live_status == 'is_upcoming':  # playlist URLs are present but the event has not started yet
+            self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
+        elif live_status == 'post_live':  # playlist URLs are present but the archive is not ready yet
+            self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
+        else:  # is_live / was_live: gather formats from every listed playlist
+            for m3u8_playlist_url in m3u8_playlist_urls:
+                formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
+            # FIXME: HTTP request headers need to be updated to continue download
+            warning = 'Due to technical limitations, the download will be interrupted after one hour'
+            if live_status == 'is_live':
+                self.report_warning(warning)
+            elif live_status == 'was_live':
+                self.report_warning(f'{warning}. You can restart to continue the download')
+
+        return {  # NOTE(review): `formats` is still [] here whenever raise_no_formats() above did not abort - verify that this is intended
+            'id': data_json['app_id'],  # mandatory: a missing app_id raises KeyError, unlike the .get() fields below
+            'title': data_json.get('app_name'),
+            'formats': formats,
+            'live_status': live_status,
+            'description': data_json.get('content'),
+            'release_timestamp': release_timestamp,
+        }
--- a/yt_dlp/extractor/expressen.py
+++ b/yt_dlp/extractor/expressen.py
@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?(?:expressen|di)\.se/
-                        (?:(?:tvspelare/video|videoplayer/embed)/)?
-                        tv/(?:[^/]+/)*
+                        (?:(?:tvspelare/video|video-?player/embed)/)?
+                        (?:tv|nyheter)/(?:[^/?#]+/)*
                        (?P<id>[^/?#&]+)
                    '''
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
    }, {
        'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
        'only_matching': True,
+    }, {
+        'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@ -74,6 +74,22 @@ class FacebookIE(InfoExtractor):
    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'

    _TESTS = [{
+        'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
+        'info_dict': {
+            'id': '3676516585958356',
+            'ext': 'mp4',
+            'title': 'dr Adam Przygoda',
+            'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
+            'uploader': 'RADIO KICKS FM',
+            'upload_date': '20230818',
+            'timestamp': 1692346159,
+            'thumbnail': r're:^https?://.*',
+            'uploader_id': '100063551323670',
+            'duration': 3132.184,
+            'view_count': int,
+            'concurrent_view_count': 0,
+        },
+    }, {
        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
        'md5': '6a40d33c0eccbb1af76cf0485a052659',
        'info_dict': {
@ -97,7 +113,7 @@ class FacebookIE(InfoExtractor):
            'upload_date': '20140506',
            'timestamp': 1399398998,
            'thumbnail': r're:^https?://.*',
-            'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
+            'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
            'duration': 131.03,
            'concurrent_view_count': int,
        },
@ -179,7 +195,7 @@ class FacebookIE(InfoExtractor):
            'timestamp': 1486648217,
            'upload_date': '20170209',
            'uploader': 'Yaroslav Korpan',
-            'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
+            'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
            'concurrent_view_count': int,
            'thumbnail': r're:^https?://.*',
            'view_count': int,
@ -274,7 +290,7 @@ class FacebookIE(InfoExtractor):
            'title': 'Josef',
            'thumbnail': r're:^https?://.*',
            'concurrent_view_count': int,
-            'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
+            'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
            'timestamp': 1549275572,
            'duration': 3.413,
            'uploader': 'Josef Novak',
@ -401,9 +417,9 @@ class FacebookIE(InfoExtractor):

        def extract_metadata(webpage):
            post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
-                r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
+                r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
            post = traverse_obj(post_data, (
-                ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
+                ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
            media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
            title = get_first(media, ('title', 'text'))
@ -489,18 +505,17 @@ class FacebookIE(InfoExtractor):
            # with non-browser User-Agent.
            for f in info['formats']:
                f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
-            info['_format_sort_fields'] = ('res', 'quality')

        def extract_relay_data(_filter):
            return self._parse_json(self._search_regex(
-                r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
+                r'data-sjs>({.*?%s.*?})</script>' % _filter,
                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}

        def extract_relay_prefetched_data(_filter):
-            replay_data = extract_relay_data(_filter)
-            for require in (replay_data.get('require') or []):
-                if require[0] == 'RelayPrefetchedStreamCache':
-                    return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
+            return traverse_obj(extract_relay_data(_filter), (
+                'require', (None, (..., ..., ..., '__bbox', 'require')),
+                lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
+                '__bbox', 'result', 'data', {dict}), get_all=False) or {}

        if not video_data:
            server_js_data = self._parse_json(self._search_regex([
@ -511,7 +526,7 @@ class FacebookIE(InfoExtractor):

        if not video_data:
            data = extract_relay_prefetched_data(
-                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
+                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
            if data:
                entries = []

@ -526,7 +541,8 @@ class FacebookIE(InfoExtractor):
                    formats = []
                    q = qualities(['sd', 'hd'])
                    for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
-                                           ('playable_url_dash', '')):
+                                           ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
+                                           ('browser_native_sd_url', 'sd')):
                        playable_url = video.get(key)
                        if not playable_url:
                            continue
@ -535,7 +551,8 @@ class FacebookIE(InfoExtractor):
                        else:
                            formats.append({
                                'format_id': format_id,
-                                'quality': q(format_id),
+                                # sd, hd formats w/o resolution info should be deprioritized below DASH
+                                'quality': q(format_id) - 3,
                                'url': playable_url,
                            })
                    extract_dash_manifest(video, formats)
@ -702,9 +719,11 @@ class FacebookIE(InfoExtractor):
                for src_type in ('src', 'src_no_ratelimit'):
                    src = f[0].get('%s_%s' % (quality, src_type))
                    if src:
-                        preference = -10 if format_id == 'progressive' else -1
+                        # sd, hd formats w/o resolution info should be deprioritized below DASH
+                        # TODO: investigate if progressive or src formats still exist
+                        preference = -10 if format_id == 'progressive' else -3
                        if quality == 'hd':
-                            preference += 5
+                            preference += 1
                        formats.append({
                            'format_id': '%s_%s_%s' % (format_id, quality, src_type),
                            'url': src,
--- a/yt_dlp/extractor/funker530.py
+++ b/yt_dlp/extractor/funker530.py
@ -60,6 +60,7 @@ class Funker530IE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
+        info = {}
        rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
        if rumble_url:
            info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@ -2370,7 +2370,7 @@ class GenericIE(InfoExtractor):
            'id': flashvars['video_id'],
            'display_id': display_id,
            'title': title,
-            'thumbnail': thumbnail,
+            'thumbnail': urljoin(url, thumbnail),
            'formats': formats,
        }

--- a/yt_dlp/extractor/gofile.py
+++ b/yt_dlp/extractor/gofile.py
@ -66,7 +66,7 @@ class GofileIE(InfoExtractor):
        query_params = {
            'contentId': file_id,
            'token': self._TOKEN,
-            'websiteToken': 12345,
+            'websiteToken': '7fd94ds12fds4',  # From https://gofile.io/dist/js/alljs.js
        }
        password = self.get_param('videopassword')
        if password:
--- a/yt_dlp/extractor/goplay.py
+++ b/yt_dlp/extractor/goplay.py
@ -383,9 +383,9 @@ class AwsIdp:
        months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

-        time_now = datetime.datetime.utcnow()
+        time_now = datetime.datetime.now(datetime.timezone.utc)
        format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
-        time_string = datetime.datetime.utcnow().strftime(format_string)
+        time_string = time_now.strftime(format_string)
        return time_string

    def __str__(self):
--- a/yt_dlp/extractor/indavideo.py
+++ b/yt_dlp/extractor/indavideo.py
@ -1,9 +1,9 @@
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
    int_or_none,
    parse_age_limit,
    parse_iso8601,
+    time_seconds,
    update_url_query,
 )

@ -11,15 +11,14 @@ from ..utils import (
 class IndavideoEmbedIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
    # Some example URLs covered by generic extractor:
-    #   http://indavideo.hu/video/Vicces_cica_1
-    #   http://index.indavideo.hu/video/2015_0728_beregszasz
-    #   http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
-    #   http://erotika.indavideo.hu/video/Amator_tini_punci
-    #   http://film.indavideo.hu/video/f_hrom_nagymamm_volt
-    #   http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
-    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
+    #   https://indavideo.hu/video/Vicces_cica_1
+    #   https://index.indavideo.hu/video/Hod_Nemetorszagban
+    #   https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+    #   https://film.indavideo.hu/video/f_farkaslesen
+    #   https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)']
    _TESTS = [{
-        'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
+        'url': 'https://indavideo.hu/player/video/1bdc3c6d80/',
        'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
        'info_dict': {
            'id': '1837039',
@ -36,21 +35,33 @@ class IndavideoEmbedIE(InfoExtractor):
            'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
        },
    }, {
-        'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
-        'only_matching': True,
-    }, {
-        'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
+        'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
        'only_matching': True,
    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://indavideo.hu/video/Vicces_cica_1',
+        'info_dict': {
+            'id': '1335611',
+            'ext': 'mp4',
+            'title': 'Vicces cica',
+            'description': 'Játszik a tablettel. :D',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Jet_Pack',
+            'uploader_id': '491217',
+            'timestamp': 1390821212,
+            'upload_date': '20140127',
+            'duration': 7,
+            'age_limit': 0,
+            'tags': ['cica', 'Jet_Pack'],
+        },
+    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        video = self._download_json(
-            'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
-            video_id)['data']
-
-        title = video['title']
+            f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/',
+            video_id, query={'_': time_seconds()})['data']

        video_urls = []

@ -60,33 +71,21 @@ class IndavideoEmbedIE(InfoExtractor):
        elif isinstance(video_files, dict):
            video_urls.extend(video_files.values())

-        video_file = video.get('video_file')
-        if video:
-            video_urls.append(video_file)
        video_urls = list(set(video_urls))

-        video_prefix = video_urls[0].rsplit('/', 1)[0]
-
-        for flv_file in video.get('flv_files', []):
-            flv_url = '%s/%s' % (video_prefix, flv_file)
-            if flv_url not in video_urls:
-                video_urls.append(flv_url)
-
-        filesh = video.get('filesh')
+        filesh = video.get('filesh') or {}

        formats = []
        for video_url in video_urls:
            height = int_or_none(self._search_regex(
                r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
-            if filesh:
-                if not height:
-                    continue
-                token = filesh.get(compat_str(height))
-                if token is None:
-                    continue
-                video_url = update_url_query(video_url, {'token': token})
+            if not height and len(filesh) == 1:
+                height = int_or_none(list(filesh.keys())[0])
+            token = filesh.get(str(height))
+            if token is None:
+                continue
            formats.append({
-                'url': video_url,
+                'url': update_url_query(video_url, {'token': token}),
                'height': height,
            })

@ -103,7 +102,7 @@ class IndavideoEmbedIE(InfoExtractor):

        return {
            'id': video.get('id') or video_id,
-            'title': title,
+            'title': video.get('title'),
            'description': video.get('description'),
            'thumbnails': thumbnails,
            'uploader': video.get('user_name'),
--- a/yt_dlp/extractor/lecturio.py
+++ b/yt_dlp/extractor/lecturio.py
@ -57,8 +57,8 @@ class LecturioIE(LecturioBaseIE):
    _VALID_URL = r'''(?x)
                    https://
                        (?:
-                            app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
-                            (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
+                            app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
+                            (?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
                        )
                    '''
    _TESTS = [{
@ -73,6 +73,9 @@ class LecturioIE(LecturioBaseIE):
    }, {
        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
        'only_matching': True,
+    }, {
+        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
+        'only_matching': True,
    }, {
        'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
        'only_matching': True,
--- a/yt_dlp/extractor/massengeschmacktv.py
+++ b/yt_dlp/extractor/massengeschmacktv.py
@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor):

    _TEST = {
        'url': 'https://massengeschmack.tv/play/fktv202',
-        'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
+        'md5': '9996f314994a49fefe5f39aa1b07ae21',
        'info_dict': {
            'id': 'fktv202',
            'ext': 'mp4',
-            'title': 'Fernsehkritik-TV - Folge 202',
+            'title': 'Fernsehkritik-TV #202',
+            'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
        },
    }

@ -29,9 +30,6 @@ class MassengeschmackTVIE(InfoExtractor):
        episode = self._match_id(url)

        webpage = self._download_webpage(url, episode)
-        title = clean_html(self._html_search_regex(
-            '<h3>([^<]+)</h3>', webpage, 'title'))
-        thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
        sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)

        formats = []
@ -67,7 +65,8 @@ class MassengeschmackTVIE(InfoExtractor):

        return {
            'id': episode,
-            'title': title,
+            'title': clean_html(self._html_search_regex(
+                r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)),
            'formats': formats,
-            'thumbnail': thumbnail,
+            'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
        }
--- a/yt_dlp/extractor/mediaklikk.py
+++ b/yt_dlp/extractor/mediaklikk.py
@ -1,5 +1,8 @@
 from ..utils import (
-    unified_strdate
+    ExtractorError,
+    traverse_obj,
+    unified_strdate,
+    url_or_none,
 )
 from .common import InfoExtractor
 from ..compat import (
@ -15,7 +18,7 @@ class MediaKlikkIE(InfoExtractor):
                        (?P<id>[^/#?_]+)'''

    _TESTS = [{
-        # mediaklikk. date in html.
+        # (old) mediaklikk. date in html.
        'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
        'info_dict': {
            'id': '4754129',
@ -23,9 +26,21 @@ class MediaKlikkIE(InfoExtractor):
            'ext': 'mp4',
            'upload_date': '20210901',
            'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
+        },
+        'skip': 'Webpage redirects to 404 page',
+    }, {
+        # mediaklikk. date in html.
+        'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
+        'info_dict': {
+            'id': '6696133',
+            'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
+            'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
+            'ext': 'mp4',
+            'upload_date': '20230903',
+            'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
        }
    }, {
-        # m4sport
+        # (old) m4sport
        'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
        'info_dict': {
            'id': '4754999',
@ -33,6 +48,18 @@ class MediaKlikkIE(InfoExtractor):
            'ext': 'mp4',
            'upload_date': '20210830',
            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
+        },
+        'skip': 'Webpage redirects to 404 page',
+    }, {
+        # m4sport
+        'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
+        'info_dict': {
+            'id': '6711136',
+            'title': 'Atlétika – Gyémánt Liga, Brüsszel',
+            'display_id': 'atletika-gyemant-liga-brusszel',
+            'ext': 'mp4',
+            'upload_date': '20230908',
+            'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg'
        }
    }, {
        # m4sport with *video/ url and no date
@ -40,20 +67,33 @@ class MediaKlikkIE(InfoExtractor):
        'info_dict': {
            'id': '4492099',
            'title': 'Real Madrid - Chelsea 1-1',
+            'display_id': 'real-madrid-chelsea-1-1',
            'ext': 'mp4',
-            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
+            'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
        }
    }, {
-        # hirado
+        # (old) hirado
        'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
        'info_dict': {
            'id': '4760120',
            'title': 'Feltételeket szabott a főváros',
            'ext': 'mp4',
            'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
+        },
+        'skip': 'Webpage redirects to video list page',
+    }, {
+        # hirado
+        'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
+        'info_dict': {
+            'id': '6716068',
+            'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
+            'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
+            'ext': 'mp4',
+            'upload_date': '20230911',
+            'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg'
        }
    }, {
-        # petofilive
+        # (old) petofilive
        'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
        'info_dict': {
            'id': '4571948',
@ -61,6 +101,18 @@ class MediaKlikkIE(InfoExtractor):
            'ext': 'mp4',
            'upload_date': '20210607',
            'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
+        },
+        'skip': 'Webpage redirects to empty page',
+    }, {
+        # petofilive
+        'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
+        'info_dict': {
+            'id': '6713233',
+            'title': 'Futball Fesztivál a Margitszigeten',
+            'display_id': 'futball-fesztival-a-margitszigeten',
+            'ext': 'mp4',
+            'upload_date': '20230909',
+            'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg'
        }
    }]

@ -84,8 +136,12 @@ class MediaKlikkIE(InfoExtractor):

        player_data['video'] = player_data.pop('token')
        player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
-        playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
-            self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
+        player_json = self._search_json(
+            r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
+        playlist_url = traverse_obj(
+            player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
+        if not playlist_url:
+            raise ExtractorError('Unable to extract playlist url')

        formats = self._extract_wowza_formats(
            playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
--- a/yt_dlp/extractor/mediastream.py
+++ b/yt_dlp/extractor/mediastream.py
@ -14,7 +14,7 @@ class MediaStreamBaseIE(InfoExtractor):
    _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'

    def _extract_mediastream_urls(self, webpage):
-        yield from traverse_obj(list(self._yield_json_ld(webpage, None)), (
+        yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
            lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
            {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))

@ -106,8 +106,12 @@ class MediaStreamIE(MediaStreamBaseIE):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
-            self.raise_geo_restricted()
+        for message in [
+            'Debido a tu ubicación no puedes ver el contenido',
+            'You are not allowed to watch this video: Geo Fencing Restriction'
+        ]:
+            if message in webpage:
+                self.raise_geo_restricted()

        player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)

--- a/yt_dlp/extractor/mixcloud.py
+++ b/yt_dlp/extractor/mixcloud.py
@ -20,7 +20,7 @@ class MixcloudBaseIE(InfoExtractor):
    def _call_api(self, object_type, object_fields, display_id, username, slug=None):
        lookup_key = object_type + 'Lookup'
        return self._download_json(
-            'https://www.mixcloud.com/graphql', display_id, query={
+            'https://app.mixcloud.com/graphql', display_id, query={
                'query': '''{
  %s(lookup: {username: "%s"%s}) {
    %s
@ -46,7 +46,15 @@ class MixcloudIE(MixcloudBaseIE):
            'view_count': int,
            'timestamp': 1321359578,
            'upload_date': '20111115',
+            'uploader_url': 'https://www.mixcloud.com/dholbach/',
+            'artist': 'Submorphics & Chino , Telekinesis, Porter Robinson, Enei, Breakage ft Jess Mills',
+            'duration': 3723,
+            'tags': [],
+            'comment_count': int,
+            'repost_count': int,
+            'like_count': int,
        },
+        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
@ -60,7 +68,14 @@ class MixcloudIE(MixcloudBaseIE):
            'view_count': int,
            'timestamp': 1422987057,
            'upload_date': '20150203',
+            'uploader_url': 'https://www.mixcloud.com/gillespeterson/',
+            'duration': 2992,
+            'tags': [],
+            'comment_count': int,
+            'repost_count': int,
+            'like_count': int,
        },
+        'params': {'skip_download': '404 playback error on site'},
    }, {
        'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
        'only_matching': True,
@ -259,9 +274,9 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
                cloudcast_url = cloudcast.get('url')
                if not cloudcast_url:
                    continue
-                slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
+                item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
                owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
-                video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
+                video_id = f'{owner_username}_{item_slug}' if item_slug and owner_username else None
                entries.append(self.url_result(
                    cloudcast_url, MixcloudIE.ie_key(), video_id))

@ -284,7 +299,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
-            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+            'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
        },
        'playlist_mincount': 36,
    }, {
@ -292,7 +307,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
-            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+            'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
        },
        'playlist_mincount': 36,
    }, {
@ -300,7 +315,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
        'info_dict': {
            'id': 'dholbach_favorites',
            'title': 'Daniel Holbach (favorites)',
-            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+            'description': 'md5:a3f468a60ac8c3e1f8616380fc469b2b',
        },
        # 'params': {
        #     'playlist_items': '1-100',
@ -323,9 +338,9 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
        'info_dict': {
            'id': 'FirstEar_stream',
            'title': 'First Ear (stream)',
-            'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
+            'description': 'we maraud for ears',
        },
-        'playlist_mincount': 271,
+        'playlist_mincount': 269,
    }]

    _TITLE_KEY = 'displayName'
--- a/yt_dlp/extractor/motherless.py
+++ b/yt_dlp/extractor/motherless.py
@ -151,7 +151,7 @@ class MotherlessIE(InfoExtractor):
                    'd': 'days',
                }
                kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
-                upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
+                upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d')

        comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
        uploader_id = self._html_search_regex(
--- a/yt_dlp/extractor/n1.py
+++ b/yt_dlp/extractor/n1.py
@ -33,7 +33,7 @@ class N1InfoAssetIE(InfoExtractor):

 class N1InfoIIE(InfoExtractor):
    IE_NAME = 'N1Info:article'
-    _VALID_URL = r'https?://(?:(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)|nova\.rs)/(?:[^/]+/){1,2}(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
    _TESTS = [{
        # Youtube embedded
        'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
@ -94,6 +94,16 @@ class N1InfoIIE(InfoExtractor):
            'upload_date': '20211102',
            'timestamp': 1635861677,
        },
+    }, {
+        'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
+        'info_dict': {
+            'id': '1332368',
+            'ext': 'mp4',
+            'title': 'Ćuta: Biti u Kosovskoj Mitrovici znači da te dočekaju eksplozivnim napravama',
+            'upload_date': '20230620',
+            'timestamp': 1687290536,
+            'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg'
+        },
    }, {
        'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
        'only_matching': True,
@ -105,19 +115,35 @@ class N1InfoIIE(InfoExtractor):

        title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
        timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
-
-        videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
+        plugin_data = self._html_search_meta('BridPlugin', webpage)
        entries = []
-        for video in videos:
-            video_data = extract_attributes(video)
-            entries.append({
-                '_type': 'url_transparent',
-                'url': video_data.get('data-url'),
-                'id': video_data.get('id'),
-                'title': title,
-                'thumbnail': video_data.get('data-thumbnail'),
-                'timestamp': timestamp,
-                'ie_key': 'N1InfoAsset'})
+        if plugin_data:
+            site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id')
+            for video_data in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage):
+                video_id = self._parse_json(video_data, title)['video']
+                entries.append({
+                    'id': video_id,
+                    'title': title,
+                    'timestamp': timestamp,
+                    'thumbnail': self._html_search_meta('thumbnailURL', webpage),
+                    'formats': self._extract_m3u8_formats(
+                        f'https://cdn-uc.brid.tv/live/partners/{site_id}/streaming/{video_id}/{video_id}.m3u8',
+                        video_id, fatal=False),
+                })
+        else:
+            # Old player still present in older articles
+            videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
+            for video in videos:
+                video_data = extract_attributes(video)
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': video_data.get('data-url'),
+                    'id': video_data.get('id'),
+                    'title': title,
+                    'thumbnail': video_data.get('data-thumbnail'),
+                    'timestamp': timestamp,
+                    'ie_key': 'N1InfoAsset',
+                })

        embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
        for embedded_video in embedded_videos:
--- a/yt_dlp/extractor/naver.py
+++ b/yt_dlp/extractor/naver.py
@ -21,7 +21,7 @@ from ..utils import (
 class NaverBaseIE(InfoExtractor):
    _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'

-    @staticmethod  # NB: Used in VLiveWebArchiveIE, WeverseIE
+    @staticmethod  # NB: Used in WeverseIE
    def process_subtitles(vod_data, process_url):
        ret = {'subtitles': {}, 'automatic_captions': {}}
        for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
--- a/yt_dlp/extractor/nitter.py
+++ b/yt_dlp/extractor/nitter.py
@ -265,6 +265,26 @@ class NitterIE(InfoExtractor):
                'repost_count': int,
                'comment_count': int,
            }
+        }, {  # no OpenGraph title
+            'url': f'https://{current_instance}/LocalBateman/status/1678455464038735895#m',
+            'info_dict': {
+                'id': '1678455464038735895',
+                'ext': 'mp4',
+                'title': 'Your Typical Local Man - Local man, what did Romanians ever do to you?',
+                'description': 'Local man, what did Romanians ever do to you?',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'uploader': 'Your Typical Local Man',
+                'uploader_id': 'LocalBateman',
+                'uploader_url': f'https://{current_instance}/LocalBateman',
+                'upload_date': '20230710',
+                'timestamp': 1689009900,
+                'view_count': int,
+                'like_count': int,
+                'repost_count': int,
+                'comment_count': int,
+            },
+            'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
+            'params': {'skip_download': 'm3u8'},
        }
    ]

@ -292,7 +312,7 @@ class NitterIE(InfoExtractor):
                'ext': ext
            }]

-        title = description = self._og_search_description(full_webpage) or self._html_search_regex(
+        title = description = self._og_search_description(full_webpage, default=None) or self._html_search_regex(
            r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)

        uploader_id = self._html_search_regex(
--- a/yt_dlp/extractor/nova.py
+++ b/yt_dlp/extractor/nova.py
@ -6,7 +6,6 @@ from ..utils import (
    determine_ext,
    int_or_none,
    js_to_json,
-    qualities,
    traverse_obj,
    unified_strdate,
    url_or_none,
@ -49,77 +48,52 @@ class NovaEmbedIE(InfoExtractor):
        duration = None
        formats = []

-        player = self._parse_json(
-            self._search_regex(
-                (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
-                    r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
-                webpage, 'player', default='{}', group='json'), video_id, fatal=False)
-        if player:
-            for format_id, format_list in player['tracks'].items():
-                if not isinstance(format_list, list):
-                    format_list = [format_list]
-                for format_dict in format_list:
-                    if not isinstance(format_dict, dict):
-                        continue
-                    if (not self.get_param('allow_unplayable_formats')
-                            and traverse_obj(format_dict, ('drm', 'keySystem'))):
-                        has_drm = True
-                        continue
-                    format_url = url_or_none(format_dict.get('src'))
-                    format_type = format_dict.get('type')
-                    ext = determine_ext(format_url)
-                    if (format_type == 'application/x-mpegURL'
-                            or format_id == 'HLS' or ext == 'm3u8'):
-                        formats.extend(self._extract_m3u8_formats(
-                            format_url, video_id, 'mp4',
-                            entry_protocol='m3u8_native', m3u8_id='hls',
-                            fatal=False))
-                    elif (format_type == 'application/dash+xml'
-                          or format_id == 'DASH' or ext == 'mpd'):
-                        formats.extend(self._extract_mpd_formats(
-                            format_url, video_id, mpd_id='dash', fatal=False))
-                    else:
-                        formats.append({
-                            'url': format_url,
-                        })
-            duration = int_or_none(player.get('duration'))
-        else:
-            # Old path, not actual as of 08.04.2020
-            bitrates = self._parse_json(
-                self._search_regex(
-                    r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
-                video_id, transform_source=js_to_json)
-
-            QUALITIES = ('lq', 'mq', 'hq', 'hd')
-            quality_key = qualities(QUALITIES)
-
-            for format_id, format_list in bitrates.items():
-                if not isinstance(format_list, list):
-                    format_list = [format_list]
-                for format_url in format_list:
-                    format_url = url_or_none(format_url)
-                    if not format_url:
-                        continue
-                    if format_id == 'hls':
-                        formats.extend(self._extract_m3u8_formats(
-                            format_url, video_id, ext='mp4',
-                            entry_protocol='m3u8_native', m3u8_id='hls',
-                            fatal=False))
-                        continue
-                    f = {
+        def process_format_list(format_list, format_id=""):
+            nonlocal formats, has_drm
+            if not isinstance(format_list, list):
+                format_list = [format_list]
+            for format_dict in format_list:
+                if not isinstance(format_dict, dict):
+                    continue
+                if (not self.get_param('allow_unplayable_formats')
+                        and traverse_obj(format_dict, ('drm', 'keySystem'))):
+                    has_drm = True
+                    continue
+                format_url = url_or_none(format_dict.get('src'))
+                format_type = format_dict.get('type')
+                ext = determine_ext(format_url)
+                if (format_type == 'application/x-mpegURL'
+                        or format_id == 'HLS' or ext == 'm3u8'):
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls',
+                        fatal=False))
+                elif (format_type == 'application/dash+xml'
+                      or format_id == 'DASH' or ext == 'mpd'):
+                    formats.extend(self._extract_mpd_formats(
+                        format_url, video_id, mpd_id='dash', fatal=False))
+                else:
+                    formats.append({
                        'url': format_url,
-                    }
-                    f_id = format_id
-                    for quality in QUALITIES:
-                        if '%s.mp4' % quality in format_url:
-                            f_id += '-%s' % quality
-                            f.update({
-                                'quality': quality_key(quality),
-                                'format_note': quality.upper(),
-                            })
-                            break
-                    f['format_id'] = f_id
-                    formats.append(f)
+                    })
+
+        player = self._search_json(
+            r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
+        if player:
+            for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
+                process_format_list(src)
+            duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
+        if not formats and not has_drm:
+            # older code path, in use before August 2023
+            player = self._parse_json(
+                self._search_regex(
+                    (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
+                     r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
+                    webpage, 'player', group='json'), video_id)
+            if player:
+                for format_id, format_list in player['tracks'].items():
+                    process_format_list(format_list, format_id)
+                duration = int_or_none(player.get('duration'))

        if not formats and has_drm:
            self.report_drm(video_id)
--- a/yt_dlp/extractor/panopto.py
+++ b/yt_dlp/extractor/panopto.py
@ -1,7 +1,7 @@
 import calendar
 import json
 import functools
-from datetime import datetime
+from datetime import datetime, timezone
 from random import random

 from .common import InfoExtractor
@ -243,7 +243,7 @@ class PanoptoIE(PanoptoBaseIE):
        invocation_id = delivery_info.get('InvocationId')
        stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
        if invocation_id and stream_id and duration:
-            timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/'
+            timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/'
            data = {
                'streamRequests': [
                    {
--- a/yt_dlp/extractor/pornbox.py
+++ b/yt_dlp/extractor/pornbox.py
@ -0,0 +1,113 @@
+from .common import InfoExtractor
+from ..compat import functools
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    qualities,
+    str_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class PornboxIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://pornbox.com/application/watch-page/212108',
+        'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
+        'info_dict': {
+            'id': '212108',
+            'ext': 'mp4',
+            'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
+            'uploader': 'Lily Strong',
+            'timestamp': 1665871200,
+            'upload_date': '20221015',
+            'age_limit': 18,
+            'availability': 'needs_auth',
+            'duration': 1505,
+            'cast': ['Lily Strong', 'John Strong'],
+            'tags': 'count:11',
+            'description': 'md5:589c7f33e183aa8aa939537300efb859',
+            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$'
+        }
+    }, {
+        'url': 'https://pornbox.com/application/watch-page/216045',
+        'info_dict': {
+            'id': '216045',
+            'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
+            'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
+            'uploader': 'VK Studio',
+            'timestamp': 1618264800,
+            'upload_date': '20210412',
+            'age_limit': 18,
+            'availability': 'premium_only',
+            'duration': 2710,
+            'cast': 'count:3',
+            'tags': 'count:29',
+            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
+            'subtitles': 'count:6'
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True
+        },
+        'expected_warnings': [
+            'You are either not logged in or do not have access to this scene',
+            'No video formats found', 'Requested format is not available']
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)
+
+        subtitles = {country_code: [{
+            'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}',
+            'ext': 'srt'
+        }] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))}
+
+        is_free_scene = traverse_obj(
+            public_data, ('price', 'is_available_for_free', {bool}), default=False)
+
+        metadata = {
+            'id': video_id,
+            **traverse_obj(public_data, {
+                'title': ('scene_name', {str.strip}),
+                'description': ('small_description', {str.strip}),
+                'uploader': 'studio',
+                'duration': ('runtime', {parse_duration}),
+                'cast': (('models', 'male_models'), ..., 'model_name'),
+                'thumbnail': ('player_poster', {url_or_none}),
+                'tags': ('niches', ..., 'niche'),
+            }),
+            'age_limit': 18,
+            'timestamp': parse_iso8601(traverse_obj(
+                public_data, ('studios', 'release_date'), 'publish_date')),
+            'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene),
+            'subtitles': subtitles,
+        }
+
+        if not public_data.get('is_purchased') or not is_free_scene:
+            self.raise_login_required(
+                'You are either not logged in or do not have access to this scene', metadata_available=True)
+            return metadata
+
+        media_id = traverse_obj(public_data, (
+            'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
+        if not media_id:
+            self.raise_no_formats('Could not find stream id', video_id=video_id)
+
+        stream_data = self._download_json(
+            f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')
+
+        get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
+        metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
+            'url': 'src',
+            'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
+            'format_id': ('quality', {str_or_none}),
+            'quality': ('quality', {get_quality}),
+            'width': ('size', {lambda x: int(x[:-1])}),
+        }))
+
+        return metadata
--- a/yt_dlp/extractor/pr0gramm.py
+++ b/yt_dlp/extractor/pr0gramm.py
@ -1,97 +1,155 @@
-import re
+import json
+from datetime import date
+from urllib.parse import unquote

 from .common import InfoExtractor
-from ..utils import merge_dicts
+from ..compat import functools
+from ..utils import ExtractorError, make_archive_id, urljoin
+from ..utils.traversal import traverse_obj


-class Pr0grammStaticIE(InfoExtractor):
-    # Possible urls:
-    # https://pr0gramm.com/static/5466437
-    _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'https://pr0gramm.com/static/5466437',
-        'md5': '52fa540d70d3edc286846f8ca85938aa',
-        'info_dict': {
-            'id': '5466437',
-            'ext': 'mp4',
-            'title': 'pr0gramm-5466437 by g11st',
-            'uploader': 'g11st',
-            'upload_date': '20221221',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        # Fetch media sources
-        entries = self._parse_html5_media_entries(url, webpage, video_id)
-        media_info = entries[0]
-
-        # Fetch author
-        uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
-
-        # Fetch approx upload timestamp from filename
-        # Have None-defaults in case the extraction fails
-        uploadDay = None
-        uploadMon = None
-        uploadYear = None
-        uploadTimestr = None
-        # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
-        m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
-
-        if (m):
-            # Up to a day of accuracy should suffice...
-            uploadDay = m.groupdict().get('day')
-            uploadMon = m.groupdict().get('mon')
-            uploadYear = m.groupdict().get('year')
-            uploadTimestr = uploadYear + uploadMon + uploadDay
-
-        return merge_dicts({
-            'id': video_id,
-            'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
-            'uploader': uploader,
-            'upload_date': uploadTimestr
-        }, media_info)
-
-
-# This extractor is for the primary url (used for sharing, and appears in the
-# location bar) Since this page loads the DOM via JS, yt-dl can't find any
-# video information here. So let's redirect to a compatibility version of
-# the site, which does contain the <video>-element  by itself,  without requiring
-# js to be ran.
 class Pr0grammIE(InfoExtractor):
-    # Possible urls:
-    # https://pr0gramm.com/new/546637
-    # https://pr0gramm.com/new/video/546637
-    # https://pr0gramm.com/top/546637
-    # https://pr0gramm.com/top/video/546637
-    # https://pr0gramm.com/user/g11st/uploads/5466437
-    # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
-    # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
-    # https://pr0gramm.com/user/froschler/1elf/5232030
-    # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
-    # https://pr0gramm.com/top/fruher war alles damals/5498175
-
-    _VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
-    _TEST = {
+    _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
+    _TESTS = [{
+        # Tags require account
        'url': 'https://pr0gramm.com/new/video/5466437',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
+            'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'],
            'uploader': 'g11st',
+            'uploader_id': 394718,
+            'upload_timestamp': 1671590240,
            'upload_date': '20221221',
-        }
-    }
+            'like_count': int,
+            'dislike_count': int,
+            'age_limit': 0,
+            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
+        },
+    }, {
+        # Tags require account
+        'url': 'https://pr0gramm.com/new/3052805:comment28391322',
+        'info_dict': {
+            'id': '3052805',
+            'ext': 'mp4',
+            'title': 'pr0gramm-3052805 by Hansking1',
+            'tags': 'count:15',
+            'uploader': 'Hansking1',
+            'uploader_id': 385563,
+            'upload_timestamp': 1552930408,
+            'upload_date': '20190318',
+            'like_count': int,
+            'dislike_count': int,
+            'age_limit': 0,
+            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
+        },
+    }, {
+        # Requires verified account
+        'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332',
+        'info_dict': {
+            'id': '5848332',
+            'ext': 'mp4',
+            'title': 'pr0gramm-5848332 by erd0pfel',
+            'tags': 'count:18',
+            'uploader': 'erd0pfel',
+            'uploader_id': 349094,
+            'upload_timestamp': 1694489652,
+            'upload_date': '20230912',
+            'like_count': int,
+            'dislike_count': int,
+            'age_limit': 18,
+            'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
+        },
+    }, {
+        'url': 'https://pr0gramm.com/static/5466437',
+        'only_matching': True,
+    }, {
+        'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805',
+        'only_matching': True,
+    }, {
+        'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290',
+        'only_matching': True,
+    }]

-    def _generic_title():
-        return "oof"
+    BASE_URL = 'https://pr0gramm.com'
+
+    @functools.cached_property
+    def _is_logged_in(self):
+        return 'pp' in self._get_cookies(self.BASE_URL)
+
+    @functools.cached_property
+    def _maximum_flags(self):
+        # We need to guess the flags for the content otherwise the api will raise an error
+        # We can guess the maximum allowed flags for the account from the cookies
+        # Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
+        flags = 0b0001
+        if self._is_logged_in:
+            flags |= 0b1000
+            cookies = self._get_cookies(self.BASE_URL)
+            if 'me' not in cookies:
+                self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
+            if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
+                flags |= 0b0110
+
+        return flags
+
+    def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'):
+        data = self._download_json(
+            f'https://pr0gramm.com/api/items/{endpoint}',
+            video_id, note, query=query, expected_status=403)
+
+        error = traverse_obj(data, ('error', {str}))
+        if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'):
+            if not self._is_logged_in:
+                self.raise_login_required()
+            raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True)
+        elif error:
+            message = traverse_obj(data, ('msg', {str})) or error
+            raise ExtractorError(f'API returned error: {message}', expected=True)
+
+        return data

    def _real_extract(self, url):
        video_id = self._match_id(url)
+        video_info = traverse_obj(
+            self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}),
+            ('items', 0, {dict}))

-        return self.url_result(
-            'https://pr0gramm.com/static/' + video_id,
-            video_id=video_id,
-            ie=Pr0grammStaticIE.ie_key())
+        source = urljoin('https://img.pr0gramm.com', video_info.get('image'))
+        if not source or not source.endswith('mp4'):
+            self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
+
+        tags = None
+        if self._is_logged_in:
+            metadata = self._call_api('info', video_id, {'itemId': video_id})
+            tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
+            # Sorted by "confidence", higher confidence = earlier in list
+            confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
+            if confidences:
+                tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
+
+        return {
+            'id': video_id,
+            'title': f'pr0gramm-{video_id} by {video_info.get("user")}',
+            'formats': [{
+                'url': source,
+                'ext': 'mp4',
+                **traverse_obj(video_info, {
+                    'width': ('width', {int}),
+                    'height': ('height', {int}),
+                }),
+            }],
+            'tags': tags,
+            'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0,
+            '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)],
+            **traverse_obj(video_info, {
+                'uploader': ('user', {str}),
+                'uploader_id': ('userId', {int}),
+                'like_count': ('up', {int}),
+                'dislike_count': ('down', {int}),
+                'upload_timestamp': ('created', {int}),
+                'upload_date': ('created', {int}, {date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
+                'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
+            }),
+        }
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@ -1,7 +1,18 @@
+import itertools
 import re
+import urllib.parse

 from .common import InfoExtractor
-from ..utils import parse_duration, unified_strdate
+from ..utils import (
+    int_or_none,
+    join_nonempty,
+    js_to_json,
+    parse_duration,
+    strftime_or_none,
+    traverse_obj,
+    unified_strdate,
+    urljoin,
+)


 class RadioFranceIE(InfoExtractor):
@ -56,8 +67,32 @@ class RadioFranceIE(InfoExtractor):
        }


-class FranceCultureIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
+class RadioFranceBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr'
+
+    _STATIONS_RE = '|'.join(map(re.escape, (
+        'franceculture',
+        'franceinfo',
+        'franceinter',
+        'francemusique',
+        'fip',
+        'mouv',
+    )))
+
+    def _extract_data_from_webpage(self, webpage, display_id, key):
+        return traverse_obj(self._search_json(
+            r'\bconst\s+data\s*=', webpage, key, display_id,
+            contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json),
+            (..., 'data', key, {dict}), get_all=False) or {}
+
+
+class FranceCultureIE(RadioFranceBaseIE):
+    _VALID_URL = rf'''(?x)
+        {RadioFranceBaseIE._VALID_URL_BASE}
+        /(?:{RadioFranceBaseIE._STATIONS_RE})
+        /podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#])
+    '''
+
    _TESTS = [
        {
            'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
@ -67,14 +102,30 @@ class FranceCultureIE(InfoExtractor):
                'ext': 'mp3',
                'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
                'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
-                'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
+                'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                'upload_date': '20220514',
                'duration': 2750,
            },
        },
+        {
+            'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675',
+            'info_dict': {
+                'id': '2107675',
+                'display_id': 'le-7-9-30-du-vendredi-10-mars-2023',
+                'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot',
+                'description': 'md5:36ee74351ede77a314fdebb94026b916',
+                'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+                'upload_date': '20230310',
+                'duration': 8977,
+                'ext': 'mp3',
+            },
+        },
        {
            'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
            'only_matching': True,
+        }, {
+            'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200',
+            'only_matching': True,
        }
    ]

@ -89,7 +140,6 @@ class FranceCultureIE(InfoExtractor):
            'id': video_id,
            'display_id': display_id,
            'url': video_data['contentUrl'],
-            'ext': video_data.get('encodingFormat'),
            'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
            'duration': parse_duration(video_data.get('duration')),
            'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
@ -102,3 +152,322 @@ class FranceCultureIE(InfoExtractor):
            'upload_date': unified_strdate(self._search_regex(
                r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
        }
+
+
+class RadioFranceLiveIE(RadioFranceBaseIE):
+    """Extractor for the live stream of a radiofrance.fr station or one of its `radio-*` web radios."""
+
+    # Reuse the shared host pattern so this stays in sync with the sibling extractors
+    _VALID_URL = rf'''(?x)
+        {RadioFranceBaseIE._VALID_URL_BASE}
+        /(?P<id>{RadioFranceBaseIE._STATIONS_RE})
+        /?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$)
+    '''
+
+    _TESTS = [{
+        'url': 'https://www.radiofrance.fr/franceinter/',
+        'info_dict': {
+            'id': 'franceinter',
+            'title': str,
+            'live_status': 'is_live',
+            'ext': 'aac',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.radiofrance.fr/franceculture',
+        'info_dict': {
+            'id': 'franceculture',
+            'title': str,
+            'live_status': 'is_live',
+            'ext': 'aac',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family',
+        'info_dict': {
+            'id': 'mouv-radio-musique-kids-family',
+            'title': str,
+            'live_status': 'is_live',
+            'ext': 'aac',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul',
+        'info_dict': {
+            'id': 'mouv-radio-rnb-soul',
+            'title': str,
+            'live_status': 'is_live',
+            'ext': 'aac',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix',
+        'info_dict': {
+            'id': 'mouv-radio-musique-mix',
+            'title': str,
+            'live_status': 'is_live',
+            'ext': 'aac',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.radiofrance.fr/fip/radio-rock',
+        'info_dict': {
+            'id': 'fip-radio-rock',
+            'title': str,
+            'live_status': 'is_live',
+            'ext': 'aac',
+        },
+        'params': {
+            'skip_download': 'Livestream',
+        },
+    }, {
+        'url': 'https://www.radiofrance.fr/mouv',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Build the live-stream info dict for the station (and optional web radio) in `url`."""
+        station_id, substation_id = self._match_valid_url(url).group('id', 'substation_id')
+
+        if substation_id:
+            # Web radios: stream data is read from the page's inline `webRadioData`
+            webpage = self._download_webpage(url, station_id)
+            api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData')
+        else:
+            # Main stations: stream data comes from the station's live API endpoint
+            api_response = self._download_json(
+                f'https://www.radiofrance.fr/{station_id}/api/live', station_id)
+
+        formats, subtitles = [], {}
+        # `media.sources` is looked up both under `now` and at the top level of the
+        # response; sources without a truthy `url` are skipped
+        for media_source in traverse_obj(api_response, (('now', None), 'media', 'sources', lambda _, v: v['url'])):
+            if media_source.get('format') == 'hls':
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(media_source['url'], station_id, fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            else:
+                formats.append({
+                    'url': media_source['url'],
+                    'abr': media_source.get('bitrate'),
+                })
+
+        return {
+            'id': join_nonempty(station_id, substation_id),
+            # Prefer the visual legend; otherwise join the two "now playing" lines
+            'title': traverse_obj(api_response, ('visual', 'legend')) or join_nonempty(
+                ('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'), from_dict=api_response, delim=' - '),
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': True,
+        }
+
+
+class RadioFrancePlaylistBase(RadioFranceBaseIE):
+    """
+    Shared playlist logic for paginated radiofrance.fr listings.
+
+    Subclasses must set _METADATA_KEY (the key of the path metadata that holds
+    the first page of items) and implement _call_api to fetch further pages.
+    """
+
+    def _call_api(self, content_id, cursor, page_num):
+        """Fetch the page of items identified by `cursor`; must be overridden by subclasses."""
+        raise NotImplementedError('This method must be implemented by subclasses')
+
+    def _generate_playlist_entries(self, content_id, content_response):
+        """Yield a url_transparent result for every item, following pagination cursors."""
+        # The first page is passed in by the caller, so fetched pages are numbered from 2
+        for page_num in itertools.count(2):
+            for entry in content_response['items']:
+                yield self.url_result(
+                    f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, {
+                        'title': 'title',
+                        'description': 'standFirst',
+                        'timestamp': ('publishedDate', {int_or_none}),
+                        'thumbnail': ('visual', 'src'),
+                    }))
+
+            # The cursor is looked up under `pagination` first, then at the top level
+            next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False)
+            if not next_cursor:
+                break
+
+            content_response = self._call_api(content_id, next_cursor, page_num)
+
+    def _real_extract(self, url):
+        """Resolve the URL path via the `path` API and return the playlist of its items."""
+        display_id = self._match_id(url)
+
+        metadata = self._download_json(
+            'https://www.radiofrance.fr/api/v2.1/path', display_id,
+            query={'value': urllib.parse.urlparse(url).path})['content']
+
+        content_id = metadata['id']
+
+        # The second mapping (`name`/`role`) overrides `title`/`description` from the
+        # first one whenever those fields are present in the metadata
+        return self.playlist_result(
+            self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id,
+            display_id=display_id, **{**traverse_obj(metadata, {
+                'title': 'title',
+                'description': 'standFirst',
+                'thumbnail': ('visual', 'src'),
+            }), **traverse_obj(metadata, {
+                'title': 'name',
+                'description': 'role',
+            })})
+
+
+class RadioFrancePodcastIE(RadioFrancePlaylistBase):
+    """Playlist extractor for a podcast page (`/<station>/podcasts/<slug>`) on radiofrance.fr."""
+
+    _VALID_URL = rf'''(?x)
+        {RadioFranceBaseIE._VALID_URL_BASE}
+        /(?:{RadioFranceBaseIE._STATIONS_RE})
+        /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$)
+    '''
+
+    _TESTS = [{
+        'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert',
+        'info_dict': {
+            'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17',
+            'display_id': 'le-billet-vert',
+            'title': 'Le billet sciences',
+            'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'playlist_mincount': 11,
+    }, {
+        'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale',
+        'info_dict': {
+            'id': '566fd524-3074-4fbc-ac69-8696f2152a54',
+            'display_id': 'jean-marie-le-pen-l-obsession-nationale',
+            'title': 'Jean-Marie Le Pen, l\'obsession nationale',
+            'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'playlist_count': 7,
+    }, {
+        'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine',
+        'info_dict': {
+            'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d',
+            'display_id': 'serie-thomas-grjebine',
+            'title': 'Thomas Grjebine',
+        },
+        'playlist_count': 1,
+    }, {
+        'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip',
+        'info_dict': {
+            'id': '143dff38-e956-4a5d-8576-1c0b7242b99e',
+            'display_id': 'certains-l-aiment-fip',
+            'title': 'Certains l’aiment Fip',
+            'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'playlist_mincount': 321,
+    }, {
+        'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix',
+        'only_matching': True,
+    }]
+
+    # Key of the path metadata that holds the first page of podcast items
+    _METADATA_KEY = 'expressions'
+
+    def _call_api(self, podcast_id, cursor, page_num):
+        """Download the page of the podcast's expressions identified by `cursor`."""
+        return self._download_json(
+            f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id,
+            note=f'Downloading page {page_num}', query={'pageCursor': cursor})
+
+
+class RadioFranceProfileIE(RadioFrancePlaylistBase):
+    """Playlist extractor for a person's page (`/personnes/<slug>`) on radiofrance.fr."""
+
+    _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
+
+    _TESTS = [{
+        'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
+        'info_dict': {
+            'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
+            'display_id': 'thomas-pesquet',
+            'title': 'Thomas Pesquet',
+            'description': 'Astronaute à l\'agence spatiale européenne',
+        },
+        'playlist_mincount': 212,
+    }, {
+        'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie',
+        'info_dict': {
+            'id': '9593050b-0183-4972-a0b5-d8f699079e02',
+            'display_id': 'eugenie-bastie',
+            'title': 'Eugénie Bastié',
+            'description': 'Journaliste et essayiste',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'playlist_mincount': 39,
+    }, {
+        'url': 'https://www.radiofrance.fr/personnes/lea-salame',
+        'only_matching': True,
+    }]
+
+    # Key of the path metadata that holds the first page of the person's documents
+    _METADATA_KEY = 'documents'
+
+    def _call_api(self, profile_id, cursor, page_num):
+        """Download the page of documents related to the person, identified by `cursor`."""
+        resp = self._download_json(
+            f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id,
+            note=f'Downloading page {page_num}', query={
+                'relation': 'personality',
+                'cursor': cursor,
+            })
+
+        # Mirror the nested cursor at the top level of the response as `next`
+        resp['next'] = traverse_obj(resp, ('pagination', 'next'))
+        return resp
+
+
+class RadioFranceProgramScheduleIE(RadioFranceBaseIE):
+    """Playlist extractor for a station's programme grid (`/<station>/grille-programmes`)."""
+
+    _VALID_URL = rf'''(?x)
+        {RadioFranceBaseIE._VALID_URL_BASE}
+        /(?P<station>{RadioFranceBaseIE._STATIONS_RE})
+        /grille-programmes(?:\?date=(?P<date>[\d-]+))?
+    '''
+
+    _TESTS = [{
+        'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023',
+        'info_dict': {
+            'id': 'franceinter-program-20230217',
+            'upload_date': '20230217',
+        },
+        'playlist_count': 25,
+    }, {
+        'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023',
+        'info_dict': {
+            'id': 'franceculture-program-20230201',
+            'upload_date': '20230201',
+        },
+        'playlist_count': 25,
+    }, {
+        'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023',
+        'info_dict': {
+            'id': 'mouv-program-20230319',
+            'upload_date': '20230319',
+        },
+        'playlist_count': 3,
+    }, {
+        'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023',
+        'info_dict': {
+            'id': 'francemusique-program-20230318',
+            'upload_date': '20230318',
+        },
+        'playlist_count': 15,
+    }, {
+        'url': 'https://www.radiofrance.fr/franceculture/grille-programmes',
+        'only_matching': True,
+    }]
+
+    def _generate_playlist_entries(self, webpage_url, api_response):
+        """Yield a url_transparent result for each grid step that has an expression path."""
+        # Steps without a truthy ['expression']['path'] are skipped by the filter
+        for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])):
+            yield self.url_result(
+                urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE,
+                url_transparent=True, **traverse_obj(entry, {
+                    'title': ('expression', 'title'),
+                    'thumbnail': ('expression', 'visual', 'src'),
+                    'timestamp': ('startTime', {int_or_none}),
+                    'series_id': ('concept', 'id'),
+                    'series': ('concept', 'title'),
+                }))
+
+    def _real_extract(self, url):
+        """Download the grid page and return its programmes as a playlist."""
+        # The optional `date` group only takes part in URL matching; the date used
+        # for the playlist comes from the page's grid data
+        station = self._match_valid_url(url).group('station')
+        webpage = self._download_webpage(url, station)
+        grid_data = self._extract_data_from_webpage(webpage, station, 'grid')
+        upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d')
+
+        return self.playlist_result(
+            self._generate_playlist_entries(url, grid_data),
+            join_nonempty(station, 'program', upload_date), upload_date=upload_date)
--- a/yt_dlp/extractor/rbgtum.py
+++ b/yt_dlp/extractor/rbgtum.py
@ -1,10 +1,11 @@
 import re

 from .common import InfoExtractor
+from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError


 class RbgTumIE(InfoExtractor):
-    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
+    _VALID_URL = r'https://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
@ -35,16 +36,18 @@ class RbgTumIE(InfoExtractor):
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        }
+    }, {
+        'url': 'https://tum.live/w/linalginfo/27102',
+        'only_matching': True,
    }, ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
-        lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
-        lecture_series_title = self._html_search_regex(
-            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
+        m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
+        lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
+        lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')

        formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

@ -57,9 +60,9 @@ class RbgTumIE(InfoExtractor):


 class RbgTumCourseIE(InfoExtractor):
-    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
+    _VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
    _TESTS = [{
-        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
+        'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
@ -69,7 +72,7 @@ class RbgTumCourseIE(InfoExtractor):
        },
        'playlist_count': 13,
    }, {
-        'url': 'https://live.rbg.tum.de/course/2022/W/set',
+        'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
@ -78,16 +81,62 @@ class RbgTumCourseIE(InfoExtractor):
            'noplaylist': False,
        },
        'playlist_count': 6,
+    }, {
+        'url': 'https://tum.live/old/course/2023/S/linalginfo',
+        'only_matching': True,
    }, ]

    def _real_extract(self, url):
-        course_id = self._match_id(url)
-        webpage = self._download_webpage(url, course_id)
+        course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
+        meta = self._download_json(
+            f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
+            query={'year': year, 'term': term}) or {}
+        lecture_series_title = meta.get('Name')
+        lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
+                    for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]

-        lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
+        if not lectures:
+            webpage = self._download_webpage(url, course_id)
+            lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
+            lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
+                        for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]

-        lecture_urls = []
-        for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
-            lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
+        return self.playlist_result(lectures, course_id, lecture_series_title)

-        return self.playlist_result(lecture_urls, course_id, lecture_series_title)
+
+class RbgTumNewCourseIE(InfoExtractor):
+    """Redirects query-style course URLs (`/?year=..&term=..&slug=..`) to RbgTumCourseIE."""
+
+    _VALID_URL = r'https://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
+    _TESTS = [{
+        'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
+        'info_dict': {
+            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
+            'id': '2022/S/fpv',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 13,
+    }, {
+        'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
+        'info_dict': {
+            'title': 'SET FSMPIC',
+            'id': '2022/W/set',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 6,
+    }, {
+        'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Rewrite the query-style URL into the `/old/course/<year>/<term>/<slug>` form."""
+        query = parse_qs(url)
+        errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
+        if errors:
+            # A malformed input URL is a user-side problem, not an extractor bug,
+            # so mark the error as expected
+            raise ExtractorError(
+                f'Input URL is missing query parameters: {", ".join(errors)}', expected=True)
+        year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
+        hostname = self._match_valid_url(url).group('hostname')
+
+        return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@ -319,16 +319,20 @@ class RedditIE(InfoExtractor):
                'format_id': 'fallback',
                'format_note': 'DASH video, mp4_dash',
            }]
-            formats.extend(self._extract_m3u8_formats(
-                hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
-            formats.extend(self._extract_mpd_formats(
-                dash_playlist_url, display_id, mpd_id='dash', fatal=False))
+            hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
+                hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)
+            formats.extend(hls_fmts)
+            dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles(
+                dash_playlist_url, display_id, mpd_id='dash', fatal=False)
+            formats.extend(dash_fmts)
+            self._merge_subtitles(dash_subs, target=subtitles)

            return {
                **info,
                'id': video_id,
                'display_id': display_id,
                'formats': formats,
+                'subtitles': subtitles,
                'duration': int_or_none(reddit_video.get('duration')),
            }

--- a/yt_dlp/extractor/rtvslo.py
+++ b/yt_dlp/extractor/rtvslo.py
@ -1,6 +1,7 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
+    int_or_none,
    parse_duration,
    traverse_obj,
    unified_timestamp,
@ -25,7 +26,7 @@ class RTVSLOIE(InfoExtractor):
            'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
            'info_dict': {
                'id': '174842550',
-                'ext': 'flv',
+                'ext': 'mp4',
                'release_timestamp': 1643140032,
                'upload_date': '20220125',
                'series': 'Dnevnik',
@ -69,7 +70,21 @@ class RTVSLOIE(InfoExtractor):
                'tbr': 128000,
                'release_date': '20220201',
            },
-
+        }, {
+            'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750',
+            'info_dict': {
+                'id': '148350750',
+                'ext': 'mp4',
+                'title': 'Prvi šolski dan, mozaična oddaja za mlade',
+                'series': 'Razred zase',
+                'series_id': '148185730',
+                'duration': 1481,
+                'upload_date': '20121019',
+                'timestamp': 1350672122,
+                'release_date': '20121019',
+                'release_timestamp': 1350672122,
+                'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg',
+            },
        }, {
            'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
            'only_matching': True
@ -98,13 +113,14 @@ class RTVSLOIE(InfoExtractor):
        media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']

        formats = []
+        skip_protocols = ['smil', 'f4m', 'dash']
        adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
        if adaptive_url:
-            formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil'])
+            formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols)

        adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
        if adaptive_url:
-            for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']):
+            for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols):
                formats.append({
                    **f,
                    'format_id': 'sign-' + f['format_id'],
@ -114,19 +130,19 @@ class RTVSLOIE(InfoExtractor):
                        else f.get('language'))
                })

-        formats.extend(
-            {
-                'url': f['streams'][strm],
-                'ext': traverse_obj(f, 'mediaType', expected_type=str.lower),
-                'width': f.get('width'),
-                'height': f.get('height'),
-                'tbr': f.get('bitrate'),
-                'filesize': f.get('filesize'),
-            }
-            for strm in ('http', 'https')
-            for f in media.get('mediaFiles') or []
-            if traverse_obj(f, ('streams', strm))
-        )
+        for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))):
+            formats.append(traverse_obj(mediafile, {
+                'url': ('streams', 'https'),
+                'ext': ('mediaType', {str.lower}),
+                'width': ('width', {int_or_none}),
+                'height': ('height', {int_or_none}),
+                'tbr': ('bitrate', {int_or_none}),
+                'filesize': ('filesize', {int_or_none}),
+            }))
+
+        for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['hls_sec']))):
+            formats.extend(self._extract_wowza_formats(
+                mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols))

        if any('intermission.mp4' in x['url'] for x in formats):
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
--- a/yt_dlp/extractor/rule34video.py
+++ b/yt_dlp/extractor/rule34video.py
@ -1,6 +1,6 @@
 import re

-from ..utils import parse_duration
+from ..utils import parse_duration, unescapeHTML
 from .common import InfoExtractor


@ -16,7 +16,8 @@ class Rule34VideoIE(InfoExtractor):
                'title': 'Shot It-(mmd hmv)',
                'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
                'duration': 347.0,
-                'age_limit': 18
+                'age_limit': 18,
+                'tags': 'count:14'
            }
        },
        {
@ -28,7 +29,8 @@ class Rule34VideoIE(InfoExtractor):
                'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
                'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
                'duration': 938.0,
-                'age_limit': 18
+                'age_limit': 18,
+                'tags': 'count:50'
            }
        },
    ]
@ -57,5 +59,7 @@ class Rule34VideoIE(InfoExtractor):
            'title': title,
            'thumbnail': thumbnail,
            'duration': parse_duration(duration),
-            'age_limit': 18
+            'age_limit': 18,
+            'tags': list(map(unescapeHTML, re.findall(
+                r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
        }
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@ -33,7 +33,7 @@ class RumbleEmbedIE(InfoExtractor):
            'upload_date': '20191020',
            'channel_url': 'https://rumble.com/c/WMAR',
            'channel': 'WMAR',
-            'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
+            'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg',
            'duration': 234,
            'uploader': 'WMAR',
            'live_status': 'not_live',
@ -84,7 +84,7 @@ class RumbleEmbedIE(InfoExtractor):
        'info_dict': {
            'id': 'v1essrt',
            'ext': 'mp4',
-            'title': 'startswith:lofi hip hop radio - beats to relax/study',
+            'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to',
            'timestamp': 1661519399,
            'upload_date': '20220826',
            'channel_url': 'https://rumble.com/c/LofiGirl',
@ -99,7 +99,7 @@ class RumbleEmbedIE(InfoExtractor):
        'url': 'https://rumble.com/embed/v1amumr',
        'info_dict': {
            'id': 'v1amumr',
-            'ext': 'webm',
+            'ext': 'mp4',
            'fps': 60,
            'title': 'Turning Point USA 2022 Student Action Summit DAY 1  - Rumble Exclusive Live',
            'timestamp': 1658518457,
@ -129,7 +129,7 @@ class RumbleEmbedIE(InfoExtractor):
                'duration': 92,
                'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
                'channel_url': 'https://rumble.com/c/RichSementa',
-                'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
+                'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg',
                'timestamp': 1654892716,
                'uploader': 'Mr Producer Media',
                'upload_date': '20220610',
@ -144,7 +144,7 @@ class RumbleEmbedIE(InfoExtractor):
        if embeds:
            return embeds
        return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
-            r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
+            r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]

    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -236,7 +236,9 @@ class RumbleEmbedIE(InfoExtractor):

 class RumbleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$'
-    _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>']
+    _EMBED_REGEX = [
+        r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>',
+        r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>']
    _TESTS = [{
        'add_ie': ['RumbleEmbed'],
        'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
@ -254,6 +256,7 @@ class RumbleIE(InfoExtractor):
            'thumbnail': r're:https://.+\.jpg',
            'duration': 103,
            'like_count': int,
+            'dislike_count': int,
            'view_count': int,
            'live_status': 'not_live',
        }
@ -278,6 +281,9 @@ class RumbleIE(InfoExtractor):
            'channel_url': 'https://rumble.com/c/Redacted',
            'live_status': 'not_live',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
+            'like_count': int,
+            'dislike_count': int,
+            'view_count': int,
        },
    }, {
        'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
@ -296,12 +302,15 @@ class RumbleIE(InfoExtractor):
            'channel_url': 'https://rumble.com/c/KimIversen',
            'channel': 'Kim Iversen',
            'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
+            'like_count': int,
+            'dislike_count': int,
+            'view_count': int,
        },
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://rumble.com/videos?page=2',
-        'playlist_count': 25,
+        'playlist_mincount': 24,
        'info_dict': {
            'id': 'videos?page=2',
            'title': 'All videos',
@ -309,17 +318,16 @@ class RumbleIE(InfoExtractor):
            'age_limit': 0,
        },
    }, {
-        'url': 'https://rumble.com/live-videos',
-        'playlist_mincount': 19,
+        'url': 'https://rumble.com/browse/live',
+        'playlist_mincount': 25,
        'info_dict': {
-            'id': 'live-videos',
-            'title': 'Live Videos',
-            'description': 'Live videos on Rumble.com',
+            'id': 'live',
+            'title': 'Browse',
            'age_limit': 0,
        },
    }, {
        'url': 'https://rumble.com/search/video?q=rumble&sort=views',
-        'playlist_count': 24,
+        'playlist_mincount': 24,
        'info_dict': {
            'id': 'video?q=rumble&sort=views',
            'title': 'Search results for: rumble',
@ -334,19 +342,20 @@ class RumbleIE(InfoExtractor):
        if not url_info:
            raise UnsupportedError(url)

-        release_ts_str = self._search_regex(
-            r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)',
-            webpage, 'release date', fatal=False, default=None)
-        view_count_str = self._search_regex(r'<span class="media-heading-info">([\d,]+) Views',
-                                            webpage, 'view count', fatal=False, default=None)
-
-        return self.url_result(
-            url_info['url'], ie_key=url_info['ie_key'], url_transparent=True,
-            view_count=parse_count(view_count_str),
-            release_timestamp=parse_iso8601(release_ts_str),
-            like_count=parse_count(get_element_by_class('rumbles-count', webpage)),
-            description=clean_html(get_element_by_class('media-description', webpage)),
-        )
+        return {
+            '_type': 'url_transparent',
+            'ie_key': url_info['ie_key'],
+            'url': url_info['url'],
+            'release_timestamp': parse_iso8601(self._search_regex(
+                r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)),
+            'view_count': int_or_none(self._search_regex(
+                r'"userInteractionCount"\s*:\s*(\d+)', webpage, 'view count', default=None)),
+            'like_count': parse_count(self._search_regex(
+                r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)),
+            'dislike_count': parse_count(self._search_regex(
+                r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)),
+            'description': clean_html(get_element_by_class('media-description', webpage))
+        }


 class RumbleChannelIE(InfoExtractor):
--- a/yt_dlp/extractor/s4c.py
+++ b/yt_dlp/extractor/s4c.py
@ -1,5 +1,5 @@
 from .common import InfoExtractor
-from ..utils import traverse_obj
+from ..utils import traverse_obj, url_or_none


 class S4CIE(InfoExtractor):
@ -11,7 +11,8 @@ class S4CIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Y Swn',
            'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
-            'duration': 5340
+            'duration': 5340,
+            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg'
        },
    }, {
        'url': 'https://www.s4c.cymru/clic/programme/856636948',
@ -21,6 +22,7 @@ class S4CIE(InfoExtractor):
            'title': 'Am Dro',
            'duration': 2880,
            'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
+            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg'
        },
    }]

@ -30,7 +32,7 @@ class S4CIE(InfoExtractor):
            f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}',
            video_id, fatal=False)

-        filename = self._download_json(
+        player_config = self._download_json(
            'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
                'programme_id': video_id,
                'signed': '0',
@ -38,7 +40,13 @@ class S4CIE(InfoExtractor):
                'mode': 'od',
                'appId': 'clic',
                'streamName': '',
-            }, note='Downloading player config JSON')['filename']
+            }, note='Downloading player config JSON')
+        subtitles = {}
+        for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
+            subtitles.setdefault(sub.get('3', 'en'), []).append({
+                'url': sub['0'],
+                'name': sub.get('1'),
+            })
        m3u8_url = self._download_json(
            'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
                'mode': 'od',
@ -46,17 +54,52 @@ class S4CIE(InfoExtractor):
                'region': 'WW',
                'extra': 'false',
                'thirdParty': 'false',
-                'filename': filename,
+                'filename': player_config['filename'],
            }, note='Downloading streaming urls JSON')['hls']
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')

        return {
            'id': video_id,
-            'formats': formats,
+            'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'),
            'subtitles': subtitles,
+            'thumbnail': url_or_none(player_config.get('poster')),
            **traverse_obj(details, ('full_prog_details', 0, {
                'title': (('programme_title', 'series_title'), {str}),
                'description': ('full_billing', {str.strip}),
                'duration': ('duration', {lambda x: int(x) * 60}),
            }), get_all=False),
        }
+
+
+class S4CSeriesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.s4c.cymru/clic/series/864982911',
+        'playlist_mincount': 6,
+        'info_dict': {
+            'id': '864982911',
+            'title': 'Iaith ar Daith',
+            'description': 'md5:e878ebf660dce89bd2ef521d7ce06397'
+        },
+    }, {
+        'url': 'https://www.s4c.cymru/clic/series/866852587',
+        'playlist_mincount': 8,
+        'info_dict': {
+            'id': '866852587',
+            'title': 'FFIT Cymru',
+            'description': 'md5:abcb3c129cb68dbb6cd304fd33b07e96'
+        },
+    }]
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+        series_details = self._download_json(
+            'https://www.s4c.cymru/df/series_details', series_id, query={
+                'lang': 'e',
+                'series_id': series_id,
+                'show_prog_in_series': 'Y'
+            }, note='Downloading series details JSON')
+
+        return self.playlist_result(
+            [self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id)
+             for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))],
+            series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str})))
--- a/yt_dlp/extractor/sohu.py
+++ b/yt_dlp/extractor/sohu.py
@ -1,3 +1,4 @@
+import base64
 import re

 from .common import InfoExtractor
@ -8,7 +9,12 @@ from ..compat import (
 from ..utils import (
    ExtractorError,
    int_or_none,
+    float_or_none,
+    url_or_none,
+    unified_timestamp,
    try_get,
+    urljoin,
+    traverse_obj,
 )


@ -31,13 +37,20 @@ class SohuIE(InfoExtractor):
            'id': '409385080',
            'ext': 'mp4',
            'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
-        }
+        },
+        'skip': 'no longer available',
    }, {
        'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
        'info_dict': {
            'id': '78693464',
            'ext': 'mp4',
            'title': '【爱范品】第31期：MWC见不到的奇葩手机',
+            'uploader': '爱范儿视频',
+            'duration': 213,
+            'timestamp': 1425519600,
+            'upload_date': '20150305',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
+            'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
        }
    }, {
        'note': 'Multipart video',
@ -45,6 +58,12 @@ class SohuIE(InfoExtractor):
        'info_dict': {
            'id': '78910339',
            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            'uploader': '小苍cany',
+            'duration': 744.0,
+            'timestamp': 1426269360,
+            'upload_date': '20150313',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
+            'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
        },
        'playlist': [{
            'info_dict': {
@ -75,6 +94,11 @@ class SohuIE(InfoExtractor):
            'id': '78932792',
            'ext': 'mp4',
            'title': 'youtube-dl testing video',
+            'duration': 360,
+            'timestamp': 1426348620,
+            'upload_date': '20150314',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M02/8A/00/MTAuMTAuODguNzk=/6_14cee1be192g102SysCutcloud_78932792_7_7b.jpg',
+            'tags': [],
        },
        'params': {
            'skip_download': True
@ -100,7 +124,7 @@ class SohuIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

-        title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
+        title = re.sub(r'( - 高清正版在线观看)? - 搜狐视频$', '', self._og_search_title(webpage))

        vid = self._html_search_regex(
            r'var vid ?= ?["\'](\d+)["\']',
@ -132,7 +156,9 @@ class SohuIE(InfoExtractor):
                allot = format_data['allot']

                data = format_data['data']
-                clips_url = data['clipsURL']
+                clip_url = traverse_obj(data, (('clipsURL', 'mp4PlayUrl'), i, {url_or_none}), get_all=False)
+                if not clip_url:
+                    raise ExtractorError(f'Unable to extract url for clip {i}')
                su = data['su']

                video_url = 'newflv.sohu.ccgslb.net'
@ -142,9 +168,9 @@ class SohuIE(InfoExtractor):
                while 'newflv.sohu.ccgslb.net' in video_url:
                    params = {
                        'prot': 9,
-                        'file': clips_url[i],
+                        'file': clip_url,
                        'new': su[i],
-                        'prod': 'flash',
+                        'prod': 'h5n',
                        'rb': 1,
                    }

@ -193,6 +219,75 @@ class SohuIE(InfoExtractor):
                'entries': playlist,
                'id': video_id,
                'title': title,
+                'duration': traverse_obj(vid_data, ('data', 'totalDuration', {float_or_none})),
            }

-        return info
+        if mytv:
+            publish_time = unified_timestamp(self._search_regex(
+                r'publishTime:\s*["\'](\d+-\d+-\d+ \d+:\d+)["\']', webpage, 'publish time', fatal=False))
+        else:
+            publish_time = traverse_obj(vid_data, ('tv_application_time', {unified_timestamp}))
+
+        return {
+            'timestamp': publish_time - 8 * 3600 if publish_time else None,
+            **traverse_obj(vid_data, {
+                'alt_title': ('data', 'subName', {str}),
+                'uploader': ('wm_data', 'wm_username', {str}),
+                'thumbnail': ('data', 'coverImg', {url_or_none}),
+                'tags': ('data', 'tag', {str.split}),
+            }),
+            **info,
+        }
+
+
+class SohuVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.sohu\.com/v/(?P<id>[\w=-]+)\.html(?:$|[#?])'
+
+    _TESTS = [{
+        'note': 'Multipart video',
+        'url': 'https://tv.sohu.com/v/MjAyMzA2MTQvbjYwMTMxNTE5Mi5zaHRtbA==.html',
+        'info_dict': {
+            'id': '601315192',
+            'title': '《淬火丹心》第1集',
+            'alt_title': '“点天灯”发生事故',
+            'duration': 2701.692,
+            'timestamp': 1686758040,
+            'upload_date': '20230614',
+            'thumbnail': 'http://photocdn.tv.sohu.com/img/20230614/vrsa_hor_1686738763256_454010551.jpg',
+        },
+        'playlist_mincount': 9,
+        'skip': 'Only available in China',
+    }, {
+        'url': 'https://tv.sohu.com/v/dXMvMjMyNzk5ODg5Lzc4NjkzNDY0LnNodG1s.html',
+        'info_dict': {
+            'id': '78693464',
+            'ext': 'mp4',
+            'title': '【爱范品】第31期：MWC见不到的奇葩手机',
+            'uploader': '爱范儿视频',
+            'duration': 213,
+            'timestamp': 1425519600,
+            'upload_date': '20150305',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg',
+            'tags': ['爱范儿', '爱范品', 'MWC', '手机'],
+        }
+    }, {
+        'note': 'Multipart video',
+        'url': 'https://tv.sohu.com/v/dXMvMjQyNTYyMTYzLzc4OTEwMzM5LnNodG1s.html?src=pl',
+        'info_dict': {
+            'id': '78910339',
+            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            'uploader': '小苍cany',
+            'duration': 744.0,
+            'timestamp': 1426269360,
+            'upload_date': '20150313',
+            'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M11/89/57/MTAuMTAuODguODA=/6_14cea022a1dg102SysCutcloud_78910339_8_0b.jpg',
+            'tags': ['小苍MM', '英雄联盟', '实战秘籍'],
+        },
+        'playlist_mincount': 3,
+    }]
+
+    def _real_extract(self, url):
+        encoded_id = self._match_id(url)
+        path = base64.urlsafe_b64decode(encoded_id).decode()
+        subdomain = 'tv' if re.match(r'\d+/n\d+\.shtml', path) else 'my.tv'
+        return self.url_result(urljoin(f'http://{subdomain}.sohu.com/', path), SohuIE)
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@ -15,7 +15,6 @@ from ..utils import (
    UserNotLive,
    determine_ext,
    format_field,
-    get_element_by_id,
    get_first,
    int_or_none,
    join_nonempty,
@ -50,8 +49,9 @@ class TikTokBaseIE(InfoExtractor):
        return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'

    def _get_sigi_state(self, webpage, display_id):
-        return self._parse_json(get_element_by_id(
-            'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
+        return self._search_json(
+            r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>', webpage,
+            'sigi state', display_id, end_pattern=r'</script>')

    def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
                       note='Downloading API JSON', errnote='Unable to download API page'):
--- a/yt_dlp/extractor/tv5mondeplus.py
+++ b/yt_dlp/extractor/tv5mondeplus.py
@ -1,10 +1,14 @@
+import urllib.parse
+
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    extract_attributes,
    int_or_none,
    parse_duration,
+    traverse_obj,
    try_get,
+    url_or_none,
 )


@ -12,6 +16,36 @@ class TV5MondePlusIE(InfoExtractor):
    IE_DESC = 'TV5MONDE+'
    _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
    _TESTS = [{
+        # movie
+        'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
+        'md5': 'c86f60bf8b75436455b1b205f9745955',
+        'info_dict': {
+            'id': 'ZX0ipMyFQq_6D4BA7b',
+            'display_id': 'les-novices',
+            'ext': 'mp4',
+            'title': 'Les novices',
+            'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
+            'upload_date': '20230821',
+            'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
+            'duration': 5177,
+            'episode': 'Les novices',
+        },
+    }, {
+        # series episode
+        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
+        'info_dict': {
+            'id': 'wJ0eeEPozr_6D4BA7b',
+            'display_id': 'opj-les-dents-de-la-terre-2',
+            'ext': 'mp4',
+            'title': "OPJ - Les dents de la Terre (2)",
+            'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
+            'upload_date': '20230823',
+            'series': 'OPJ',
+            'episode': 'Les dents de la Terre (2)',
+            'duration': 2877,
+            'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
+        },
+    }, {
        # movie
        'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
        'md5': '32fa0cde16a4480d1251502a66856d5f',
@ -23,6 +57,7 @@ class TV5MondePlusIE(InfoExtractor):
            'description': 'md5:570e8bb688036ace873b2d50d24c026d',
            'upload_date': '20210819',
        },
+        'skip': 'no longer available',
    }, {
        # series episode
        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
@ -39,6 +74,7 @@ class TV5MondePlusIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
+        'skip': 'no longer available',
    }, {
        'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
        'only_matching': True,
@ -63,20 +99,45 @@ class TV5MondePlusIE(InfoExtractor):
        video_files = self._parse_json(
            vpl_data['data-broadcast'], display_id)
        formats = []
-        for video_file in video_files:
-            v_url = video_file.get('url')
-            if not v_url:
-                continue
-            video_format = video_file.get('format') or determine_ext(v_url)
-            if video_format == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    v_url, display_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
-            else:
-                formats.append({
-                    'url': v_url,
-                    'format_id': video_format,
-                })
+        video_id = None
+
+        def process_video_files(v):
+            nonlocal video_id
+            for video_file in v:
+                v_url = video_file.get('url')
+                if not v_url:
+                    continue
+                if video_file.get('type') == 'application/deferred':
+                    d_param = urllib.parse.quote(v_url)
+                    token = video_file.get('token')
+                    if not token:
+                        continue
+                    deferred_json = self._download_json(
+                        f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id,
+                        note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False)
+                    v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
+                    if not v_url:
+                        continue
+                    # data-guid from the webpage isn't stable, use the material id from the json urls
+                    video_id = self._search_regex(
+                        r'materials/([\da-zA-Z]{10}_[\da-fA-F]{7})/', v_url, 'video id', default=None)
+                    process_video_files(deferred_json)
+
+                video_format = video_file.get('format') or determine_ext(v_url)
+                if video_format == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        v_url, display_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+                elif video_format == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        v_url, display_id, fatal=False))
+                else:
+                    formats.append({
+                        'url': v_url,
+                        'format_id': video_format,
+                    })
+
+        process_video_files(video_files)

        metadata = self._parse_json(
            vpl_data['data-metadata'], display_id)
@ -100,10 +161,11 @@ class TV5MondePlusIE(InfoExtractor):
        if upload_date:
            upload_date = upload_date.replace('_', '')

-        video_id = self._search_regex(
-            (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
-             r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
-            default=display_id)
+        if not video_id:
+            video_id = self._search_regex(
+                (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+                 r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
+                default=display_id)

        return {
            'id': video_id,
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@ -22,7 +22,7 @@ from ..utils import (


 class TwitCastingIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/(?:movie|twplayer)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<uploader_id>[^/?#]+)/(?:movie|twplayer)/(?P<id>\d+)'
    _M3U8_HEADERS = {
        'Origin': 'https://twitcasting.tv',
        'Referer': 'https://twitcasting.tv/',
@ -231,7 +231,7 @@ class TwitCastingIE(InfoExtractor):


 class TwitCastingLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/?(?:[#?]|$)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://twitcasting.tv/ivetesangalo',
        'only_matching': True,
@ -265,8 +265,15 @@ class TwitCastingLiveIE(InfoExtractor):


 class TwitCastingUserIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/show/?(?:[#?]|$)'
+    _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
    _TESTS = [{
+        'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
+        'info_dict': {
+            'id': 'natsuiromatsuri',
+            'title': 'natsuiromatsuri - Live History',
+        },
+        'playlist_mincount': 235,
+    }, {
        'url': 'https://twitcasting.tv/noriyukicas/show',
        'only_matching': True,
    }]
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@ -1,9 +1,10 @@
-import functools
 import json
+import random
 import re

 from .common import InfoExtractor
 from .periscope import PeriscopeBaseIE, PeriscopeIE
+from ..compat import functools  # isort: split
 from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_unquote,
@ -147,10 +148,14 @@ class TwitterBaseIE(InfoExtractor):
    def is_logged_in(self):
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

+    @functools.cached_property
+    def _selected_api(self):
+        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
+
    def _fetch_guest_token(self, display_id):
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
-            headers=self._set_base_headers(legacy=display_id and self._configuration_arg('legacy_api'))),
+            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
@ -295,7 +300,7 @@ class TwitterBaseIE(InfoExtractor):
        self.report_login()

    def _call_api(self, path, video_id, query={}, graphql=False):
-        headers = self._set_base_headers(legacy=not graphql and self._configuration_arg('legacy_api'))
+        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
@ -707,6 +712,7 @@ class TwitterIE(TwitterBaseIE):
            'tags': [],
            'age_limit': 0,
        },
+        'skip': 'This Tweet is unavailable',
    }, {
        # not available in Periscope
        'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
@ -721,6 +727,7 @@ class TwitterIE(TwitterBaseIE):
            'view_count': int,
        },
        'add_ie': ['TwitterBroadcast'],
+        'skip': 'Broadcast no longer exists',
    }, {
        # unified card
        'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
@ -773,9 +780,9 @@ class TwitterIE(TwitterBaseIE):
        'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
        'info_dict': {
            'id': '1577719286659006464',
-            'title': 'Ultima📛 | #вʟм - Test',
+            'title': 'Ultima📛| New Era - Test',
            'description': 'Test https://t.co/Y3KEZD7Dad',
-            'uploader': 'Ultima📛 | #вʟм',
+            'uploader': 'Ultima📛| New Era',
            'uploader_id': 'UltimaShadowX',
            'uploader_url': 'https://twitter.com/UltimaShadowX',
            'upload_date': '20221005',
@ -811,7 +818,7 @@ class TwitterIE(TwitterBaseIE):
            'age_limit': 0,
        },
    }, {
-        # Adult content, fails if not logged in (GraphQL)
+        # Adult content, fails if not logged in
        'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
        'info_dict': {
            'id': '1575199163847000068',
@ -831,9 +838,10 @@ class TwitterIE(TwitterBaseIE):
            'age_limit': 18,
            'tags': []
        },
+        'params': {'skip_download': 'The media could not be played'},
        'skip': 'Requires authentication',
    }, {
-        # Playlist result only with auth
+        # Playlist result only with graphql API
        'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
        'playlist_mincount': 2,
        'info_dict': {
@ -898,7 +906,7 @@ class TwitterIE(TwitterBaseIE):
            'uploader_id': 'MoniqueCamarra',
            'live_status': 'was_live',
            'release_timestamp': 1658417414,
-            'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
+            'description': 'md5:acce559345fd49f129c20dbcda3f1201',
            'timestamp': 1658407771,
            'release_date': '20220721',
            'upload_date': '20220721',
@ -1007,10 +1015,10 @@ class TwitterIE(TwitterBaseIE):
            'view_count': int,
            'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
            'age_limit': 0,
-            'uploader': 'Mün The Friend Of YWAP',
+            'uploader': 'Mün',
            'repost_count': int,
            'upload_date': '20221206',
-            'title': 'Mün The Friend Of YWAP - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
+            'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
            'comment_count': int,
            'like_count': int,
            'tags': [],
@ -1019,7 +1027,7 @@ class TwitterIE(TwitterBaseIE):
            'timestamp': 1670306984.0,
        },
    }, {
-        # url to retweet id w/ legacy api
+        # retweeted_status (private)
        'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
        'info_dict': {
            'id': '1623274794488659969',
@ -1039,32 +1047,84 @@ class TwitterIE(TwitterBaseIE):
            'like_count': int,
            'repost_count': int,
        },
-        'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
        'skip': 'Protected tweet',
    }, {
-        # orig tweet w/ graphql
-        'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
+        # retweeted_status
+        'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
        'info_dict': {
-            'id': '1623274794488659969',
-            'display_id': '1623739803874349067',
+            'id': '1694928337846538240',
            'ext': 'mp4',
-            'title': '@selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people:    Whoopsie-daisy',
-            'description': 'md5:9258bdbb54793bdc124fe1cd47e96c6a',
-            'uploader': '@selfisekai@hackerspace.pl 🐀',
-            'uploader_id': 'liberdalau',
-            'uploader_url': 'https://twitter.com/liberdalau',
+            'display_id': '1695424220702888009',
+            'title': 'md5:e8daa9527bc2b947121395494f786d9d',
+            'description': 'md5:004f2d37fd58737724ec75bc7e679938',
+            'uploader': 'Benny Johnson',
+            'uploader_id': 'bennyjohnson',
+            'uploader_url': 'https://twitter.com/bennyjohnson',
            'age_limit': 0,
            'tags': [],
-            'duration': 8.033,
-            'timestamp': 1675964711.0,
-            'upload_date': '20230209',
-            'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
+            'duration': 45.001,
+            'timestamp': 1692962814.0,
+            'upload_date': '20230825',
+            'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
            'like_count': int,
-            'view_count': int,
            'repost_count': int,
+            'view_count': int,
            'comment_count': int,
        },
-        'skip': 'Protected tweet',
+    }, {
+        # retweeted_status w/ legacy API
+        'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
+        'info_dict': {
+            'id': '1694928337846538240',
+            'ext': 'mp4',
+            'display_id': '1695424220702888009',
+            'title': 'md5:e8daa9527bc2b947121395494f786d9d',
+            'description': 'md5:004f2d37fd58737724ec75bc7e679938',
+            'uploader': 'Benny Johnson',
+            'uploader_id': 'bennyjohnson',
+            'uploader_url': 'https://twitter.com/bennyjohnson',
+            'age_limit': 0,
+            'tags': [],
+            'duration': 45.001,
+            'timestamp': 1692962814.0,
+            'upload_date': '20230825',
+            'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+            'like_count': int,
+            'repost_count': int,
+        },
+        'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
+    }, {
+        # Broadcast embedded in tweet
+        'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
+        'info_dict': {
+            'id': '1yNGaNLjEblJj',
+            'ext': 'mp4',
+            'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
+            'uploader': 'Jessica Dobson',
+            'uploader_id': '1DZEoDwDovRQa',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'view_count': int,
+        },
+        'add_ie': ['TwitterBroadcast'],
+    }, {
+        # Animated gif and quote tweet video, with syndication API
+        'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
+        'playlist_mincount': 2,
+        'info_dict': {
+            'id': '1696256659889565950',
+            'title': 'BAKOON - https://t.co/zom968d0a0',
+            'description': 'https://t.co/zom968d0a0',
+            'tags': [],
+            'uploader': 'BAKOON',
+            'uploader_id': 'BAKKOOONN',
+            'uploader_url': 'https://twitter.com/BAKKOOONN',
+            'age_limit': 18,
+            'timestamp': 1693254077.0,
+            'upload_date': '20230828',
+            'like_count': int,
+        },
+        'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
+        'expected_warnings': ['Not all metadata'],
    }, {
        # onion route
        'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@ -1103,6 +1163,14 @@ class TwitterIE(TwitterBaseIE):
        'only_matching': True,
    }]

+    _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
+
+    @property
+    def _GRAPHQL_ENDPOINT(self):
+        if self.is_logged_in:
+            return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
+        return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
+
    def _graphql_to_legacy(self, data, twid):
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
@ -1130,9 +1198,14 @@ class TwitterIE(TwitterBaseIE):
            'user': ('core', 'user_results', 'result', 'legacy'),
            'card': ('card', 'legacy'),
            'quoted_status': ('quoted_status_result', 'result', 'legacy'),
+            'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
        }, expected_type=dict, default={}))

-        # extra transformation is needed since result does not match legacy format
+        # extra transformations needed since result does not match legacy format
+        if status.get('retweeted_status'):
+            status['retweeted_status']['user'] = traverse_obj(status, (
+                'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
+
        binding_values = {
            binding_value.get('key'): binding_value.get('value')
            for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
@ -1208,33 +1281,42 @@ class TwitterIE(TwitterBaseIE):
        }

    def _extract_status(self, twid):
-        if self.is_logged_in:
-            return self._graphql_to_legacy(
-                self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)
+        if self.is_logged_in or self._selected_api == 'graphql':
+            status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)

-        try:
-            if not self._configuration_arg('legacy_api'):
-                return self._graphql_to_legacy(
-                    self._call_graphql_api('2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId', twid), twid)
-            return traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
+        elif self._selected_api == 'legacy':
+            status = self._call_api(f'statuses/show/{twid}.json', twid, {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
-            }), 'retweeted_status', None)
+            })

-        except ExtractorError as e:
-            if e.expected:
-                raise
+        elif self._selected_api == 'syndication':
            self.report_warning(
-                f'{e.orig_msg}. Falling back to syndication endpoint; some metadata may be missing', twid)
+                'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
+            status = self._download_json(
+                'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
+                headers={'User-Agent': 'Googlebot'}, query={
+                    'id': twid,
+                    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
+                    'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
+                })
+            if not status:
+                raise ExtractorError('Syndication endpoint returned empty JSON response')
+            # Transform the result so its structure matches that of legacy/graphql
+            media = []
+            for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
+                detail['id_str'] = traverse_obj(detail, (
+                    'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
+                media.append(detail)
+            status['extended_entities'] = {'media': media}

-        status = self._download_json(
-            'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
-            headers={'User-Agent': 'Googlebot'}, query={'id': twid})
-        status['extended_entities'] = {'media': status.get('mediaDetails')}
-        return status
+        else:
+            raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)
+
+        return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}

    def _real_extract(self, url):
        twid, selected_index = self._match_valid_url(url).group('id', 'index')
@ -1266,10 +1348,7 @@ class TwitterIE(TwitterBaseIE):
        }

        def extract_from_video_info(media):
-            media_id = traverse_obj(media, 'id_str', 'id', (
-                'video_info', 'variants', ..., 'url',
-                {functools.partial(re.search, r'_video/(\d+)/')}, 1
-            ), get_all=False, expected_type=str_or_none) or twid
+            media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
            self.write_debug(f'Extracting from video info: {media_id}')

            formats = []
@ -1503,6 +1582,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
        broadcast = self._call_api(
            'broadcasts/show.json', broadcast_id,
            {'ids': broadcast_id})['broadcasts'][broadcast_id]
+        if not broadcast:
+            raise ExtractorError('Broadcast no longer exists', expected=True)
        info = self._parse_broadcast_data(broadcast, broadcast_id)
        media_key = broadcast['media_key']
        source = self._call_api(
--- a/yt_dlp/extractor/videa.py
+++ b/yt_dlp/extractor/videa.py
@ -38,6 +38,7 @@ class VideaIE(InfoExtractor):
            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
            'thumbnail': r're:^https?://.*',
            'duration': 21,
+            'age_limit': 0,
        },
    }, {
        'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
@ -48,6 +49,7 @@ class VideaIE(InfoExtractor):
            'title': 'Supercars előzés',
            'thumbnail': r're:^https?://.*',
            'duration': 64,
+            'age_limit': 0,
        },
    }, {
        'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
@ -58,6 +60,7 @@ class VideaIE(InfoExtractor):
            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
            'thumbnail': r're:^https?://.*',
            'duration': 21,
+            'age_limit': 0,
        },
    }, {
        'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
@ -124,7 +127,7 @@ class VideaIE(InfoExtractor):
        query['_t'] = result[:16]

        b64_info, handle = self._download_webpage_handle(
-            'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
+            'http://videa.hu/player/xml', video_id, query=query)
        if b64_info.startswith('<?xml'):
            info = self._parse_xml(b64_info, video_id)
        else:
--- a/yt_dlp/extractor/wdr.py
+++ b/yt_dlp/extractor/wdr.py
@ -173,6 +173,7 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
            'skip': 'HTTP Error 404: Not Found',
        },
        {
+            # FIXME: Asset JSON is directly embedded in webpage
            'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
            'info_dict': {
                'id': 'mdb-2296252',
@ -221,6 +222,8 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
                'id': 'mdb-869971',
                'ext': 'mp4',
                'title': r're:^COSMO Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'alt_title': 'COSMO Livestream',
+                'live_status': 'is_live',
                'upload_date': '20160101',
            },
            'params': {
@ -248,6 +251,16 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
            'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
            'only_matching': True,
        },
+        {
+            'url': 'https://www1.wdr.de/mediathek/video/sendungen/rockpalast/video-baroness---freak-valley-festival--100.html',
+            'info_dict': {
+                'id': 'mdb-2741028',
+                'ext': 'mp4',
+                'title': 'Baroness - Freak Valley Festival 2022',
+                'alt_title': 'Rockpalast',
+                'upload_date': '20220725',
+            },
+        }
    ]

    def _real_extract(self, url):
@ -259,7 +272,7 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE

        # Article with several videos

-        # for wdr.de the data-extension is in a tag with the class "mediaLink"
+        # for wdr.de the data-extension-ard is in a tag with the class "mediaLink"
        # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
        # for wdrmaus, in a tag with the class "videoButton" (previously a link
        # to the page in a multiline "videoLink"-tag)
@ -268,7 +281,7 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
                    (?:
                        (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
                        (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
-                    )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+                    )data-extension(?:-ard)?=(["\'])(?P<data>(?:(?!\3).)+)\3
                    ''', webpage):
            media_link_obj = self._parse_json(
                mobj.group('data'), display_id, transform_source=js_to_json,
@ -295,7 +308,7 @@ class WDRPageIE(WDRIE):  # XXX: Do not subclass from concrete IE
                    compat_urlparse.urljoin(url, mobj.group('href')),
                    ie=WDRPageIE.ie_key())
                for mobj in re.finditer(
-                    r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension=',
+                    r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension(?:-ard)?=',
                    webpage) if re.match(self._PAGE_REGEX, mobj.group('href'))
            ]

--- a/yt_dlp/extractor/weibo.py
+++ b/yt_dlp/extractor/weibo.py
@ -1,134 +1,241 @@
-from .common import InfoExtractor
-
-import json
 import random
-import re
+import itertools
+import urllib.parse

-from ..compat import (
-    compat_parse_qs,
-    compat_str,
-)
+from .common import InfoExtractor
 from ..utils import (
-    js_to_json,
+    int_or_none,
+    make_archive_id,
+    mimetype2ext,
+    parse_resolution,
+    str_or_none,
    strip_jsonp,
+    traverse_obj,
+    url_or_none,
    urlencode_postdata,
+    urljoin,
 )


-class WeiboIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
-    _TEST = {
-        'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
-        'info_dict': {
-            'id': 'Fp6RGfbff',
-            'ext': 'mp4',
-            'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
-        }
-    }
+class WeiboBaseIE(InfoExtractor):
+    def _update_visitor_cookies(self, video_id):
+        visitor_data = self._download_json(
+            'https://passport.weibo.com/visitor/genvisitor', video_id,
+            note='Generating first-visit guest request',
+            transform_source=strip_jsonp,
+            data=urlencode_postdata({
+                'cb': 'gen_callback',
+                'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
+            }))

-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        # to get Referer url for genvisitor
-        webpage, urlh = self._download_webpage_handle(url, video_id)
-
-        visitor_url = urlh.url
-
-        if 'passport.weibo.com' in visitor_url:
-            # first visit
-            visitor_data = self._download_json(
-                'https://passport.weibo.com/visitor/genvisitor', video_id,
-                note='Generating first-visit data',
-                transform_source=strip_jsonp,
-                headers={'Referer': visitor_url},
-                data=urlencode_postdata({
-                    'cb': 'gen_callback',
-                    'fp': json.dumps({
-                        'os': '2',
-                        'browser': 'Gecko57,0,0,0',
-                        'fonts': 'undefined',
-                        'screenInfo': '1440*900*24',
-                        'plugins': '',
-                    }),
-                }))
-
-            tid = visitor_data['data']['tid']
-            cnfd = '%03d' % visitor_data['data']['confidence']
-
-            self._download_webpage(
-                'https://passport.weibo.com/visitor/visitor', video_id,
-                note='Running first-visit callback',
-                query={
-                    'a': 'incarnate',
-                    't': tid,
-                    'w': 2,
-                    'c': cnfd,
-                    'cb': 'cross_domain',
-                    'from': 'weibo',
-                    '_rand': random.random(),
-                })
-
-            webpage = self._download_webpage(
-                url, video_id, note='Revisiting webpage')
-
-        title = self._html_extract_title(webpage)
-
-        video_formats = compat_parse_qs(self._search_regex(
-            r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))
-
-        formats = []
-        supported_resolutions = (480, 720)
-        for res in supported_resolutions:
-            vid_urls = video_formats.get(compat_str(res))
-            if not vid_urls or not isinstance(vid_urls, list):
-                continue
-
-            vid_url = vid_urls[0]
-            formats.append({
-                'url': vid_url,
-                'height': res,
+        self._download_webpage(
+            'https://passport.weibo.com/visitor/visitor', video_id,
+            note='Running first-visit callback to get guest cookies',
+            query={
+                'a': 'incarnate',
+                't': visitor_data['data']['tid'],
+                'w': 2,
+                'c': '%03d' % visitor_data['data']['confidence'],
+                'cb': 'cross_domain',
+                'from': 'weibo',
+                '_rand': random.random(),
            })

-        uploader = self._og_search_property(
-            'nick-name', webpage, 'uploader', default=None)
+    def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
+        # Download `url` and parse the body as JSON.
+        # If the request ends up on passport.weibo.com, guest cookies are
+        # generated first and the request is retried once.
+        # With fatal=False, a failed download returns False instead of raising
+        response = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
+        if response is False:
+            # Non-fatal download failure: there is no (webpage, urlh) tuple to unpack
+            return False
+        webpage, urlh = response
+        if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
+            self._update_visitor_cookies(video_id)
+            webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
+            if webpage is False:
+                # The retry failed non-fatally as well; nothing to parse
+                return False
+        return self._parse_json(webpage, video_id, fatal=fatal)

+    def _extract_formats(self, video_info):
+        # Build the list of format dicts from the `page_info` -> `media_info`
+        # part of a status object. Always returns a list (possibly empty)
+        media_info = traverse_obj(video_info, ('page_info', 'media_info'))
+        # Primary source: every `playback_list` entry that carries a valid play URL
+        formats = traverse_obj(media_info, (
+            'playback_list', lambda _, v: url_or_none(v['play_info']['url']), 'play_info', {
+                'url': 'url',
+                'format': ('quality_desc', {str}),
+                'format_id': ('label', {str}),
+                'ext': ('mime', {mimetype2ext}),
+                'tbr': ('bitrate', {int_or_none}, {lambda x: x or None}),
+                'vcodec': ('video_codecs', {str}),
+                'fps': ('fps', {int_or_none}),
+                'width': ('width', {int_or_none}),
+                'height': ('height', {int_or_none}),
+                'filesize': ('size', {int_or_none}),
+                'acodec': ('audio_codecs', {str}),
+                'asr': ('audio_sample_rate', {int_or_none}),
+                'audio_channels': ('audio_channels', {int_or_none}),
+            })) or []  # guarantee a list so that the fallback below can append to it
+        if not formats:  # fallback, should be barely used
+            for url in set(traverse_obj(media_info, (..., {url_or_none}))):
+                if 'label=' in url:  # filter out non-video urls
+                    format_id, resolution = self._search_regex(
+                        r'label=(\w+)&template=(\d+x\d+)', url, 'format info',
+                        group=(1, 2), default=(None, None))
+                    formats.append({
+                        'url': url,
+                        'format_id': format_id,
+                        **parse_resolution(resolution),
+                        # The lookup yields None when no `video_details` entry matches
+                        # this label; fall back to an empty dict so `**` does not raise
+                        **(traverse_obj(media_info, (
+                            'video_details', lambda _, v: v['label'].startswith(format_id), {
+                                # same field name as used for `playback_list` entries above
+                                'filesize': ('size', {int_or_none}),
+                                'tbr': ('bitrate', {int_or_none}),
+                            }
+                        ), get_all=False) or {}),
+                    })
+        return formats
+
+    def _parse_video_info(self, video_info, video_id=None):
        return {
            'id': video_id,
-            'title': title,
-            'uploader': uploader,
-            'formats': formats
+            'extractor_key': WeiboIE.ie_key(),
+            'extractor': WeiboIE.IE_NAME,
+            'formats': self._extract_formats(video_info),
+            'http_headers': {'Referer': 'https://weibo.com/'},
+            '_old_archive_ids': [make_archive_id('WeiboMobile', video_id)],
+            **traverse_obj(video_info, {
+                'id': (('id', 'id_str', 'mid'), {str_or_none}),
+                'display_id': ('mblogid', {str_or_none}),
+                'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, {lambda x: x or None}),
+                'description': ('text_raw', {str}),
+                'duration': ('page_info', 'media_info', 'duration', {int_or_none}),
+                'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}),
+                'thumbnail': ('page_info', 'page_pic', {url_or_none}),
+                'uploader': ('user', 'screen_name', {str}),
+                'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
+                'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
+                'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
+                'like_count': ('attitudes_count', {int_or_none}),
+                'repost_count': ('reposts_count', {int_or_none}),
+            }, get_all=False),
+            'tags': traverse_obj(video_info, ('topic_struct', ..., 'topic_title', {str})) or None,
        }


-class WeiboMobileIE(InfoExtractor):
-    _VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
-    _TEST = {
-        'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
+class WeiboIE(WeiboBaseIE):
+    _VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
+    _TESTS = [{
+        'url': 'https://weibo.com/7827771738/N4xlMvjhI',
+        'info_dict': {
+            'id': '4910815147462302',
+            'ext': 'mp4',
+            'display_id': 'N4xlMvjhI',
+            'title': '【睡前消息暑假版第一期：拉泰国一把  对中国有好处】',
+            'description': 'md5:e2637a7673980d68694ea7c43cf12a5f',
+            'duration': 918,
+            'timestamp': 1686312819,
+            'upload_date': '20230609',
+            'thumbnail': r're:https://.*\.jpg',
+            'uploader': '睡前视频基地',
+            'uploader_id': '7827771738',
+            'uploader_url': 'https://weibo.com/u/7827771738',
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
+            'tags': ['泰国大选远进党获胜', '睡前消息', '暑期版'],
+        },
+    }, {
+        'url': 'https://m.weibo.cn/status/4189191225395228',
        'info_dict': {
            'id': '4189191225395228',
            'ext': 'mp4',
-            'title': '午睡当然是要甜甜蜜蜜的啦',
-            'uploader': '柴犬柴犬'
+            'display_id': 'FBqgOmDxO',
+            'title': '柴犬柴犬的秒拍视频',
+            'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f',
+            'duration': 53,
+            'timestamp': 1514264429,
+            'upload_date': '20171226',
+            'thumbnail': r're:https://.*\.jpg',
+            'uploader': '柴犬柴犬',
+            'uploader_id': '5926682210',
+            'uploader_url': 'https://weibo.com/u/5926682210',
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
        }
-    }
+    }, {
+        'url': 'https://weibo.com/0/4224132150961381',
+        'note': 'no playback_list example',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        # to get Referer url for genvisitor
-        webpage = self._download_webpage(url, video_id, note='visit the page')

-        weibo_info = self._parse_json(self._search_regex(
-            r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
-            webpage, 'js_code', flags=re.DOTALL),
-            video_id, transform_source=js_to_json)
+        return self._parse_video_info(self._weibo_download_json(
+            f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id))

-        status_data = weibo_info.get('status', {})
-        page_info = status_data.get('page_info')
-        title = status_data['status_title']
-        uploader = status_data.get('user', {}).get('screen_name')

-        return {
-            'id': video_id,
-            'title': title,
-            'uploader': uploader,
-            'url': page_info['media_info']['stream_url']
+# Handles weibo.com/tv/show/<oid> URLs by resolving them to the underlying
+# status and delegating the actual extraction to WeiboIE
+class WeiboVideoIE(WeiboBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)'
+    _TESTS = [{
+        'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
+        'info_dict': {
+            'id': '4797700463137878',
+            'ext': 'mp4',
+            'display_id': 'LEZDodaiW',
+            'title': '呃，稍微了解了一下靡烟miya，感觉这东西也太二了',
+            'description': '呃，稍微了解了一下靡烟miya，感觉这东西也太二了 http://t.cn/A6aerGsM ',
+            'duration': 76,
+            'timestamp': 1659344278,
+            'upload_date': '20220801',
+            'thumbnail': r're:https://.*\.jpg',
+            'uploader': '君子爱财陈平安',
+            'uploader_id': '3905382233',
+            'uploader_url': 'https://weibo.com/u/3905382233',
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        # The component API is queried with a POST body naming the video's oid;
+        # the original page URL is sent as Referer
+        post_data = f'data={{"Component_Play_Playinfo":{{"oid":"{video_id}"}}}}'.encode()
+        video_info = self._weibo_download_json(
+            f'https://weibo.com/tv/api/component?page=%2Ftv%2Fshow%2F{video_id.replace(":", "%3A")}',
+            video_id, headers={'Referer': url}, data=post_data)['data']['Component_Play_Playinfo']
+        # Hand over to WeiboIE using the status id (`mid`) from the response
+        return self.url_result(f'https://weibo.com/0/{video_info["mid"]}', WeiboIE)
+
+
+# Extracts the videos of a user profile (weibo.com/u/<uid>) as a playlist
+class WeiboUserIE(WeiboBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?weibo\.com/u/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://weibo.com/u/2066652961?tabtype=video',
+        'info_dict': {
+            'id': '2066652961',
+            'title': '萧影殿下的视频',
+            'description': '萧影殿下的全部视频',
+            'uploader': '萧影殿下',
+        },
+        'playlist_mincount': 195,
+    }]
+
+    def _fetch_page(self, uid, cursor=0, page=1):
+        # Request one page of the user's feed and return the `data` member of
+        # the response; `page` is only used for the progress note
+        return self._weibo_download_json(
+            'https://weibo.com/ajax/profile/getWaterFallContent',
+            uid, note=f'Downloading videos page {page}',
+            query={'uid': uid, 'cursor': cursor})['data']
+
+    def _entries(self, uid, first_page):
+        # Yield one parsed entry per item of each page's `list`, reusing the
+        # already downloaded first page and following `next_cursor` afterwards
+        cursor = 0
+        for page in itertools.count(1):
+            response = first_page if page == 1 else self._fetch_page(uid, cursor, page)
+            for video_info in traverse_obj(response, ('list', ..., {dict})):
+                yield self._parse_video_info(video_info)
+            cursor = response.get('next_cursor')
+            # Stop when the cursor is negative or falsy after int_or_none()
+            # (e.g. 0; presumably also missing/non-integer values)
+            if (int_or_none(cursor) or -1) < 0:
+                break
+
+    def _real_extract(self, url):
+        uid = self._match_id(url)
+        first_page = self._fetch_page(uid)
+        # Playlist metadata is derived from the first screen name found on the
+        # first page; it is omitted entirely if none is available
+        uploader = traverse_obj(first_page, ('list', ..., 'user', 'screen_name', {str}), get_all=False)
+        metainfo = {
+            'title': f'{uploader}的视频',
+            'description': f'{uploader}的全部视频',
+            'uploader': uploader,
+        } if uploader else {}
+
+        return self.playlist_result(self._entries(uid, first_page), uid, **metainfo)
--- a/yt_dlp/extractor/zaiko.py
+++ b/yt_dlp/extractor/zaiko.py
@ -9,6 +9,7 @@ from ..utils import (
    traverse_obj,
    try_call,
    unescapeHTML,
+    url_basename,
    url_or_none,
 )

@ -45,12 +46,14 @@ class ZaikoIE(ZaikoBaseIE):
            'uploader_id': '454',
            'uploader': 'ZAIKO ZERO',
            'release_timestamp': 1583809200,
-            'thumbnail': r're:https://[a-z0-9]+.cloudfront.net/[a-z0-9_]+/[a-z0-9_]+',
+            'thumbnail': r're:^https://[\w.-]+/\w+/\w+',
+            'thumbnails': 'maxcount:2',
            'release_date': '20200310',
            'categories': ['Tech House'],
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
+        'skip': 'Your account does not have tickets to this event',
    }]

    def _real_extract(self, url):
@ -83,6 +86,12 @@ class ZaikoIE(ZaikoBaseIE):
        if not formats:
            self.raise_no_formats(msg, expected=expected)

+        thumbnail_urls = [
+            traverse_obj(player_meta, ('initial_event_info', 'poster_url')),
+            self._og_search_thumbnail(self._download_webpage(
+                f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''),
+        ]
+
        return {
            'id': video_id,
            'formats': formats,
@ -96,8 +105,8 @@ class ZaikoIE(ZaikoBaseIE):
            }),
            **traverse_obj(player_meta, ('initial_event_info', {
                'alt_title': ('title', {str}),
-                'thumbnail': ('poster_url', {url_or_none}),
            })),
+            'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)]
        }


--- a/yt_dlp/extractor/zoom.py
+++ b/yt_dlp/extractor/zoom.py
@ -127,6 +127,7 @@ class ZoomIE(InfoExtractor):
        return {
            'id': video_id,
            'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
+            'duration': int_or_none(data.get('duration')),
            'subtitles': subtitles,
            'formats': formats,
            'http_headers': {
--- a/yt_dlp/networking/_helper.py
+++ b/yt_dlp/networking/_helper.py
@ -2,6 +2,7 @@ from __future__ import annotations

 import contextlib
 import functools
+import socket
 import ssl
 import sys
 import typing
@ -206,3 +207,59 @@ def wrap_request_errors(func):
                e.handler = self
            raise
    return wrapper
+
+
+def _socket_connect(ip_addr, timeout, source_address):
+    # Create a socket for a single getaddrinfo() result tuple and connect it.
+    # The timeout is applied unless it is the global-default sentinel, and the
+    # socket is bound to `source_address` first if one is given.
+    # Returns the connected socket; on socket.error the socket is closed and
+    # the error is re-raised
+    af, socktype, proto, canonname, sa = ip_addr
+    sock = socket.socket(af, socktype, proto)
+    try:
+        if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+            sock.settimeout(timeout)
+        if source_address:
+            sock.bind(source_address)
+        sock.connect(sa)
+        return sock
+    except socket.error:
+        sock.close()
+        raise
+
+
+def create_connection(
+    address,
+    timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+    source_address=None,
+    *,
+    _create_socket_func=_socket_connect
+):
+    # Work around socket.create_connection() which tries all addresses from getaddrinfo() including IPv6.
+    # This filters the addresses based on the given source_address.
+    # Based on: https://github.com/python/cpython/blob/main/Lib/socket.py#L810
+    #
+    # `address` is a (host, port) pair. `_create_socket_func` is called as
+    # (ip_addr, timeout, source_address) for each candidate address and must
+    # return a connected socket or raise socket.error.
+    # Returns the first socket that connects; if every candidate fails, the
+    # error of the last attempt is raised
+    host, port = address
+    ip_addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+    if not ip_addrs:
+        raise socket.error('getaddrinfo returns an empty list')
+    if source_address is not None:
+        # A ':' in the source host marks it as IPv6; keep only remote
+        # addresses of the same family
+        af = socket.AF_INET if ':' not in source_address[0] else socket.AF_INET6
+        ip_addrs = [addr for addr in ip_addrs if addr[0] == af]
+        if not ip_addrs:
+            raise OSError(
+                f'No remote IPv{4 if af == socket.AF_INET else 6} addresses available for connect. '
+                f'Can\'t use "{source_address[0]}" as source address')
+
+    err = None
+    for ip_addr in ip_addrs:
+        try:
+            sock = _create_socket_func(ip_addr, timeout, source_address)
+            # Explicitly break __traceback__ reference cycle
+            # https://bugs.python.org/issue36820
+            err = None
+            return sock
+        except socket.error as e:
+            # Remember the failure and move on to the next candidate address
+            err = e
+
+    # Only reached when every candidate failed; `ip_addrs` is non-empty here,
+    # so `err` holds the last error
+    try:
+        raise err
+    finally:
+        # Explicitly break __traceback__ reference cycle
+        # https://bugs.python.org/issue36820
+        err = None
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@ -23,6 +23,7 @@ from urllib.request import (
 from ._helper import (
    InstanceStoreMixin,
    add_accept_encoding_header,
+    create_connection,
    get_redirect_method,
    make_socks_proxy_opts,
    select_proxy,
@ -54,44 +55,10 @@ if brotli:
 def _create_http_connection(http_class, source_address, *args, **kwargs):
    hc = http_class(*args, **kwargs)

+    if hasattr(hc, '_create_connection'):
+        hc._create_connection = create_connection
+
    if source_address is not None:
-        # This is to workaround _create_connection() from socket where it will try all
-        # address data from getaddrinfo() including IPv6. This filters the result from
-        # getaddrinfo() based on the source_address value.
-        # This is based on the cpython socket.create_connection() function.
-        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
-        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
-            host, port = address
-            err = None
-            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
-            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
-            ip_addrs = [addr for addr in addrs if addr[0] == af]
-            if addrs and not ip_addrs:
-                ip_version = 'v4' if af == socket.AF_INET else 'v6'
-                raise OSError(
-                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
-                    % (ip_version, source_address[0]))
-            for res in ip_addrs:
-                af, socktype, proto, canonname, sa = res
-                sock = None
-                try:
-                    sock = socket.socket(af, socktype, proto)
-                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
-                        sock.settimeout(timeout)
-                    sock.bind(source_address)
-                    sock.connect(sa)
-                    err = None  # Explicitly break reference cycle
-                    return sock
-                except OSError as _:
-                    err = _
-                    if sock is not None:
-                        sock.close()
-            if err is not None:
-                raise err
-            else:
-                raise OSError('getaddrinfo returns an empty list')
-        if hasattr(hc, '_create_connection'):
-            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc
@ -220,13 +187,28 @@ def make_socks_conn_class(base_class, socks_proxy):
    proxy_args = make_socks_proxy_opts(socks_proxy)

    class SocksConnection(base_class):
-        def connect(self):
-            self.sock = sockssocket()
-            self.sock.setproxy(**proxy_args)
-            if type(self.timeout) in (int, float):  # noqa: E721
-                self.sock.settimeout(self.timeout)
-            self.sock.connect((self.host, self.port))
+        _create_connection = create_connection

+        def connect(self):
+            def sock_socket_connect(ip_addr, timeout, source_address):
+                """Create a sockssocket for one resolved proxy address and connect through it.
+
+                @param ip_addr         A 5-tuple (af, socktype, proto, canonname, sa), i.e. the
+                                       shape of one socket.getaddrinfo() result entry
+                @param timeout         Socket timeout; left unset when it is the
+                                       socket._GLOBAL_DEFAULT_TIMEOUT sentinel
+                @param source_address  Local address to bind to; skipped when falsy
+                @returns               The connected sockssocket
+                @raises socket.error   Re-raised after the socket has been closed
+                """
+                af, socktype, proto, canonname, sa = ip_addr
+                sock = sockssocket(af, socktype, proto)
+                try:
+                    # Copy so the shared proxy_args is not mutated; only this attempt
+                    # uses the already-resolved proxy address taken from `sa`
+                    connect_proxy_args = proxy_args.copy()
+                    connect_proxy_args.update({'addr': sa[0], 'port': sa[1]})
+                    sock.setproxy(**connect_proxy_args)
+                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+                        sock.settimeout(timeout)
+                    if source_address:
+                        sock.bind(source_address)
+                    # The destination is the request host/port, not the proxy address
+                    # NOTE(review): presumably sockssocket.connect() performs the SOCKS handshake - confirm
+                    sock.connect((self.host, self.port))
+                    return sock
+                except socket.error:
+                    # Do not leak the socket on a failed attempt; the caller sees the original error
+                    sock.close()
+                    raise
+            self.sock = create_connection(
+                (proxy_args['addr'], proxy_args['port']), timeout=self.timeout,
+                source_address=self.source_address, _create_socket_func=sock_socket_connect)
            if isinstance(self, http.client.HTTPSConnection):
                self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)

@ -429,7 +411,7 @@ class UrllibRH(RequestHandler, InstanceStoreMixin):
        except urllib.error.HTTPError as e:
            if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
                # Prevent file object from being closed when urllib.error.HTTPError is destroyed.
-                e._closer.file = None
+                e._closer.close_called = True
                raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
            raise  # unexpected
        except urllib.error.URLError as e:
--- a/yt_dlp/networking/exceptions.py
+++ b/yt_dlp/networking/exceptions.py
@ -115,7 +115,7 @@ class _CompatHTTPError(urllib.error.HTTPError, HTTPError):
            hdrs=http_error.response.headers,
            fp=http_error.response
        )
-        self._closer.file = None  # Disable auto close
+        self._closer.close_called = True  # Disable auto close
        self._http_error = http_error
        HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)

--- a/yt_dlp/socks.py
+++ b/yt_dlp/socks.py
@ -134,26 +134,31 @@ class sockssocket(socket.socket):
            self.close()
            raise InvalidVersionError(expected_version, got_version)

-    def _resolve_address(self, destaddr, default, use_remote_dns):
-        try:
-            return socket.inet_aton(destaddr)
-        except OSError:
-            if use_remote_dns and self._proxy.remote_dns:
-                return default
-            else:
-                return socket.inet_aton(socket.gethostbyname(destaddr))
+    def _resolve_address(self, destaddr, default, use_remote_dns, family=None):
+        """Resolve destaddr to a packed binary address.
+
+        @param destaddr        IP address literal or hostname
+        @param default         Value returned in place of a packed address when
+                               resolution is left to the proxy
+        @param use_remote_dns  Whether the caller allows remote (proxy-side) DNS;
+                               only honoured if self._proxy.remote_dns is also set
+        @param family          Restrict parsing/resolution to this address family;
+                               when falsy, AF_INET is tried before AF_INET6
+        @returns               Tuple (family, packed_address); family is 0 and
+                               packed_address is `default` when resolution is deferred
+        """
+        # First try to parse destaddr as an IP literal of each candidate family
+        for f in (family,) if family else (socket.AF_INET, socket.AF_INET6):
+            try:
+                return f, socket.inet_pton(f, destaddr)
+            except OSError:
+                continue
+
+        if use_remote_dns and self._proxy.remote_dns:
+            # Not an IP literal: let the proxy resolve the hostname
+            return 0, default
+        else:
+            # Resolve locally and use the first result only
+            res = socket.getaddrinfo(destaddr, None, family=family or 0)
+            f, _, _, _, ipaddr = res[0]
+            return f, socket.inet_pton(f, ipaddr[0])

    def _setup_socks4(self, address, is_4a=False):
        destaddr, port = address

-        ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
+        _, ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a, family=socket.AF_INET)

        packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr

        username = (self._proxy.username or '').encode()
        packet += username + b'\x00'

-        if is_4a and self._proxy.remote_dns:
+        if is_4a and self._proxy.remote_dns and ipaddr == SOCKS4_DEFAULT_DSTIP:
            packet += destaddr.encode() + b'\x00'

        self.sendall(packet)
@ -210,7 +215,7 @@ class sockssocket(socket.socket):
    def _setup_socks5(self, address):
        destaddr, port = address

-        ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
+        family, ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)

        self._socks5_auth()

@ -220,8 +225,10 @@ class sockssocket(socket.socket):
            destaddr = destaddr.encode()
            packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
            packet += self._len_and_data(destaddr)
-        else:
+        elif family == socket.AF_INET:
            packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
+        elif family == socket.AF_INET6:
+            packet += struct.pack('!B', Socks5AddressType.ATYP_IPV6) + ipaddr
        packet += struct.pack('!H', port)

        self.sendall(packet)
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -669,6 +669,7 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):

 def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
+    # XXX: this handles drive relative paths (c:sth) incorrectly
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
@ -687,7 +688,10 @@ def sanitize_path(s, force=False):
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
-    return os.path.join(*sanitized_path)
+    # TODO: Fix behavioral differences <3.12
+    # The workaround using `normpath` only superficially passes tests
+    # Ref: https://github.com/python/cpython/pull/100351
+    return os.path.normpath(os.path.join(*sanitized_path))


 def sanitize_url(url, *, scheme='http'):
@ -1256,7 +1260,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
-    today = datetime_round(datetime.datetime.utcnow(), precision)
+    today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
@ -1319,8 +1323,8 @@ def datetime_round(dt, precision='day'):
        'second': 1,
    }
    roundto = lambda x, n: ((x + n / 2) // n) * n
-    timestamp = calendar.timegm(dt.timetuple())
-    return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
+    timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
+    return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)


 def hyphenate_date(date_str):
@ -2847,6 +2851,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
        'quicktime': 'mov',
        'webm': 'webm',
        'vp9': 'vp9',
+        'video/ogg': 'ogv',
        'x-flv': 'flv',
        'x-m4v': 'm4v',
        'x-matroska': 'mkv',