Merge branch 'yt-dlp:master' into rls/arm-ubuntu-bump

2024-09-28 13:47:53 +00:00 · 2024-03-04 11:01:23 -06:00 · 2024-03-04 11:01:23 -06:00 · 23ad448446
parent 5da26be37e ac340d0745
commit 23ad448446
23 changed files with 693 additions and 319 deletions
--- a/test/test_websockets.py
+++ b/test/test_websockets.py
@ -192,8 +192,8 @@ def test_raise_http_error(self, handler, status):

    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    @pytest.mark.parametrize('params,extensions', [
-        ({'timeout': 0.00001}, {}),
-        ({}, {'timeout': 0.00001}),
+        ({'timeout': sys.float_info.min}, {}),
+        ({}, {'timeout': sys.float_info.min}),
    ])
    def test_timeout(self, handler, params, extensions):
        with handler(**params) as rh:
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -690,7 +690,6 @@ def process_color_policy(stream):
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)
-        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()
@ -964,6 +963,7 @@ def __exit__(self, *args):
    def close(self):
        self.save_cookies()
        self._request_director.close()
+        del self._request_director

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.
@ -4160,6 +4160,10 @@ def build_request_director(self, handlers, preferences=None):
            director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
        return director

+    @functools.cached_property
+    def _request_director(self):
+        return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
+
    def encode(self, s):
        if isinstance(s, bytes):
            return s  # Already encoded
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -444,6 +444,7 @@
 from .dailymotion import (
    DailymotionIE,
    DailymotionPlaylistIE,
+    DailymotionSearchIE,
    DailymotionUserIE,
 )
 from .dailywire import (
@ -2499,6 +2500,7 @@
    Zee5SeriesIE,
 )
 from .zeenews import ZeeNewsIE
+from .zenporn import ZenPornIE
 from .zetland import ZetlandDKArticleIE
 from .zhihu import ZhihuIE
 from .zingmp3 import (
--- a/yt_dlp/extractor/altcensored.py
+++ b/yt_dlp/extractor/altcensored.py
@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
            'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
            'display_id': 'k0srjLSkga8.webm',
            'release_date': '20180403',
-            'creator': 'Virginie Vota',
+            'creators': ['Virginie Vota'],
            'release_year': 2018,
            'upload_date': '20230318',
            'uploader': 'admin@altcensored.com',
@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
            'duration': 926.09,
            'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
            'view_count': int,
-            'categories': ['News & Politics'],
+            'categories': ['News & Politics'],  # FIXME
        }
    }]

@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
            'title': 'Virginie Vota',
            'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
        },
-        'playlist_count': 91
+        'playlist_count': 85,
    }, {
        'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
        'info_dict': {
            'title': 'yukikaze775',
            'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
        },
-        'playlist_count': 4
+        'playlist_count': 4,
+    }, {
+        'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
+        'info_dict': {
+            'title': 'Mister Metokur',
+            'id': 'UCfYbb7nga6-icsFWWgS-kWw',
+        },
+        'playlist_count': 121,
    }]

    def _real_extract(self, url):
@ -78,7 +85,7 @@ def _real_extract(self, url):
            url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
        title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
        page_count = int_or_none(self._html_search_regex(
-            r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
+            r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
            webpage, 'page count', default='1'))

        def page_func(page_num):
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@ -300,7 +300,7 @@ def _real_extract(self, url):
            is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
            if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
                entry['formats'].append({
-                    'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
+                    'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
                    'format': f.get('format'),
                    'width': int_or_none(f.get('width')),
                    'height': int_or_none(f.get('height')),
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@ -1996,7 +1996,7 @@ def _extract_video_metadata(self, url, video_id, season_id):
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
-                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
+                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
--- a/yt_dlp/extractor/cctv.py
+++ b/yt_dlp/extractor/cctv.py
@ -88,6 +88,20 @@ class CCTVIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
+    }, {
+        # videoCenterId: "id"
+        'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
+        'info_dict': {
+            'id': '5c846c0518444308ba32c4159df3b3e0',
+            'ext': 'mp4',
+            'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集：风物长宜放眼量',
+            'uploader': 'yangjuan',
+            'timestamp': 1708554940,
+            'upload_date': '20240221',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }, {
        # var ids = ["id"]
        'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
@ -128,7 +142,7 @@ def _real_extract(self, url):

        video_id = self._search_regex(
            [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
-             r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
+             r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
             r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
             r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
             r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
--- a/yt_dlp/extractor/chzzk.py
+++ b/yt_dlp/extractor/chzzk.py
@ -2,7 +2,7 @@

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
+    UserNotLive,
    float_or_none,
    int_or_none,
    parse_iso8601,
@ -40,7 +40,7 @@ def _real_extract(self, url):
            note='Downloading channel info', errnote='Unable to download channel info')['content']

        if live_detail.get('status') == 'CLOSE':
-            raise ExtractorError('The channel is not currently live', expected=True)
+            raise UserNotLive(video_id=channel_id)

        live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)

--- a/yt_dlp/extractor/cloudflarestream.py
+++ b/yt_dlp/extractor/cloudflarestream.py
@ -4,27 +4,25 @@


 class CloudflareStreamIE(InfoExtractor):
+    _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
    _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
-    _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
+    _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
    _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            (?:watch\.)?%s/|
-                            %s
-                        )
-                        (?P<id>%s)
-                    ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
-    _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
+    _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
+    _EMBED_REGEX = [
+        rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
+        rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
+    ]
    _TESTS = [{
        'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
        'info_dict': {
            'id': '31c9291ab41fac05471db4e73aa11717',
            'ext': 'mp4',
            'title': '31c9291ab41fac05471db4e73aa11717',
+            'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
        },
        'params': {
-            'skip_download': True,
+            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
    }, {
        'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
        'only_matching': True,
+    }, {
+        'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
+        'only_matching': True,
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
+        'info_dict': {
+            'id': 'eaef9dea5159cf968be84241b5cedfe7',
+            'ext': 'mp4',
+            'title': 'eaef9dea5159cf968be84241b5cedfe7',
+            'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
    }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/dailymotion.py
+++ b/yt_dlp/extractor/dailymotion.py
@ -1,6 +1,7 @@
 import functools
 import json
 import re
+import urllib.parse

 from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
@ -44,36 +45,41 @@ def _real_initialize(self):
        self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
        self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')

+    def _get_token(self, xid):
+        cookies = self._get_dailymotion_cookies()
+        token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
+        if token:
+            return token
+
+        data = {
+            'client_id': 'f1a362d288c1b98099c7',
+            'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
+        }
+        username, password = self._get_login_info()
+        if username:
+            data.update({
+                'grant_type': 'password',
+                'password': password,
+                'username': username,
+            })
+        else:
+            data['grant_type'] = 'client_credentials'
+        try:
+            token = self._download_json(
+                'https://graphql.api.dailymotion.com/oauth/token',
+                None, 'Downloading Access Token',
+                data=urlencode_postdata(data))['access_token']
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+                raise ExtractorError(self._parse_json(
+                    e.cause.response.read().decode(), xid)['error_description'], expected=True)
+            raise
+        self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
+        return token
+
    def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
        if not self._HEADERS.get('Authorization'):
-            cookies = self._get_dailymotion_cookies()
-            token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
-            if not token:
-                data = {
-                    'client_id': 'f1a362d288c1b98099c7',
-                    'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
-                }
-                username, password = self._get_login_info()
-                if username:
-                    data.update({
-                        'grant_type': 'password',
-                        'password': password,
-                        'username': username,
-                    })
-                else:
-                    data['grant_type'] = 'client_credentials'
-                try:
-                    token = self._download_json(
-                        'https://graphql.api.dailymotion.com/oauth/token',
-                        None, 'Downloading Access Token',
-                        data=urlencode_postdata(data))['access_token']
-                except ExtractorError as e:
-                    if isinstance(e.cause, HTTPError) and e.cause.status == 400:
-                        raise ExtractorError(self._parse_json(
-                            e.cause.response.read().decode(), xid)['error_description'], expected=True)
-                    raise
-                self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
-            self._HEADERS['Authorization'] = 'Bearer ' + token
+            self._HEADERS['Authorization'] = f'Bearer {self._get_token(xid)}'

        resp = self._download_json(
            'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
@ -393,9 +399,55 @@ def _extract_embed_urls(cls, url, webpage):
                yield '//dailymotion.com/playlist/%s' % p


+class DailymotionSearchIE(DailymotionPlaylistBaseIE):
+    IE_NAME = 'dailymotion:search'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/search/(?P<id>[^/?#]+)/videos'
+    _PAGE_SIZE = 20
+    _TESTS = [{
+        'url': 'http://www.dailymotion.com/search/king of turtles/videos',
+        'info_dict': {
+            'id': 'king of turtles',
+            'title': 'king of turtles',
+        },
+        'playlist_mincount': 90,
+    }]
+    _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } '
+
+    def _call_search_api(self, term, page, note):
+        if not self._HEADERS.get('Authorization'):
+            self._HEADERS['Authorization'] = f'Bearer {self._get_token(term)}'
+        resp = self._download_json(
+            'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
+                'operationName': 'SEARCH_QUERY',
+                'query': self._SEARCH_QUERY,
+                'variables': {
+                    'limit': 20,
+                    'page': page,
+                    'query': term,
+                }
+            }).encode(), headers=self._HEADERS)
+        obj = traverse_obj(resp, ('data', 'search', {dict}))
+        if not obj:
+            raise ExtractorError(
+                traverse_obj(resp, ('errors', 0, 'message', {str})) or 'Could not fetch search data')
+
+        return obj
+
+    def _fetch_page(self, term, page):
+        page += 1
+        response = self._call_search_api(term, page, f'Searching "{term}" page {page}')
+        for xid in traverse_obj(response, ('videos', 'edges', ..., 'node', 'xid')):
+            yield self.url_result(f'https://www.dailymotion.com/video/{xid}', DailymotionIE, xid)
+
+    def _real_extract(self, url):
+        term = urllib.parse.unquote_plus(self._match_id(url))
+        return self.playlist_result(
+            OnDemandPagedList(functools.partial(self._fetch_page, term), self._PAGE_SIZE), term, term)
+
+
 class DailymotionUserIE(DailymotionPlaylistBaseIE):
    IE_NAME = 'dailymotion:user'
-    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.dailymotion.com/user/nqtv',
        'info_dict': {
--- a/yt_dlp/extractor/dumpert.py
+++ b/yt_dlp/extractor/dumpert.py
@ -8,9 +8,9 @@

 class DumpertIE(InfoExtractor):
    _VALID_URL = r'''(?x)
-        (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
-            /(?:mediabase|embed|item)/|
-            (?:/toppers|/latest|/?)\?selectedId=
+        (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:
+            (?:mediabase|embed|item)/|
+            [^#]*[?&]selectedId=
        )(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
    _TESTS = [{
        'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
@ -56,6 +56,9 @@ class DumpertIE(InfoExtractor):
    }, {
        'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
        'only_matching': True,
+    }, {
+        'url': 'https://www.dumpert.nl/toppers/dag?selectedId=100086074_f5cef3ac',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/yt_dlp/extractor/francetv.py
+++ b/yt_dlp/extractor/francetv.py
@ -1,60 +1,49 @@
+import re
+import urllib.parse
+
 from .common import InfoExtractor
 from .dailymotion import DailymotionIE
+from ..networking import HEADRequest
 from ..utils import (
-    ExtractorError,
    determine_ext,
+    filter_dict,
    format_field,
    int_or_none,
    join_nonempty,
    parse_iso8601,
-    parse_qs,
+    smuggle_url,
+    unsmuggle_url,
+    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class FranceTVBaseInfoExtractor(InfoExtractor):
-    def _make_url_result(self, video_or_full_id, catalog=None):
-        full_id = 'francetv:%s' % video_or_full_id
-        if '@' not in video_or_full_id and catalog:
-            full_id += '@%s' % catalog
-        return self.url_result(
-            full_id, ie=FranceTVIE.ie_key(),
-            video_id=video_or_full_id.split('@')[0])
+    def _make_url_result(self, video_id, url=None):
+        video_id = video_id.split('@')[0]  # for compat with old @catalog IDs
+        full_id = f'francetv:{video_id}'
+        if url:
+            full_id = smuggle_url(full_id, {'hostname': urllib.parse.urlparse(url).hostname})
+        return self.url_result(full_id, FranceTVIE, video_id)


 class FranceTVIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-                    (?:
-                        https?://
-                            sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
-                            .*?\bidDiffusion=[^&]+|
-                        (?:
-                            https?://videos\.francetv\.fr/video/|
-                            francetv:
-                        )
-                        (?P<id>[^@]+)(?:@(?P<catalog>.+))?
-                    )
-                    '''
-    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1']
+    _VALID_URL = r'francetv:(?P<id>[^@#]+)'
+    _GEO_COUNTRIES = ['FR']
+    _GEO_BYPASS = False

    _TESTS = [{
-        # without catalog
-        'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
-        'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
+        'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
        'info_dict': {
-            'id': '162311093',
+            'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
            'ext': 'mp4',
            'title': '13h15, le dimanche... - Les mystères de Jésus',
-            'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
            'timestamp': 1502623500,
+            'duration': 2580,
+            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20170813',
        },
-    }, {
-        # with catalog
-        'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
-        'only_matching': True,
-    }, {
-        'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
-        'only_matching': True,
+        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'francetv:162311093',
        'only_matching': True,
@ -76,10 +65,7 @@ class FranceTVIE(InfoExtractor):
        'only_matching': True,
    }]

-    def _extract_video(self, video_id, catalogue=None):
-        # Videos are identified by idDiffusion so catalogue part is optional.
-        # However when provided, some extra formats may be returned so we pass
-        # it if available.
+    def _extract_video(self, video_id, hostname=None):
        is_live = None
        videos = []
        title = None
@ -91,18 +77,20 @@ def _extract_video(self, video_id, catalogue=None):
        timestamp = None
        spritesheets = None

-        for device_type in ('desktop', 'mobile'):
+        # desktop+chrome returns dash; mobile+safari returns hls
+        for device_type, browser in [('desktop', 'chrome'), ('mobile', 'safari')]:
            dinfo = self._download_json(
-                'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
-                video_id, 'Downloading %s video JSON' % device_type, query={
+                f'https://k7.ftven.fr/videos/{video_id}', video_id,
+                f'Downloading {device_type} {browser} video JSON', query=filter_dict({
                    'device_type': device_type,
-                    'browser': 'chrome',
-                }, fatal=False)
+                    'browser': browser,
+                    'domain': hostname,
+                }), fatal=False)

            if not dinfo:
                continue

-            video = dinfo.get('video')
+            video = traverse_obj(dinfo, ('video', {dict}))
            if video:
                videos.append(video)
                if duration is None:
@ -112,7 +100,7 @@ def _extract_video(self, video_id, catalogue=None):
                if spritesheets is None:
                    spritesheets = video.get('spritesheets')

-            meta = dinfo.get('meta')
+            meta = traverse_obj(dinfo, ('meta', {dict}))
            if meta:
                if title is None:
                    title = meta.get('title')
@ -126,43 +114,47 @@ def _extract_video(self, video_id, catalogue=None):
                if timestamp is None:
                    timestamp = parse_iso8601(meta.get('broadcasted_at'))

-        formats = []
-        subtitles = {}
-        for video in videos:
+        formats, subtitles, video_url = [], {}, None
+        for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
+            video_url = video['url']
            format_id = video.get('format')

-            video_url = None
-            if video.get('workflow') == 'token-akamai':
-                token_url = video.get('token')
-                if token_url:
-                    token_json = self._download_json(
-                        token_url, video_id,
-                        'Downloading signed %s manifest URL' % format_id)
-                    if token_json:
-                        video_url = token_json.get('url')
-            if not video_url:
-                video_url = video.get('url')
+            token_url = url_or_none(video.get('token'))
+            if token_url and video.get('workflow') == 'token-akamai':
+                tokenized_url = traverse_obj(self._download_json(
+                    token_url, video_id, f'Downloading signed {format_id} manifest URL',
+                    fatal=False, query={
+                        'format': 'json',
+                        'url': video_url,
+                    }), ('url', {url_or_none}))
+                if tokenized_url:
+                    video_url = tokenized_url

            ext = determine_ext(video_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
-                    video_url, video_id, f4m_id=format_id, fatal=False))
+                    video_url, video_id, f4m_id=format_id or ext, fatal=False))
            elif ext == 'm3u8':
+                format_id = format_id or 'hls'
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    video_url, video_id, 'mp4',
-                    entry_protocol='m3u8_native', m3u8_id=format_id,
-                    fatal=False)
+                    video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+                for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
+                    if mobj := re.match(rf'{format_id}-[Aa]udio-\w+-(?P<bitrate>\d+)', f['format_id']):
+                        f.update({
+                            'tbr': int_or_none(mobj.group('bitrate')),
+                            'acodec': 'mp4a',
+                        })
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            elif ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
-                    video_url, video_id, mpd_id=format_id, fatal=False)
+                    video_url, video_id, mpd_id=format_id or 'dash', fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            elif video_url.startswith('rtmp'):
                formats.append({
                    'url': video_url,
-                    'format_id': 'rtmp-%s' % format_id,
+                    'format_id': join_nonempty('rtmp', format_id),
                    'ext': 'flv',
                })
            else:
@ -174,6 +166,13 @@ def _extract_video(self, video_id, catalogue=None):

            # XXX: what is video['captions']?

+        if not formats and video_url:
+            urlh = self._request_webpage(
+                HEADRequest(video_url), video_id, 'Checking for geo-restriction',
+                fatal=False, expected_status=403)
+            if urlh and urlh.headers.get('x-errortype') == 'geo':
+                self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+
        for f in formats:
            if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'):
                f['language_preference'] = -10
@ -194,7 +193,7 @@ def _extract_video(self, video_id, catalogue=None):
                    # a 10×10 grid of thumbnails corresponding to approximately
                    # 2 seconds of the video; the last spritesheet may be shorter
                    'duration': 200,
-                } for sheet in spritesheets]
+                } for sheet in traverse_obj(spritesheets, (..., {url_or_none}))]
            })

        return {
@ -210,21 +209,15 @@ def _extract_video(self, video_id, catalogue=None):
            'series': title if episode_number else None,
            'episode_number': int_or_none(episode_number),
            'season_number': int_or_none(season_number),
+            '_format_sort_fields': ('res', 'tbr', 'proto'),  # prioritize m3u8 over dash
        }

    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-        catalog = mobj.group('catalog')
+        url, smuggled_data = unsmuggle_url(url, {})
+        video_id = self._match_id(url)
+        hostname = smuggled_data.get('hostname') or 'www.france.tv'

-        if not video_id:
-            qs = parse_qs(url)
-            video_id = qs.get('idDiffusion', [None])[0]
-            catalog = qs.get('catalogue', [None])[0]
-            if not video_id:
-                raise ExtractorError('Invalid URL', expected=True)
-
-        return self._extract_video(video_id, catalog)
+        return self._extract_video(video_id, hostname=hostname)


 class FranceTVSiteIE(FranceTVBaseInfoExtractor):
@ -246,6 +239,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
        },
        'add_ie': [FranceTVIE.ie_key()],
    }, {
+        # geo-restricted
        'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
        'info_dict': {
            'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
@ -304,17 +298,16 @@ def _real_extract(self, url):

        webpage = self._download_webpage(url, display_id)

-        catalogue = None
        video_id = self._search_regex(
            r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'video id', default=None, group='id')

        if not video_id:
-            video_id, catalogue = self._html_search_regex(
-                r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
-                webpage, 'video ID').split('@')
+            video_id = self._html_search_regex(
+                r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"',
+                webpage, 'video ID')

-        return self._make_url_result(video_id, catalogue)
+        return self._make_url_result(video_id, url=url)


 class FranceTVInfoIE(FranceTVBaseInfoExtractor):
@ -328,8 +321,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
            'ext': 'mp4',
            'title': 'Soir 3',
            'upload_date': '20190822',
-            'timestamp': 1566510900,
-            'description': 'md5:72d167097237701d6e8452ff03b83c00',
+            'timestamp': 1566510730,
+            'thumbnail': r're:^https?://.*\.jpe?g$',
+            'duration': 1637,
            'subtitles': {
                'fr': 'mincount:2',
            },
@ -344,8 +338,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
        'info_dict': {
            'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
            'ext': 'mp4',
-            'title': 'Covid-19 : une situation catastrophique à New Dehli',
-            'thumbnail': str,
+            'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021',
+            'thumbnail': r're:^https?://.*\.jpe?g$',
            'duration': 76,
            'timestamp': 1619028518,
            'upload_date': '20210421',
@ -371,11 +365,17 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
            'id': 'x4iiko0',
            'ext': 'mp4',
            'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
-            'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
+            'description': 'md5:fdcb582c370756293a65cdfbc6ecd90e',
            'timestamp': 1467011958,
-            'upload_date': '20160627',
            'uploader': 'France Inter',
            'uploader_id': 'x2q2ez',
+            'upload_date': '20160627',
+            'view_count': int,
+            'tags': ['Politique', 'France Inter', '27 juin 2016', 'Linvité de 8h20', 'Cécile Duflot', 'Patrick Cohen'],
+            'age_limit': 0,
+            'duration': 640,
+            'like_count': int,
+            'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
        },
        'add_ie': ['Dailymotion'],
    }, {
@ -405,4 +405,4 @@ def _real_extract(self, url):
             r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
            webpage, 'video id')

-        return self._make_url_result(video_id)
+        return self._make_url_result(video_id, url=url)
--- a/yt_dlp/extractor/lumni.py
+++ b/yt_dlp/extractor/lumni.py
@ -1,8 +1,7 @@
-from .common import InfoExtractor
-from .francetv import FranceTVIE
+from .francetv import FranceTVBaseInfoExtractor


-class LumniIE(InfoExtractor):
+class LumniIE(FranceTVBaseInfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle',
@ -21,4 +20,4 @@ def _real_extract(self, url):
        webpage = self._download_webpage(url, display_id)
        video_id = self._html_search_regex(
            r'<div[^>]+data-factoryid\s*=\s*["\']([^"\']+)', webpage, 'video id')
-        return self.url_result(f'francetv:{video_id}', FranceTVIE, video_id)
+        return self._make_url_result(video_id, url=url)
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@ -13,13 +13,11 @@
 from ..utils import (
    ExtractorError,
    OnDemandPagedList,
-    bug_reports_message,
    clean_html,
    float_or_none,
    int_or_none,
    join_nonempty,
    parse_duration,
-    parse_filesize,
    parse_iso8601,
    parse_resolution,
    qualities,
@ -38,6 +36,8 @@
 class NiconicoIE(InfoExtractor):
    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'
+    _GEO_COUNTRIES = ['JP']
+    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
@ -55,25 +55,31 @@ class NiconicoIE(InfoExtractor):
            'duration': 33,
            'view_count': int,
            'comment_count': int,
+            'genres': ['未設定'],
+            'tags': [],
+            'expected_protocol': str,
        },
-        'skip': 'Requires an account',
    }, {
        # File downloaded with and without credentials are different, so omit
        # the md5 field
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'id': 'nm14296458',
-            'ext': 'swf',
-            'title': '【鏡音リン】Dance on media【オリジナル】take2!',
-            'description': 'md5:689f066d74610b3b22e0f1739add0f58',
+            'ext': 'mp4',
+            'title': '【Kagamine Rin】Dance on media【Original】take2!',
+            'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
            'thumbnail': r're:https?://.*',
            'uploader': 'りょうた',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
-            'duration': 209,
+            'duration': 208.0,
+            'comment_count': int,
+            'view_count': int,
+            'genres': ['音楽・サウンド'],
+            'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
+            'expected_protocol': str,
        },
-        'skip': 'Requires an account',
    }, {
        # 'video exists but is marked as "deleted"
        # md5 is unstable
@ -107,22 +113,24 @@ class NiconicoIE(InfoExtractor):
    }, {
        # video not available via `getflv`; "old" HTML5 video
        'url': 'http://www.nicovideo.jp/watch/sm1151009',
-        'md5': '8fa81c364eb619d4085354eab075598a',
+        'md5': 'f95a3d259172667b293530cc2e41ebda',
        'info_dict': {
            'id': 'sm1151009',
            'ext': 'mp4',
            'title': 'マスターシステム本体内蔵のスペハリのメインテーマ（ＰＳＧ版）',
-            'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
+            'description': 'md5:f95a3d259172667b293530cc2e41ebda',
            'thumbnail': r're:https?://.*',
            'duration': 184,
-            'timestamp': 1190868283,
-            'upload_date': '20070927',
+            'timestamp': 1190835883,
+            'upload_date': '20070926',
            'uploader': 'denden2',
            'uploader_id': '1392194',
            'view_count': int,
            'comment_count': int,
+            'genres': ['ゲーム'],
+            'tags': [],
+            'expected_protocol': str,
        },
-        'skip': 'Requires an account',
    }, {
        # "New" HTML5 video
        # md5 is unstable
@ -132,16 +140,18 @@ class NiconicoIE(InfoExtractor):
            'ext': 'mp4',
            'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
            'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
-            'timestamp': 1498514060,
+            'timestamp': 1498481660,
            'upload_date': '20170626',
-            'uploader': 'ゲスト',
+            'uploader': 'no-namamae',
            'uploader_id': '40826363',
            'thumbnail': r're:https?://.*',
            'duration': 198,
            'view_count': int,
            'comment_count': int,
+            'genres': ['アニメ'],
+            'tags': [],
+            'expected_protocol': str,
        },
-        'skip': 'Requires an account',
    }, {
        # Video without owner
        'url': 'http://www.nicovideo.jp/watch/sm18238488',
@ -151,7 +161,7 @@ class NiconicoIE(InfoExtractor):
            'ext': 'mp4',
            'title': '【実写版】ミュータントタートルズ',
            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
-            'timestamp': 1341160408,
+            'timestamp': 1341128008,
            'upload_date': '20120701',
            'uploader': None,
            'uploader_id': None,
@ -159,8 +169,10 @@ class NiconicoIE(InfoExtractor):
            'duration': 5271,
            'view_count': int,
            'comment_count': int,
+            'genres': ['エンターテイメント'],
+            'tags': [],
+            'expected_protocol': str,
        },
-        'skip': 'Requires an account',
    }, {
        'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
        'only_matching': True,
@ -353,15 +365,10 @@ def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dm
        if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
            return None

-        def extract_video_quality(video_quality):
-            return parse_filesize('%sB' % self._search_regex(
-                r'\| ([0-9]*\.?[0-9]*[MK])', video_quality, 'vbr', default=''))
-
        format_id = '-'.join(
            [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])

        vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
-        vid_quality = traverse_obj(video_quality, ('metadata', 'bitrate'))

        return {
            'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']),
@ -370,10 +377,15 @@ def extract_video_quality(video_quality):
            'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
            'acodec': 'aac',
            'vcodec': 'h264',
-            'abr': float_or_none(traverse_obj(audio_quality, ('metadata', 'bitrate')), 1000),
-            'vbr': float_or_none(vid_quality if vid_quality > 0 else extract_video_quality(vid_qual_label), 1000),
-            'height': traverse_obj(video_quality, ('metadata', 'resolution', 'height')),
-            'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')),
+            **traverse_obj(audio_quality, ('metadata', {
+                'abr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
+                'asr': ('samplingRate', {int_or_none}),
+            })),
+            **traverse_obj(video_quality, ('metadata', {
+                'vbr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
+                'height': ('resolution', 'height', {int_or_none}),
+                'width': ('resolution', 'width', {int_or_none}),
+            })),
            'quality': -2 if 'low' in video_quality['id'] else None,
            'protocol': 'niconico_dmc',
            'expected_protocol': dmc_protocol,  # XXX: This is not a documented field
@ -383,6 +395,63 @@ def extract_video_quality(video_quality):
            }
        }

+    def _yield_dmc_formats(self, api_data, video_id):
+        dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
+        audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
+        videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
+        protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
+        if not all((audios, videos, protocols)):
+            return
+
+        for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
+            if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
+                yield fmt
+
+    def _yield_dms_formats(self, api_data, video_id):
+        fmt_filter = lambda _, v: v['isAvailable'] and v['id']
+        videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
+        audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
+        access_key = traverse_obj(api_data, ('media', 'domand', 'accessRightKey', {str}))
+        track_id = traverse_obj(api_data, ('client', 'watchTrackId', {str}))
+        if not all((videos, audios, access_key, track_id)):
+            return
+
+        dms_m3u8_url = self._download_json(
+            f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id,
+            data=json.dumps({
+                'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios)))
+            }).encode(), query={'actionTrackId': track_id}, headers={
+                'x-access-right-key': access_key,
+                'x-frontend-id': 6,
+                'x-frontend-version': 0,
+                'x-request-with': 'https://www.nicovideo.jp',
+            })['data']['contentUrl']
+        # Getting all audio formats results in duplicate video formats which we filter out later
+        dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
+
+        # m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
+        for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
+            yield {
+                **audio_fmt,
+                **traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
+                    'format_id': ('id', {str}),
+                    'abr': ('bitRate', {functools.partial(float_or_none, scale=1000)}),
+                    'asr': ('samplingRate', {int_or_none}),
+                }), get_all=False),
+                'acodec': 'aac',
+                'ext': 'm4a',
+            }
+
+        # Sort before removing dupes to keep the format dicts with the lowest tbr
+        video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
+        self._remove_duplicate_formats(video_fmts)
+        # Calculate the true vbr/tbr by subtracting the lowest abr
+        min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000
+        for video_fmt in video_fmts:
+            video_fmt['tbr'] -= min_abr
+            video_fmt['format_id'] = f'video-{video_fmt["tbr"]:.0f}'
+            yield video_fmt
+
    def _real_extract(self, url):
        video_id = self._match_id(url)

@ -409,19 +478,29 @@ def _real_extract(self, url):
                    webpage, 'error reason', default=None)
                if not error_msg:
                    raise
-                raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True)
+                raise ExtractorError(clean_html(error_msg), expected=True)

-        formats = []
-
-        def get_video_info(*items, get_first=True, **kwargs):
-            return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
-
-        quality_info = api_data['media']['delivery']['movie']
-        session_api_data = quality_info['session']
-        for (audio_quality, video_quality, protocol) in itertools.product(quality_info['audios'], quality_info['videos'], session_api_data['protocols']):
-            fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol)
-            if fmt:
-                formats.append(fmt)
+        availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
+            'needs_premium': ('isPremium', {bool}),
+            'needs_subscription': ('isAdmission', {bool}),
+        })) or {'needs_auth': True}))
+        formats = [*self._yield_dmc_formats(api_data, video_id),
+                   *self._yield_dms_formats(api_data, video_id)]
+        if not formats:
+            fail_msg = clean_html(self._html_search_regex(
+                r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
+                webpage, 'fail message', default=None, group='msg'))
+            if fail_msg:
+                self.to_screen(f'Niconico said: {fail_msg}')
+            if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
+                availability = None
+                self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+            elif availability == 'premium_only':
+                self.raise_login_required('This video requires premium', metadata_available=True)
+            elif availability == 'subscriber_only':
+                self.raise_login_required('This video is for members only', metadata_available=True)
+            elif availability == 'needs_auth':
+                self.raise_login_required(metadata_available=False)

        # Start extracting information
        tags = None
@ -440,11 +519,15 @@ def get_video_info(*items, get_first=True, **kwargs):

        thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])

+        def get_video_info(*items, get_first=True, **kwargs):
+            return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
+
        return {
            'id': video_id,
            '_api_data': api_data,
            'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
            'formats': formats,
+            'availability': availability,
            'thumbnails': [{
                'id': key,
                'url': url,
@ -472,8 +555,11 @@ def get_video_info(*items, get_first=True, **kwargs):

    def _get_subtitles(self, video_id, api_data):
        comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
+        if not comments_info.get('server'):
+            return
+
        danmaku = traverse_obj(self._download_json(
-            f'{comments_info.get("server")}/v1/threads', video_id, data=json.dumps({
+            f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({
                'additionals': {},
                'params': comments_info.get('params'),
                'threadKey': comments_info.get('threadKey'),
@ -489,10 +575,6 @@ def _get_subtitles(self, video_id, api_data):
            note='Downloading comments', errnote='Failed to download comments'),
            ('data', 'threads', ..., 'comments', ...))

-        if not danmaku:
-            self.report_warning(f'Failed to get comments. {bug_reports_message()}')
-            return
-
        return {
            'comments': [{
                'ext': 'json',
--- a/yt_dlp/extractor/ntvru.py
+++ b/yt_dlp/extractor/ntvru.py
@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
            'duration': 172,
            'view_count': int,
        },
+        'skip': '404 Not Found',
    }, {
        'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
        'md5': '82dbd49b38e3af1d00df16acbeab260c',
@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
    }]

    _VIDEO_ID_REGEXES = [
-        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
+        r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
+        r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
        r'<video embed=[^>]+><id>(\d+)</id>',
        r'<video restriction[^>]+><key>(\d+)</key>',
    ]
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@ -28,6 +28,29 @@ class RaiBaseIE(InfoExtractor):
    _GEO_COUNTRIES = ['IT']
    _GEO_BYPASS = False

+    def _fix_m3u8_formats(self, media_url, video_id):
+        fmts = self._extract_m3u8_formats(
+            media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+
+        # Fix malformed m3u8 manifests by setting audio-only/video-only formats
+        for f in fmts:
+            if not f.get('acodec'):
+                f['acodec'] = 'mp4a'
+            if not f.get('vcodec'):
+                f['vcodec'] = 'avc1'
+            man_url = f['url']
+            if re.search(r'chunklist(?:_b\d+)*_ao[_.]', man_url):  # audio only
+                f['vcodec'] = 'none'
+            elif re.search(r'chunklist(?:_b\d+)*_vo[_.]', man_url):  # video only
+                f['acodec'] = 'none'
+            else:  # video+audio
+                if f['acodec'] == 'none':
+                    f['acodec'] = 'mp4a'
+                if f['vcodec'] == 'none':
+                    f['vcodec'] = 'avc1'
+
+        return fmts
+
    def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
        def fix_cdata(s):
            # remove \r\n\t before and after <![CDATA[ ]]> to avoid
@ -69,8 +92,7 @@ def fix_cdata(s):
                'format_id': 'https-mp3',
            })
        elif ext == 'm3u8' or 'format=m3u8' in media_url:
-            formats.extend(self._extract_m3u8_formats(
-                media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            formats.extend(self._fix_m3u8_formats(media_url, video_id))
        elif ext == 'f4m':
            # very likely no longer needed. Cannot find any url that uses it.
            manifest_url = update_url_query(
@ -153,10 +175,10 @@ def get_format_info(tbr):
                'format_id': f'https-{tbr}',
                'width': format_copy.get('width'),
                'height': format_copy.get('height'),
-                'tbr': format_copy.get('tbr'),
-                'vcodec': format_copy.get('vcodec'),
-                'acodec': format_copy.get('acodec'),
-                'fps': format_copy.get('fps'),
+                'tbr': format_copy.get('tbr') or tbr,
+                'vcodec': format_copy.get('vcodec') or 'avc1',
+                'acodec': format_copy.get('acodec') or 'mp4a',
+                'fps': format_copy.get('fps') or 25,
            } if format_copy else {
                'format_id': f'https-{tbr}',
                'width': _QUALITY[tbr][0],
@ -245,7 +267,7 @@ class RaiPlayIE(RaiBaseIE):
            'series': 'Report',
            'season': '2013/14',
            'subtitles': {'it': 'count:4'},
-            'release_year': 2022,
+            'release_year': 2024,
            'episode': 'Espresso nel caffè - 07/04/2014',
            'timestamp': 1396919880,
            'upload_date': '20140408',
@ -253,7 +275,7 @@ class RaiPlayIE(RaiBaseIE):
        },
        'params': {'skip_download': True},
    }, {
-        # 1080p direct mp4 url
+        # 1080p
        'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
        'md5': 'aeda7243115380b2dd5e881fd42d949a',
        'info_dict': {
@ -274,7 +296,7 @@ class RaiPlayIE(RaiBaseIE):
            'episode': 'Senza occhi',
            'timestamp': 1637318940,
            'upload_date': '20211119',
-            'formats': 'count:12',
+            'formats': 'count:7',
        },
        'params': {'skip_download': True},
        'expected_warnings': ['Video not available. Likely due to geo-restriction.']
@ -527,7 +549,7 @@ class RaiPlaySoundPlaylistIE(InfoExtractor):
        'info_dict': {
            'id': 'ilruggitodelconiglio',
            'title': 'Il Ruggito del Coniglio',
-            'description': 'md5:48cff6972435964284614d70474132e6',
+            'description': 'md5:62a627b3a2d0635d08fa8b6e0a04f27e',
        },
        'playlist_mincount': 65,
    }, {
@ -634,19 +656,20 @@ def _real_extract(self, url):
        }


-class RaiNewsIE(RaiIE):  # XXX: Do not subclass from concrete IE
+class RaiNewsIE(RaiBaseIE):
    _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
    _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
    _TESTS = [{
        # new rainews player (#3911)
-        'url': 'https://www.rainews.it/rubriche/24mm/video/2022/05/24mm-del-29052022-12cf645d-1ffd-4220-b27c-07c226dbdecf.html',
+        'url': 'https://www.rainews.it/video/2024/02/membri-della-croce-rossa-evacuano-gli-abitanti-di-un-villaggio-nella-regione-ucraina-di-kharkiv-il-filmato-dallucraina--31e8017c-845c-43f5-9c48-245b43c3a079.html',
        'info_dict': {
-            'id': '12cf645d-1ffd-4220-b27c-07c226dbdecf',
+            'id': '31e8017c-845c-43f5-9c48-245b43c3a079',
            'ext': 'mp4',
-            'title': 'Puntata del 29/05/2022',
-            'duration': 1589,
-            'upload_date': '20220529',
+            'title': 'md5:1e81364b09de4a149042bac3c7d36f0b',
+            'duration': 196,
+            'upload_date': '20240225',
            'uploader': 'rainews',
+            'formats': 'count:2',
        },
        'params': {'skip_download': True},
    }, {
@ -659,7 +682,8 @@ class RaiNewsIE(RaiIE):  # XXX: Do not subclass from concrete IE
            'description': 'I film in uscita questa settimana.',
            'thumbnail': r're:^https?://.*\.png$',
            'duration': 833,
-            'upload_date': '20161103'
+            'upload_date': '20161103',
+            'formats': 'count:8',
        },
        'params': {'skip_download': True},
        'expected_warnings': ['unable to extract player_data'],
@ -684,7 +708,7 @@ def _real_extract(self, url):
        if not relinker_url:
            # fallback on old implementation for some old content
            try:
-                return self._extract_from_content_id(video_id, url)
+                return RaiIE._real_extract(self, url)
            except GeoRestrictedError:
                raise
            except ExtractorError as e:
--- a/yt_dlp/extractor/rozhlas.py
+++ b/yt_dlp/extractor/rozhlas.py
@ -247,17 +247,17 @@ class MujRozhlasIE(RozhlasBaseIE):
        'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
        'md5': '6f8fd68663e64936623e67c152a669e0',
        'info_dict': {
-            'id': '10739193',
+            'id': '10787730',
            'ext': 'mp3',
            'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
            'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
            'timestamp': 1684915200,
-            'modified_timestamp': 1684922446,
+            'modified_timestamp': 1687550432,
            'series': 'Vykopávky',
            'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
            'channel_id': 'radio-wave',
            'upload_date': '20230524',
-            'modified_date': '20230524',
+            'modified_date': '20230623',
        },
    }, {
        # serial extraction
@ -277,6 +277,26 @@ class MujRozhlasIE(RozhlasBaseIE):
            'title': 'Nespavci',
            'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
        },
+    }, {
+        # serialPart
+        'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu',
+        'info_dict': {
+            'id': '8889035',
+            'ext': 'm4a',
+            'title': 'Gustavo Adolfo Bécquer: Hora duchů',
+            'description': 'md5:343a15257b376c276e210b78e900ffea',
+            'chapter': 'Hora duchů a Polibek – dva tajemné příběhy Gustava Adolfa Bécquera',
+            'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg',
+            'timestamp': 1708173000,
+            'episode': 'Episode 1',
+            'episode_number': 1,
+            'series': 'Povídka',
+            'modified_date': '20240217',
+            'upload_date': '20240217',
+            'modified_timestamp': 1708173198,
+            'channel_id': 'vltava',
+        },
+        'params': {'skip_download': 'dash'},
    }]

    def _call_api(self, path, item_id, msg='API JSON'):
@ -322,7 +342,7 @@ def _real_extract(self, url):

        entity = info['siteEntityBundle']

-        if entity == 'episode':
+        if entity in ('episode', 'serialPart'):
            return self._extract_audio_entry(self._call_api(
                'episodes', info['contentId'], 'episode info API JSON'))

--- a/yt_dlp/extractor/swearnet.py
+++ b/yt_dlp/extractor/swearnet.py
@ -1,5 +1,5 @@
 from .common import InfoExtractor
-from ..utils import int_or_none, traverse_obj
+from ..utils import ExtractorError, int_or_none, traverse_obj


 class SwearnetEpisodeIE(InfoExtractor):
@ -51,7 +51,13 @@ def _real_extract(self, url):
        display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
        webpage = self._download_webpage(url, display_id)

-        external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
+        try:
+            external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
+        except ExtractorError:
+            if 'Upgrade Now' in webpage:
+                self.raise_login_required()
+            raise
+
        json_data = self._download_json(
            f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]

--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@ -6,7 +6,7 @@
 import time

 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
+from ..compat import compat_urllib_parse_urlparse
 from ..networking import HEADRequest
 from ..utils import (
    ExtractorError,
@ -15,7 +15,6 @@
    UserNotLive,
    determine_ext,
    format_field,
-    get_first,
    int_or_none,
    join_nonempty,
    merge_dicts,
@ -51,7 +50,13 @@ def _create_url(user_id, video_id):
    def _get_sigi_state(self, webpage, display_id):
        return self._search_json(
            r'<script[^>]+\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>', webpage,
-            'sigi state', display_id, end_pattern=r'</script>')
+            'sigi state', display_id, end_pattern=r'</script>', default={})
+
+    def _get_universal_data(self, webpage, display_id):
+        return traverse_obj(self._search_json(
+            r'<script[^>]+\bid="__UNIVERSAL_DATA_FOR_REHYDRATION__"[^>]*>', webpage,
+            'universal data', display_id, end_pattern=r'</script>', default={}),
+            ('__DEFAULT_SCOPE__', {dict})) or {}

    def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
                       note='Downloading API JSON', errnote='Unable to download API page'):
@ -219,8 +224,8 @@ def audio_meta(url):
        def extract_addr(addr, add_meta={}):
            parsed_meta, res = parse_url_key(addr.get('url_key', ''))
            if res:
-                known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height'))
-                known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width'))
+                known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
+                known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
                parsed_meta.update(known_resolutions.get(res, {}))
                add_meta.setdefault('height', int_or_none(res[:-1]))
            return [{
@ -237,22 +242,26 @@ def extract_addr(addr, add_meta={}):

        # Hack: Add direct video links first to prioritize them when removing duplicate formats
        formats = []
+        width = int_or_none(video_info.get('width'))
+        height = int_or_none(video_info.get('height'))
        if video_info.get('play_addr'):
            formats.extend(extract_addr(video_info['play_addr'], {
                'format_id': 'play_addr',
                'format_note': 'Direct video',
                'vcodec': 'h265' if traverse_obj(
                    video_info, 'is_bytevc1', 'is_h265') else 'h264',  # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
-                'width': video_info.get('width'),
-                'height': video_info.get('height'),
+                'width': width,
+                'height': height,
            }))
        if video_info.get('download_addr'):
-            formats.extend(extract_addr(video_info['download_addr'], {
+            download_addr = video_info['download_addr']
+            dl_width = int_or_none(download_addr.get('width'))
+            formats.extend(extract_addr(download_addr, {
                'format_id': 'download_addr',
                'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
                'vcodec': 'h264',
-                'width': video_info.get('width'),
-                'height': video_info.get('height'),
+                'width': dl_width or width,
+                'height': try_call(lambda: int(dl_width / 0.5625)) or height,  # download_addr['height'] is wrong
                'preference': -2 if video_info.get('has_watermark') else -1,
            }))
        if video_info.get('play_addr_h264'):
@ -315,9 +324,6 @@ def extract_addr(addr, add_meta={}):

        return {
            'id': aweme_id,
-            'extractor_key': TikTokIE.ie_key(),
-            'extractor': TikTokIE.IE_NAME,
-            'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
            **traverse_obj(aweme_detail, {
                'title': ('desc', {str}),
                'description': ('desc', {str}),
@ -609,11 +615,12 @@ class TikTokIE(TikTokBaseIE):
            'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
            'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
            'creator': 'MoxyPatch',
+            'creators': ['MoxyPatch'],
            'uploader': 'moxypatch',
            'uploader_id': '7039142049363379205',
            'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
            'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
-            'artist': 'your worst nightmare',
+            'artists': ['your worst nightmare'],
            'track': 'original sound',
            'upload_date': '20230303',
            'timestamp': 1677866781,
@ -651,7 +658,7 @@ class TikTokIE(TikTokBaseIE):
            'comment_count': int,
            'thumbnail': r're:^https://.+\.webp',
        },
-        'params': {'format': 'bytevc1_1080p_808907-0'},
+        'skip': 'Unavailable via feed API, no formats available via web',
    }, {
        # Slideshow, audio-only m4a format
        'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594',
@ -688,24 +695,35 @@ def _real_extract(self, url):
        try:
            return self._extract_aweme_app(video_id)
        except ExtractorError as e:
+            e.expected = True
            self.report_warning(f'{e}; trying with webpage')

        url = self._create_url(user_id, video_id)
        webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
-        next_data = self._search_nextjs_data(webpage, video_id, default='{}')
-        if next_data:
-            status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
-            video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct'), expected_type=dict)
-        else:
-            sigi_data = self._get_sigi_state(webpage, video_id)
-            status = traverse_obj(sigi_data, ('VideoPage', 'statusCode'), expected_type=int) or 0
-            video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)

-        if status == 0:
+        if universal_data := self._get_universal_data(webpage, video_id):
+            self.write_debug('Found universal data for rehydration')
+            status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
+            video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
+
+        elif sigi_data := self._get_sigi_state(webpage, video_id):
+            self.write_debug('Found sigi state data')
+            status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
+            video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
+
+        elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
+            self.write_debug('Found next.js data')
+            status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
+            video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
+
+        else:
+            raise ExtractorError('Unable to extract webpage video data')
+
+        if video_data and status == 0:
            return self._parse_aweme_video_web(video_data, url, video_id)
        elif status == 10216:
            raise ExtractorError('This video is private', expected=True)
-        raise ExtractorError('Video not available', video_id=video_id)
+        raise ExtractorError(f'Video not available, status code {status}', video_id=video_id)


 class TikTokUserIE(TikTokBaseIE):
@ -921,20 +939,23 @@ class DouyinIE(TikTokBaseIE):
    _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.douyin.com/video/6961737553342991651',
-        'md5': 'a97db7e3e67eb57bf40735c022ffa228',
+        'md5': '9ecce7bc5b302601018ecb2871c63a75',
        'info_dict': {
            'id': '6961737553342991651',
            'ext': 'mp4',
            'title': '#杨超越  小小水手带你去远航❤️',
            'description': '#杨超越  小小水手带你去远航❤️',
+            'uploader': '6897520xka',
            'uploader_id': '110403406559',
            'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
            'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
            'creator': '杨超越',
-            'duration': 19782,
+            'creators': ['杨超越'],
+            'duration': 19,
            'timestamp': 1620905839,
            'upload_date': '20210513',
            'track': '@杨超越创作的原声',
+            'artists': ['杨超越'],
            'view_count': int,
            'like_count': int,
            'repost_count': int,
@ -943,20 +964,23 @@ class DouyinIE(TikTokBaseIE):
        },
    }, {
        'url': 'https://www.douyin.com/video/6982497745948921092',
-        'md5': '34a87ebff3833357733da3fe17e37c0e',
+        'md5': '15c5e660b7048af3707304e3cc02bbb5',
        'info_dict': {
            'id': '6982497745948921092',
            'ext': 'mp4',
            'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
            'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
+            'uploader': '0731chaoyue',
            'uploader_id': '408654318141572',
            'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
            'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
            'creator': '杨超越工作室',
-            'duration': 42479,
+            'creators': ['杨超越工作室'],
+            'duration': 42,
            'timestamp': 1625739481,
            'upload_date': '20210708',
            'track': '@杨超越工作室创作的原声',
+            'artists': ['杨超越工作室'],
            'view_count': int,
            'like_count': int,
            'repost_count': int,
@ -965,20 +989,23 @@ class DouyinIE(TikTokBaseIE):
        },
    }, {
        'url': 'https://www.douyin.com/video/6953975910773099811',
-        'md5': 'dde3302460f19db59c47060ff013b902',
+        'md5': '0e6443758b8355db9a3c34864a4276be',
        'info_dict': {
            'id': '6953975910773099811',
            'ext': 'mp4',
            'title': '#一起看海  出现在你的夏日里',
            'description': '#一起看海  出现在你的夏日里',
+            'uploader': '6897520xka',
            'uploader_id': '110403406559',
            'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
            'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
            'creator': '杨超越',
-            'duration': 17343,
+            'creators': ['杨超越'],
+            'duration': 17,
            'timestamp': 1619098692,
            'upload_date': '20210422',
            'track': '@杨超越创作的原声',
+            'artists': ['杨超越'],
            'view_count': int,
            'like_count': int,
            'repost_count': int,
@ -1004,20 +1031,23 @@ class DouyinIE(TikTokBaseIE):
        'skip': 'No longer available',
    }, {
        'url': 'https://www.douyin.com/video/6963263655114722595',
-        'md5': 'cf9f11f0ec45d131445ec2f06766e122',
+        'md5': '1440bcf59d8700f8e014da073a4dfea8',
        'info_dict': {
            'id': '6963263655114722595',
            'ext': 'mp4',
            'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
            'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
+            'uploader': '6897520xka',
            'uploader_id': '110403406559',
            'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
            'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
            'creator': '杨超越',
-            'duration': 15115,
+            'creators': ['杨超越'],
+            'duration': 15,
            'timestamp': 1621261163,
            'upload_date': '20210517',
            'track': '@杨超越创作的原声',
+            'artists': ['杨超越'],
            'view_count': int,
            'like_count': int,
            'repost_count': int,
@ -1025,34 +1055,23 @@ class DouyinIE(TikTokBaseIE):
            'thumbnail': r're:https?://.+\.jpe?g',
        },
    }]
-    _APP_VERSIONS = [('23.3.0', '230300')]
-    _APP_NAME = 'aweme'
-    _AID = 1128
-    _API_HOSTNAME = 'aweme.snssdk.com'
    _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
    _WEBPAGE_HOST = 'https://www.douyin.com/'

    def _real_extract(self, url):
        video_id = self._match_id(url)

-        try:
-            return self._extract_aweme_app(video_id)
-        except ExtractorError as e:
-            e.expected = True
-            self.to_screen(f'{e}; trying with webpage')
-
-        webpage = self._download_webpage(url, video_id)
-        render_data = self._search_json(
-            r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>', webpage, 'render data', video_id,
-            contains_pattern=r'%7B(?s:.+)%7D', fatal=False, transform_source=compat_urllib_parse_unquote)
-        if not render_data:
+        detail = traverse_obj(self._download_json(
+            'https://www.douyin.com/aweme/v1/web/aweme/detail/', video_id,
+            'Downloading web detail JSON', 'Failed to download web detail JSON',
+            query={'aweme_id': video_id}, fatal=False), ('aweme_detail', {dict}))
+        if not detail:
            # TODO: Run verification challenge code to generate signature cookies
-            cookies = self._get_cookies(self._WEBPAGE_HOST)
-            expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
            raise ExtractorError(
-                'Fresh cookies (not necessarily logged in) are needed', expected=expected)
+                'Fresh cookies (not necessarily logged in) are needed',
+                expected=not self._get_cookies(self._WEBPAGE_HOST).get('s_v_web_id'))

-        return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
+        return self._parse_aweme_video_app(detail)


 class TikTokVMIE(InfoExtractor):
@ -1181,7 +1200,7 @@ def _real_extract(self, url):
            url, uploader or room_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=not room_id)

        if webpage:
-            data = try_call(lambda: self._get_sigi_state(webpage, uploader or room_id))
+            data = self._get_sigi_state(webpage, uploader or room_id)
            room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
                       or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
                       or room_id)
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@ -21,6 +21,7 @@
    parse_qs,
    smuggle_url,
    str_or_none,
+    traverse_obj,
    try_get,
    unified_timestamp,
    unsmuggle_url,
@ -48,17 +49,15 @@ def _unsmuggle_headers(self, url):
        return url, data, headers

    def _perform_login(self, username, password):
-        webpage = self._download_webpage(
-            self._LOGIN_URL, None, 'Downloading login page')
-        token, vuid = self._extract_xsrft_and_vuid(webpage)
+        viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
        data = {
            'action': 'login',
            'email': username,
            'password': password,
            'service': 'vimeo',
-            'token': token,
+            'token': viewer['xsrft'],
        }
-        self._set_vimeo_cookie('vuid', vuid)
+        self._set_vimeo_cookie('vuid', viewer['vuid'])
        try:
            self._download_webpage(
                self._LOGIN_URL, None, 'Logging in',
@ -123,7 +122,13 @@ def _parse_config(self, config, video_id):
        video_data = config['video']
        video_title = video_data.get('title')
        live_event = video_data.get('live_event') or {}
-        is_live = live_event.get('status') == 'started'
+        live_status = {
+            'pending': 'is_upcoming',
+            'active': 'is_upcoming',
+            'started': 'is_live',
+            'ended': 'post_live',
+        }.get(live_event.get('status'))
+        is_live = live_status == 'is_live'
        request = config.get('request') or {}

        formats = []
@ -232,7 +237,8 @@ def _parse_config(self, config, video_id):
            'chapters': chapters or None,
            'formats': formats,
            'subtitles': subtitles,
-            'is_live': is_live,
+            'live_status': live_status,
+            'release_timestamp': traverse_obj(live_event, ('ingest', 'scheduled_start_time', {parse_iso8601})),
            # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
            # at the same time without actual units specified.
            '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -114,9 +114,9 @@
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
-                'clientVersion': '17.31.35',
+                'clientVersion': '18.11.34',
                'androidSdkVersion': 30,
-                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+                'userAgent': 'com.google.android.youtube/18.11.34 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
@ -127,9 +127,9 @@
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
-                'clientVersion': '17.31.35',
+                'clientVersion': '18.11.34',
                'androidSdkVersion': 30,
-                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+                'userAgent': 'com.google.android.youtube/18.11.34 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
@ -168,9 +168,9 @@
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
-                'clientVersion': '17.33.2',
+                'clientVersion': '18.11.34',
                'deviceModel': 'iPhone14,3',
-                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+                'userAgent': 'com.google.ios.youtube/18.11.34 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -180,9 +180,9 @@
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
-                'clientVersion': '17.33.2',
+                'clientVersion': '18.11.34',
                'deviceModel': 'iPhone14,3',
-                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+                'userAgent': 'com.google.ios.youtube/18.11.34 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
@ -3640,15 +3640,28 @@ def _get_requested_clients(self, url, smuggled_data):

        return orderedSet(requested_clients)

+    def _invalid_player_response(self, pr, video_id):
+        # YouTube may return a different video player response than expected.
+        # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
+        if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
+            return pr_id
+
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
        initial_pr = None
        if webpage:
            initial_pr = self._search_json(
                self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

+        prs = []
+        if initial_pr and not self._invalid_player_response(initial_pr, video_id):
+            # Android player_response does not have microFormats which are needed for
+            # extraction of some data. So we return the initial_pr with formats
+            # stripped out even if not requested by the user
+            # See: https://github.com/yt-dlp/yt-dlp/issues/501
+            prs.append({**initial_pr, 'streamingData': None})
+
        all_clients = set(clients)
        clients = clients[::-1]
-        prs = []

        def append_client(*client_names):
            """ Append the first client name that exists but not already used """
@ -3660,18 +3673,9 @@ def append_client(*client_names):
                        all_clients.add(actual_client)
                        return

-        # Android player_response does not have microFormats which are needed for
-        # extraction of some data. So we return the initial_pr with formats
-        # stripped out even if not requested by the user
-        # See: https://github.com/yt-dlp/yt-dlp/issues/501
-        if initial_pr:
-            pr = dict(initial_pr)
-            pr['streamingData'] = None
-            prs.append(pr)
-
-        last_error = None
        tried_iframe_fallback = False
        player_url = None
+        skipped_clients = {}
        while clients:
            client, base_client, variant = _split_innertube_client(clients.pop())
            player_ytcfg = master_ytcfg if client == 'web' else {}
@ -3692,26 +3696,19 @@ def append_client(*client_names):
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
            except ExtractorError as e:
-                if last_error:
-                    self.report_warning(last_error)
-                last_error = e
+                self.report_warning(e)
                continue

-            if pr:
-                # YouTube may return a different video player response than expected.
-                # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
-                pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
-                if pr_video_id and pr_video_id != video_id:
-                    self.report_warning(
-                        f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
-                else:
-                    # Save client name for introspection later
-                    name = short_client_name(client)
-                    sd = traverse_obj(pr, ('streamingData', {dict})) or {}
-                    sd[STREAMING_DATA_CLIENT_NAME] = name
-                    for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
-                        f[STREAMING_DATA_CLIENT_NAME] = name
-                    prs.append(pr)
+            if pr_id := self._invalid_player_response(pr, video_id):
+                skipped_clients[client] = pr_id
+            elif pr:
+                # Save client name for introspection later
+                name = short_client_name(client)
+                sd = traverse_obj(pr, ('streamingData', {dict})) or {}
+                sd[STREAMING_DATA_CLIENT_NAME] = name
+                for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
+                    f[STREAMING_DATA_CLIENT_NAME] = name
+                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
@ -3722,10 +3719,15 @@ def append_client(*client_names):
                elif not variant:
                    append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

-        if last_error:
-            if not len(prs):
-                raise last_error
-            self.report_warning(last_error)
+        if skipped_clients:
+            self.report_warning(
+                f'Skipping player responses from {"/".join(skipped_clients)} clients '
+                f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
+            if not prs:
+                raise ExtractorError(
+                    'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
+        elif not prs:
+            raise ExtractorError('Failed to extract any player response')
        return prs, player_url

    def _needs_live_processing(self, live_status, duration):
--- a/yt_dlp/extractor/zenporn.py
+++ b/yt_dlp/extractor/zenporn.py
@ -0,0 +1,118 @@
+import base64
+import binascii
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, determine_ext, unified_strdate, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class ZenPornIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?zenporn\.com/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://zenporn.com/video/15627016/desi-bhabi-ki-chudai',
+        'md5': '07bd576b5920714d74975c054ca28dee',
+        'info_dict': {
+            'id': '9563799',
+            'display_id': '15627016',
+            'ext': 'mp4',
+            'title': 'md5:669eafd3bbc688aa29770553b738ada2',
+            'description': '',
+            'thumbnail': 'md5:2fc044a19bab450fef8f1931e7920a18',
+            'upload_date': '20230925',
+            'uploader': 'md5:9fae59847f1f58d1da8f2772016c12f3',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://zenporn.com/video/15570701',
+        'md5': 'acba0d080d692664fcc8c4e5502b1a67',
+        'info_dict': {
+            'id': '2297875',
+            'display_id': '15570701',
+            'ext': 'mp4',
+            'title': 'md5:47aebdf87644ec91e8b1a844bc832451',
+            'description': '',
+            'thumbnail': 'https://mstn.nv7s.com/contents/videos_screenshots/2297000/2297875/480x270/1.jpg',
+            'upload_date': '20230921',
+            'uploader': 'Lois Clarke',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://zenporn.com/video/8531117/amateur-students-having-a-fuck-fest-at-club/',
+        'md5': '67411256aa9451449e4d29f3be525541',
+        'info_dict': {
+            'id': '12791908',
+            'display_id': '8531117',
+            'ext': 'mp4',
+            'title': 'Amateur students having a fuck fest at club',
+            'description': '',
+            'thumbnail': 'https://tn.txxx.tube/contents/videos_screenshots/12791000/12791908/288x162/1.jpg',
+            'upload_date': '20191005',
+            'uploader': 'Jackopenass',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://zenporn.com/video/15872038/glad-you-came/',
+        'md5': '296ccab437f5bac6099433768449d8e1',
+        'info_dict': {
+            'id': '111585',
+            'display_id': '15872038',
+            'ext': 'mp4',
+            'title': 'Glad You Came',
+            'description': '',
+            'thumbnail': 'https://vpim.m3pd.com/contents/videos_screenshots/111000/111585/480x270/1.jpg',
+            'upload_date': '20231024',
+            'uploader': 'Martin Rudenko',
+            'age_limit': 18,
+        }
+    }]
+
+    def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
+        """ This function is a reverse engineering from the website javascript """
+        result = '/'.join(str(int(extr_id) // i * i) for i in (1_000_000, 1_000, 1))
+        return f'https://{ext_domain}/api/json/video/{lifetime}/{result}.json'
+
+    @staticmethod
+    def _decode_video_url(encoded_url):
+        """ This function is a reverse engineering from the website javascript """
+        # Replace lookalike characters and standardize map
+        translation = str.maketrans('АВСЕМ.,~', 'ABCEM+/=')
+        try:
+            return base64.b64decode(encoded_url.translate(translation), validate=True).decode()
+        except (binascii.Error, ValueError):
+            return None
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        ext_domain, video_id = self._search_regex(
+            r'https://(?P<ext_domain>[\w.-]+\.\w{3})/embed/(?P<extr_id>\d+)/',
+            webpage, 'embed info', group=('ext_domain', 'extr_id'))
+
+        info_json = self._download_json(
+            self._gen_info_url(ext_domain, video_id), video_id, fatal=False)
+
+        video_json = self._download_json(
+            f'https://{ext_domain}/api/videofile.php', video_id, query={
+                'video_id': video_id,
+                'lifetime': 8640000,
+            }, note='Downloading video file JSON', errnote='Failed to download video file JSON')
+
+        decoded_url = self._decode_video_url(video_json[0]['video_url'])
+        if not decoded_url:
+            raise ExtractorError('Unable to decode the video url')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'ext': traverse_obj(video_json, (0, 'format', {determine_ext})),
+            'url': f'https://{ext_domain}{decoded_url}',
+            'age_limit': 18,
+            **traverse_obj(info_json, ('video', {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'thumbnail': ('thumb', {url_or_none}),
+                'upload_date': ('post_date', {unified_strdate}),
+                'uploader': ('user', 'username', {str}),
+            })),
+        }
--- a/yt_dlp/networking/common.py
+++ b/yt_dlp/networking/common.py
@ -68,6 +68,7 @@ def __init__(self, logger, verbose=False):
    def close(self):
        for handler in self.handlers.values():
            handler.close()
+        self.handlers = {}

    def add_handler(self, handler: RequestHandler):
        """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""