wbi key cache

2024-07-03 09:23:35 +00:00 · 2024-02-04 01:44:43 -05:00 · 2024-02-04 01:44:43 -05:00 · 1ce48dba7e
parent 88db8b4679
commit 1ce48dba7e
1 changed files with 19 additions and 24 deletions
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@ -45,9 +45,8 @@

 class BilibiliBaseIE(InfoExtractor):
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
-    _WBI_KEY_CACHE = {}
-    _WBI_KEY_CACHE_TIMEOUT = 30
-    # exact expire timeout is not clear, though 30s is good for one session
+    _WBI_KEY_CACHE_TIMEOUT = 30  # exact expire timeout is unclear, use 30s for one session
+    _wbi_key_cache = {}

    def check_missing_formats(self, play_info, formats):
        parsed_qualites = set(traverse_obj(formats, (..., 'quality')))
@ -123,12 +122,6 @@ def extract_formats(self, play_info):
            })
        return formats

-    def _download_playinfo(self, video_id, cid):
-        return self._download_json(
-            'https://api.bilibili.com/x/player/playurl', video_id,
-            query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
-            note=f'Downloading video formats for cid {cid}')['data']
-
    def json2srt(self, json_data):
        srt_data = ''
        for idx, line in enumerate(json_data.get('body') or []):
@ -206,15 +199,15 @@ def _get_episodes_from_season(self, ss_id, url):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))

    def _get_wbi_key(self, video_id):
-        if self._WBI_KEY_CACHE.get('ts', 0) > time.time() - 30:
-            return self._WBI_KEY_CACHE['key']
+        if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
+            return self._wbi_key_cache['key']

        session_data = self._download_json(
            'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')

        lookup = ''.join(traverse_obj(session_data, (
            'data', 'wbi_img', ('img_url', 'sub_url'),
-            {lambda x: x.split('/')[-1].split('.')[0]})))
+            {lambda x: x.rpartition('/')[2].partition('.')[0]})))

        mixin_key_enc_tab = [
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
@ -223,9 +216,11 @@ def _get_wbi_key(self, video_id):
            36, 20, 34, 44, 52
        ]

-        self._WBI_KEY_CACHE['key'] = ''.join(lookup[i] for i in mixin_key_enc_tab)[:32]
-        self._WBI_KEY_CACHE['ts'] = time.time()
-        return self._WBI_KEY_CACHE['key']
+        self._wbi_key_cache.update({
+            'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
+            'ts': time.time(),
+        })
+        return self._wbi_key_cache['key']

    def _sign_wbi(self, params, video_id):
        params['wts'] = round(time.time())
@ -242,8 +237,8 @@ def _get_play_url(self, bvid, cid, headers={}, qn=None):
        if qn:
            params['qn'] = qn
        return self._download_json(
-            'https://api.bilibili.com/x/player/wbi/playurl', bvid, headers=headers,
-            query=self._sign_wbi(params, bvid),
+            'https://api.bilibili.com/x/player/wbi/playurl', bvid,
+            query=self._sign_wbi(params, bvid), headers=headers,
            note=f'Downloading video formats for cid {cid} {qn or ""}')['data']

    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
@ -282,7 +277,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo):
            ('data', 'interaction', 'graph_version', {int_or_none}))
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
-            play_info = self._download_playinfo(video_id, cid)
+            play_info = self._get_play_url(video_id, cid, metainfo.get('http_headers', {}))
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
@ -720,14 +715,15 @@ def _real_extract(self, url):
            formats = self.extract_formats(play_info)

            if not traverse_obj(play_info, ('dash')):  # for legacy-only formats
-                has_qn = lambda x: str_or_none(x) in traverse_obj(formats, (..., 'format_id'))
+                has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
                for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
                    formats.extend(traverse_obj(
                        self.extract_formats(self._get_play_url(video_id, cid, headers=headers, qn=qn)),
-                        (lambda _, v: not has_qn(v.get('format_id')))))
+                        (lambda _, v: not has_qn(v.get('quality')))))
                self.check_missing_formats(play_info, formats)
                if traverse_obj(formats, lambda _, v: v['fragments']):
                    if not self._configuration_arg('_prefer_multi_flv'):
+                        # `_prefer_multi_flv` is mainly for writing test case since user can hardly need this
                        dropping = ', '.join(traverse_obj(formats, (
                            lambda _, v: v['fragments'], {lambda x: f'{x["format"]} ({x["format_id"]})'})))
                        formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
@ -736,10 +732,9 @@ def _real_extract(self, url):
                    else:
                        formats = traverse_obj(
                            formats, lambda _, v: v['quality'] == int(self._configuration_arg('_prefer_multi_flv')[0])
-                        ) or traverse_obj(formats, lambda _, v: v['fragments'])
+                        ) or [max(traverse_obj(formats, lambda _, v: v['fragments']), key=lambda x: x['quality'])]

            if formats[0].get('fragments'):  # transform multi_video format
-                format = max(traverse_obj(formats, lambda _, v: v['fragments']), key=lambda x: x['quality'])
                return {
                    **metainfo,
                    '_type': 'multi_video',
@ -749,11 +744,11 @@ def _real_extract(self, url):
                        'http_headers': metainfo['http_headers'],
                        'formats': [{
                            **fragment,
-                            'format_id': format.get('format_id'),
+                            'format_id': formats[0].get('format_id'),
                        }],
                        'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
                        '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
-                    } for idx, fragment in enumerate(format['fragments'])],
+                    } for idx, fragment in enumerate(formats[0]['fragments'])],
                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
                }
            else: