do not extract storyboard in extract_formats

This commit is contained in:
grqx_wsl 2024-10-23 21:10:05 +13:00
parent 1cf870df6a
commit eb03632cc7

View file

@ -42,6 +42,7 @@
unsmuggle_url, unsmuggle_url,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
value,
variadic, variadic,
) )
@ -145,7 +146,7 @@ def _extract_storyboard(self, duration, aid=None, bvid=None, cid=None):
'fragments': fragments, 'fragments': fragments,
} }
def extract_formats(self, play_info, aid=None, bvid=None, cid=None): def extract_formats(self, play_info):
format_names = { format_names = {
r['quality']: traverse_obj(r, 'new_description', 'display_desc') r['quality']: traverse_obj(r, 'new_description', 'display_desc')
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality'])) for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
@ -207,9 +208,6 @@ def extract_formats(self, play_info, aid=None, bvid=None, cid=None):
}), }),
**parse_resolution(format_names.get(play_info.get('quality'))), **parse_resolution(format_names.get(play_info.get('quality'))),
}) })
if storyboard_format := self._extract_storyboard(
float_or_none(play_info.get('timelength'), scale=1000), aid=aid, bvid=bvid, cid=cid):
formats.append(storyboard_format)
return formats return formats
def _get_wbi_key(self, video_id): def _get_wbi_key(self, video_id):
@ -369,13 +367,19 @@ def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items(): for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid, headers=headers) play_info = self._download_playinfo(video_id, cid, headers=headers)
formats = self.extract_formats(play_info)
duration = float_or_none(play_info.get('timelength'), scale=1000)
if storyboard_format := self._extract_storyboard(
duration=duration,
bvid=video_id, cid=cid):
formats.append(storyboard_format)
yield { yield {
**metainfo, **metainfo,
'id': f'{video_id}_{cid}', 'id': f'{video_id}_{cid}',
'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}', 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
'formats': self.extract_formats(play_info, bvid=video_id, cid=cid), 'formats': formats,
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}', 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
'duration': float_or_none(play_info.get('timelength'), scale=1000), 'duration': duration,
'subtitles': self.extract_subtitles(video_id, cid), 'subtitles': self.extract_subtitles(video_id, cid),
'heatmap': list(self._extract_heatmap(cid)), 'heatmap': list(self._extract_heatmap(cid)),
} }
@ -845,14 +849,17 @@ def _real_extract(self, url):
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
__post_extractor=self.extract_comments(aid)) __post_extractor=self.extract_comments(aid))
else: else:
formats = self.extract_formats(play_info, bvid=video_id, cid=cid) formats = self.extract_formats(play_info)
formats.append(self._extract_storyboard(
duration=float_or_none(play_info.get('timelength'), scale=1000),
bvid=video_id, cid=cid))
if not traverse_obj(play_info, ('dash')): if not traverse_obj(play_info, ('dash')):
# we only have legacy formats and need additional work # we only have legacy formats and need additional work
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
formats.extend(traverse_obj( formats.extend(traverse_obj(
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn), bvid=video_id, cid=cid), self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
lambda _, v: not has_qn(v['quality']))) lambda _, v: not has_qn(v['quality'])))
self._check_missing_formats(play_info, formats) self._check_missing_formats(play_info, formats)
flv_formats = traverse_obj(formats, lambda _, v: v['fragments']) flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
@ -990,7 +997,7 @@ def _real_extract(self, url):
aid, cid = episode_info.get('aid'), episode_info.get('cid') aid, cid = episode_info.get('aid'), episode_info.get('cid')
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
formats = self.extract_formats(play_info, aid=aid, cid=cid) formats = self.extract_formats(play_info)
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage): if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
self.raise_login_required('This video is for premium members only') self.raise_login_required('This video is for premium members only')
@ -1011,7 +1018,9 @@ def _real_extract(self, url):
), (None, None)) ), (None, None))
aid, cid = episode_info.get('aid', aid), episode_info.get('cid', cid) aid, cid = episode_info.get('aid', aid), episode_info.get('cid', cid)
duration = float_or_none(play_info.get('timelength'), scale=1000)
if storyboard_format := self._extract_storyboard(duration=duration, aid=aid, cid=cid):
formats.append(storyboard_format)
return { return {
'id': episode_id, 'id': episode_id,
'formats': formats, 'formats': formats,
@ -1030,7 +1039,7 @@ def _real_extract(self, url):
'season': str_or_none(season_title), 'season': str_or_none(season_title),
'season_id': str_or_none(season_id), 'season_id': str_or_none(season_id),
'season_number': season_number, 'season_number': season_number,
'duration': float_or_none(play_info.get('timelength'), scale=1000), 'duration': duration,
'subtitles': self.extract_subtitles(episode_id, cid, aid=aid), 'subtitles': self.extract_subtitles(episode_id, cid, aid=aid),
'__post_extractor': self.extract_comments(aid), '__post_extractor': self.extract_comments(aid),
'http_headers': {'Referer': url}, 'http_headers': {'Referer': url},
@ -1163,10 +1172,14 @@ def _extract_episode(self, season_info, ep_id):
query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1}, query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
headers=self._HEADERS, note='Downloading playinfo')['data'] headers=self._HEADERS, note='Downloading playinfo')['data']
formats = self.extract_formats(play_info)
duration = traverse_obj(episode_info, ('duration', {int_or_none}))
if storyboard_format := self._extract_storyboard(duration=duration, aid=aid, cid=cid):
formats.append(storyboard_format)
return { return {
'id': str_or_none(ep_id), 'id': str_or_none(ep_id),
'episode_id': str_or_none(ep_id), 'episode_id': str_or_none(ep_id),
'formats': self.extract_formats(play_info, aid=aid, cid=cid), 'formats': formats,
'extractor_key': BilibiliCheeseIE.ie_key(), 'extractor_key': BilibiliCheeseIE.ie_key(),
'extractor': BilibiliCheeseIE.IE_NAME, 'extractor': BilibiliCheeseIE.IE_NAME,
'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}', 'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
@ -1174,7 +1187,7 @@ def _extract_episode(self, season_info, ep_id):
'episode': ('title', {str}), 'episode': ('title', {str}),
'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)}, 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
'alt_title': ('subtitle', {str}), 'alt_title': ('subtitle', {str}),
'duration': ('duration', {int_or_none}), 'duration': {value(duration)},
'episode_number': ('index', {int_or_none}), 'episode_number': ('index', {int_or_none}),
'thumbnail': ('cover', {url_or_none}), 'thumbnail': ('cover', {url_or_none}),
'timestamp': ('release_date', {int_or_none}), 'timestamp': ('release_date', {int_or_none}),