mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-09 18:03:25 +00:00
[ie/bilibili] Support legacy formats (#9117)
Adds extractor-arg `prefer_multi_flv` Closes #6438, Closes #8525, Closes #8553, Closes #10243 Authored by: c-basalt, GD-Slime Co-authored-by: GD-Slime <82302542+GD-Slime@users.noreply.github.com>
This commit is contained in:
parent
9200bc70c9
commit
1d6ab17d07
|
@ -1856,6 +1856,9 @@ #### soundcloud
|
||||||
#### orfon (orf:on)
|
#### orfon (orf:on)
|
||||||
* `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"`
|
* `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"`
|
||||||
|
|
||||||
|
#### bilibili
|
||||||
|
* `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats
|
||||||
|
|
||||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||||
|
|
||||||
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
||||||
|
|
|
@ -31,12 +31,12 @@
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
|
parse_resolution,
|
||||||
qualities,
|
qualities,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_call,
|
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -47,6 +47,23 @@
|
||||||
|
|
||||||
class BilibiliBaseIE(InfoExtractor):
|
class BilibiliBaseIE(InfoExtractor):
|
||||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||||
|
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||||
|
_wbi_key_cache = {}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_logged_in(self):
|
||||||
|
return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
|
||||||
|
|
||||||
|
def _check_missing_formats(self, play_info, formats):
|
||||||
|
parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
|
||||||
|
missing_formats = join_nonempty(*[
|
||||||
|
traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
|
||||||
|
for fmt in traverse_obj(play_info, (
|
||||||
|
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||||
|
if missing_formats:
|
||||||
|
self.to_screen(
|
||||||
|
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||||
|
f'become a premium member to download them. {self._login_hint()}')
|
||||||
|
|
||||||
def extract_formats(self, play_info):
|
def extract_formats(self, play_info):
|
||||||
format_names = {
|
format_names = {
|
||||||
|
@ -86,18 +103,75 @@ def extract_formats(self, play_info):
|
||||||
'format': format_names.get(video.get('id')),
|
'format': format_names.get(video.get('id')),
|
||||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||||
|
|
||||||
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
|
if formats:
|
||||||
if missing_formats:
|
self._check_missing_formats(play_info, formats)
|
||||||
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
|
|
||||||
f'you have to login or become premium member to download them. {self._login_hint()}')
|
|
||||||
|
|
||||||
|
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||||
|
'url': ('url', {url_or_none}),
|
||||||
|
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
'filesize': ('size', {int_or_none}),
|
||||||
|
}))
|
||||||
|
if fragments:
|
||||||
|
formats.append({
|
||||||
|
'url': fragments[0]['url'],
|
||||||
|
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
|
||||||
|
**({
|
||||||
|
'fragments': fragments,
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
} if len(fragments) > 1 else {}),
|
||||||
|
**traverse_obj(play_info, {
|
||||||
|
'quality': ('quality', {int_or_none}),
|
||||||
|
'format_id': ('quality', {str_or_none}),
|
||||||
|
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||||
|
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
}),
|
||||||
|
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||||
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _download_playinfo(self, video_id, cid, headers=None):
|
def _get_wbi_key(self, video_id):
|
||||||
|
if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
|
||||||
|
return self._wbi_key_cache['key']
|
||||||
|
|
||||||
|
session_data = self._download_json(
|
||||||
|
'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
|
||||||
|
|
||||||
|
lookup = ''.join(traverse_obj(session_data, (
|
||||||
|
'data', 'wbi_img', ('img_url', 'sub_url'),
|
||||||
|
{lambda x: x.rpartition('/')[2].partition('.')[0]})))
|
||||||
|
|
||||||
|
# from getMixinKey() in the vendor js
|
||||||
|
mixin_key_enc_tab = [
|
||||||
|
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
|
||||||
|
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
|
||||||
|
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
|
||||||
|
36, 20, 34, 44, 52,
|
||||||
|
]
|
||||||
|
|
||||||
|
self._wbi_key_cache.update({
|
||||||
|
'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
|
||||||
|
'ts': time.time(),
|
||||||
|
})
|
||||||
|
return self._wbi_key_cache['key']
|
||||||
|
|
||||||
|
def _sign_wbi(self, params, video_id):
|
||||||
|
params['wts'] = round(time.time())
|
||||||
|
params = {
|
||||||
|
k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
|
||||||
|
for k, v in sorted(params.items())
|
||||||
|
}
|
||||||
|
query = urllib.parse.urlencode(params)
|
||||||
|
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||||
|
return params
|
||||||
|
|
||||||
|
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
|
||||||
|
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
|
||||||
|
if qn:
|
||||||
|
params['qn'] = qn
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
|
||||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
query=self._sign_wbi(params, bvid), headers=headers,
|
||||||
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||||
|
|
||||||
def json2srt(self, json_data):
|
def json2srt(self, json_data):
|
||||||
srt_data = ''
|
srt_data = ''
|
||||||
|
@ -115,15 +189,15 @@ def _get_subtitles(self, video_id, cid, aid=None):
|
||||||
}],
|
}],
|
||||||
}
|
}
|
||||||
|
|
||||||
subtitle_info = traverse_obj(self._download_json(
|
video_info = self._download_json(
|
||||||
'https://api.bilibili.com/x/player/v2', video_id,
|
'https://api.bilibili.com/x/player/v2', video_id,
|
||||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||||
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
|
note=f'Extracting subtitle info {cid}')
|
||||||
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
|
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||||
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
|
self.report_warning(
|
||||||
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
|
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||||
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
|
for s in traverse_obj(video_info, (
|
||||||
for s in subs_list:
|
'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
|
||||||
subtitles.setdefault(s['lan'], []).append({
|
subtitles.setdefault(s['lan'], []).append({
|
||||||
'ext': 'srt',
|
'ext': 'srt',
|
||||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
||||||
|
@ -203,15 +277,15 @@ def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None
|
||||||
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
||||||
return cid_edges
|
return cid_edges
|
||||||
|
|
||||||
def _get_interactive_entries(self, video_id, cid, metainfo):
|
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
|
||||||
graph_version = traverse_obj(
|
graph_version = traverse_obj(
|
||||||
self._download_json(
|
self._download_json(
|
||||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
|
'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
|
||||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||||
for cid, edges in cid_edges.items():
|
for cid, edges in cid_edges.items():
|
||||||
play_info = self._download_playinfo(video_id, cid)
|
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||||
yield {
|
yield {
|
||||||
**metainfo,
|
**metainfo,
|
||||||
'id': f'{video_id}_{cid}',
|
'id': f'{video_id}_{cid}',
|
||||||
|
@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'timestamp': 1488353834,
|
'timestamp': 1488353834,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'old av URL version',
|
'note': 'old av URL version',
|
||||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
'id': 'BV11x411K7CN',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': '【金坷垃】金泡沫',
|
||||||
'uploader': '菊子桑',
|
'uploader': '菊子桑',
|
||||||
'uploader_id': '156160',
|
'uploader_id': '156160',
|
||||||
'id': 'BV11x411K7CN',
|
|
||||||
'title': '【金坷垃】金泡沫',
|
|
||||||
'duration': 308.36,
|
'duration': 308.36,
|
||||||
'upload_date': '20140420',
|
'upload_date': '20140420',
|
||||||
'timestamp': 1397983878,
|
'timestamp': 1397983878,
|
||||||
|
@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'tags': list,
|
'tags': list,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||||
|
'_old_archive_ids': ['bilibili 1074402_part1'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -288,6 +364,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||||
'duration': 90.314,
|
'duration': 90.314,
|
||||||
|
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
|
@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||||
'duration': 90.314,
|
'duration': 90.314,
|
||||||
|
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'note': 'video has subtitles',
|
|
||||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'BV12N4y1M7rh',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
|
||||||
'tags': list,
|
|
||||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
|
||||||
'duration': 313.557,
|
|
||||||
'upload_date': '20220709',
|
|
||||||
'uploader': '小夫太渴',
|
|
||||||
'timestamp': 1657347907,
|
|
||||||
'uploader_id': '1326814124',
|
|
||||||
'comment_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
'like_count': int,
|
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
|
||||||
'subtitles': 'count:2',
|
|
||||||
},
|
|
||||||
'params': {'listsubtitles': True},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bilibili.com/video/av8903802/',
|
'url': 'https://www.bilibili.com/video/av8903802/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 463665680_part1'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 893839363_part1'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'newer festival video',
|
'note': 'newer festival video',
|
||||||
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||||
|
@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 778246196_part1'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'legacy flv/mp4 video',
|
||||||
|
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1ms411Q7vw_p4',
|
||||||
|
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||||
|
'timestamp': 1458222815,
|
||||||
|
'upload_date': '20160317',
|
||||||
|
'description': '云南方言快乐生产线出品',
|
||||||
|
'duration': float,
|
||||||
|
'uploader': '一笑颠天',
|
||||||
|
'uploader_id': '3916081',
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'tags': list,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 4120229_part4'],
|
||||||
|
},
|
||||||
|
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
|
||||||
|
'playlist_count': 19,
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1ms411Q7vw_p4_0',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||||
|
'duration': 399.102,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'note': 'legacy mp4-only video',
|
||||||
|
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1nx411u79K',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
|
||||||
|
'timestamp': 1508893551,
|
||||||
|
'upload_date': '20171025',
|
||||||
|
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
|
||||||
|
'duration': 80.384,
|
||||||
|
'uploader': '伯远',
|
||||||
|
'uploader_id': '10584494',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'tags': list,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 15700301_part1'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'interactive/split-path video',
|
'note': 'interactive/split-path video',
|
||||||
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
||||||
|
@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||||
},
|
},
|
||||||
'playlist_count': 33,
|
'playlist_count': 33,
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
|
@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
|
@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'upload_date': '20191021',
|
'upload_date': '20191021',
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'video has subtitles, which requires login',
|
||||||
|
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV12N4y1M7rh',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||||
|
'tags': list,
|
||||||
|
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||||
|
'duration': 313.557,
|
||||||
|
'upload_date': '20220709',
|
||||||
|
'uploader': '小夫太渴',
|
||||||
|
'timestamp': 1657347907,
|
||||||
|
'uploader_id': '1326814124',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'subtitles': 'count:2', # login required for CC subtitle
|
||||||
|
'_old_archive_ids': ['bilibili 898179753_part1'],
|
||||||
|
},
|
||||||
|
'params': {'listsubtitles': True},
|
||||||
|
'skip': 'login required for subtitle',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -498,8 +631,9 @@ def _real_extract(self, url):
|
||||||
if not self._match_valid_url(urlh.url):
|
if not self._match_valid_url(urlh.url):
|
||||||
return self.url_result(urlh.url)
|
return self.url_result(urlh.url)
|
||||||
|
|
||||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
headers['Referer'] = url
|
||||||
|
|
||||||
|
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||||
is_festival = 'videoData' not in initial_state
|
is_festival = 'videoData' not in initial_state
|
||||||
if is_festival:
|
if is_festival:
|
||||||
video_data = initial_state['videoInfo']
|
video_data = initial_state['videoInfo']
|
||||||
|
@ -548,7 +682,6 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
aid = video_data.get('aid')
|
aid = video_data.get('aid')
|
||||||
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
|
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
|
||||||
|
|
||||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||||
|
|
||||||
festival_info = {}
|
festival_info = {}
|
||||||
|
@ -586,18 +719,65 @@ def _real_extract(self, url):
|
||||||
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
||||||
if is_interactive:
|
if is_interactive:
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
|
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||||
__post_extractor=self.extract_comments(aid))
|
__post_extractor=self.extract_comments(aid))
|
||||||
else:
|
else:
|
||||||
return {
|
formats = self.extract_formats(play_info)
|
||||||
**metainfo,
|
|
||||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
if not traverse_obj(play_info, ('dash')):
|
||||||
'chapters': self._get_chapters(aid, cid),
|
# we only have legacy formats and need additional work
|
||||||
'subtitles': self.extract_subtitles(video_id, cid),
|
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||||
'formats': self.extract_formats(play_info),
|
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||||
'__post_extractor': self.extract_comments(aid),
|
formats.extend(traverse_obj(
|
||||||
}
|
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||||
|
lambda _, v: not has_qn(v['quality'])))
|
||||||
|
self._check_missing_formats(play_info, formats)
|
||||||
|
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||||
|
if flv_formats and len(flv_formats) < len(formats):
|
||||||
|
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||||
|
if not self._configuration_arg('prefer_multi_flv'):
|
||||||
|
dropped_fmts = ', '.join(
|
||||||
|
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||||
|
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||||
|
if dropped_fmts:
|
||||||
|
self.to_screen(
|
||||||
|
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||||
|
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||||
|
else:
|
||||||
|
formats = traverse_obj(
|
||||||
|
# XXX: Filtering by extractor-arg is for testing purposes
|
||||||
|
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||||
|
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||||
|
|
||||||
|
if traverse_obj(formats, (0, 'fragments')):
|
||||||
|
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||||
|
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||||
|
return {
|
||||||
|
**metainfo,
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'entries': [{
|
||||||
|
'id': f'{metainfo["id"]}_{idx}',
|
||||||
|
'title': metainfo['title'],
|
||||||
|
'http_headers': metainfo['http_headers'],
|
||||||
|
'formats': [{
|
||||||
|
**fragment,
|
||||||
|
'format_id': formats[0].get('format_id'),
|
||||||
|
}],
|
||||||
|
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||||
|
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||||
|
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||||
|
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
**metainfo,
|
||||||
|
'formats': formats,
|
||||||
|
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||||
|
'chapters': self._get_chapters(aid, cid),
|
||||||
|
'subtitles': self.extract_subtitles(video_id, cid),
|
||||||
|
'__post_extractor': self.extract_comments(aid),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||||
|
@ -968,7 +1148,7 @@ def _real_extract(self, url):
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
|
||||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
class BilibiliSpaceBaseIE(BilibiliBaseIE):
|
||||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||||
first_page = fetch_page(0)
|
first_page = fetch_page(0)
|
||||||
metadata = get_metadata(first_page)
|
metadata = get_metadata(first_page)
|
||||||
|
@ -988,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||||
'id': '3985676',
|
'id': '3985676',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 178,
|
'playlist_mincount': 178,
|
||||||
|
'skip': 'login required',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://space.bilibili.com/313580179/video',
|
'url': 'https://space.bilibili.com/313580179/video',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '313580179',
|
'id': '313580179',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 92,
|
'playlist_mincount': 92,
|
||||||
|
'skip': 'login required',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_signature(self, playlist_id):
|
|
||||||
session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
|
|
||||||
|
|
||||||
key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
|
|
||||||
img_key = traverse_obj(
|
|
||||||
session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
|
|
||||||
sub_key = traverse_obj(
|
|
||||||
session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
|
|
||||||
|
|
||||||
session_key = img_key + sub_key
|
|
||||||
|
|
||||||
signature_values = []
|
|
||||||
for position in (
|
|
||||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
|
|
||||||
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
|
|
||||||
57, 62, 11, 36, 20, 34, 44, 52,
|
|
||||||
):
|
|
||||||
char_at_position = try_call(lambda: session_key[position])
|
|
||||||
if char_at_position:
|
|
||||||
signature_values.append(char_at_position)
|
|
||||||
|
|
||||||
return ''.join(signature_values)[:32]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||||
if not is_video_url:
|
if not is_video_url:
|
||||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||||
'To download audios, add a "/audio" to the URL')
|
'To download audios, add a "/audio" to the URL')
|
||||||
|
|
||||||
signature = self._extract_signature(playlist_id)
|
|
||||||
|
|
||||||
def fetch_page(page_idx):
|
def fetch_page(page_idx):
|
||||||
query = {
|
query = {
|
||||||
'keyword': '',
|
'keyword': '',
|
||||||
'mid': playlist_id,
|
'mid': playlist_id,
|
||||||
'order': 'pubdate',
|
'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
|
||||||
'order_avoided': 'true',
|
'order_avoided': 'true',
|
||||||
'platform': 'web',
|
'platform': 'web',
|
||||||
'pn': page_idx + 1,
|
'pn': page_idx + 1,
|
||||||
'ps': 30,
|
'ps': 30,
|
||||||
'tid': 0,
|
'tid': 0,
|
||||||
'web_location': 1550101,
|
'web_location': 1550101,
|
||||||
'wts': int(time.time()),
|
|
||||||
}
|
}
|
||||||
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
response = self._download_json(
|
||||||
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
|
||||||
headers={'referer': url})
|
query=self._sign_wbi(query, playlist_id),
|
||||||
|
note=f'Downloading space page {page_idx}', headers={'Referer': url})
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||||
raise
|
raise
|
||||||
if response['code'] in (-352, -401):
|
status_code = response['code']
|
||||||
|
if status_code == -401:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
f'Request is blocked by server ({-response["code"]}), '
|
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||||
'please add cookies, wait and try later.', expected=True)
|
elif status_code == -352 and not self.is_logged_in:
|
||||||
|
self.raise_login_required('Request is rejected, you need to login to access playlist')
|
||||||
|
elif status_code != 0:
|
||||||
|
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
|
||||||
return response['data']
|
return response['data']
|
||||||
|
|
||||||
def get_metadata(page_data):
|
def get_metadata(page_data):
|
||||||
|
@ -1280,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||||
'info_dict': {'id': 'watchlater'},
|
'info_dict': {
|
||||||
|
'id': r're:\d+',
|
||||||
|
'title': '稍后再看',
|
||||||
|
},
|
||||||
'playlist_mincount': 0,
|
'playlist_mincount': 0,
|
||||||
'skip': 'login required',
|
'skip': 'login required',
|
||||||
}]
|
}]
|
||||||
|
@ -1356,14 +1519,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||||
'skip': 'redirect url',
|
'skip': 'redirect url',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bilibili.com/list/watchlater',
|
'url': 'https://www.bilibili.com/list/watchlater',
|
||||||
'info_dict': {'id': 'watchlater'},
|
'info_dict': {
|
||||||
|
'id': r're:2_\d+',
|
||||||
|
'title': '稍后再看',
|
||||||
|
'uploader': str,
|
||||||
|
'uploader_id': str,
|
||||||
|
},
|
||||||
'playlist_mincount': 0,
|
'playlist_mincount': 0,
|
||||||
'skip': 'login required',
|
'skip': 'login required',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||||
'info_dict': {'id': 'watchlater'},
|
'info_dict': {'id': 'watchlater'},
|
||||||
'playlist_mincount': 0,
|
'playlist_mincount': 0,
|
||||||
'skip': 'login required',
|
'skip': 'redirect url & login required',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_medialist(self, query, list_id):
|
def _extract_medialist(self, query, list_id):
|
||||||
|
@ -1414,7 +1582,7 @@ def _real_extract(self, url):
|
||||||
'title': ('title', {str}),
|
'title': ('title', {str}),
|
||||||
'uploader': ('upper', 'name', {str}),
|
'uploader': ('upper', 'name', {str}),
|
||||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||||
'timestamp': ('ctime', {int_or_none}),
|
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
|
||||||
'thumbnail': ('cover', {url_or_none}),
|
'thumbnail': ('cover', {url_or_none}),
|
||||||
})),
|
})),
|
||||||
}
|
}
|
||||||
|
@ -1808,7 +1976,8 @@ def _perform_login(self, username, password):
|
||||||
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
||||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
||||||
login_post = self._download_json(
|
login_post = self._download_json(
|
||||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
|
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
|
||||||
|
data=urlencode_postdata({
|
||||||
'username': username,
|
'username': username,
|
||||||
'password': base64.b64encode(password_hash).decode('ascii'),
|
'password': base64.b64encode(password_hash).decode('ascii'),
|
||||||
'keep_me': 'true',
|
'keep_me': 'true',
|
||||||
|
@ -2140,7 +2309,8 @@ def _entries(self, series_id):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
series_id = self._match_id(url)
|
series_id = self._match_id(url)
|
||||||
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
series_info = self._call_api(
|
||||||
|
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
|
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
|
||||||
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
|
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
|
||||||
|
|
Loading…
Reference in a new issue