import base64 import functools import hashlib import itertools import json import math import re import time import urllib.parse from .common import InfoExtractor, SearchInfoExtractor from ..dependencies import Cryptodome from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, GeoRestrictedError, InAdvancePagedList, OnDemandPagedList, bool_or_none, clean_html, determine_ext, filter_dict, float_or_none, format_field, get_element_by_class, int_or_none, join_nonempty, make_archive_id, merge_dicts, mimetype2ext, parse_count, parse_qs, qualities, smuggle_url, srt_subtitles_timecode, str_or_none, traverse_obj, try_call, unified_timestamp, unsmuggle_url, url_or_none, urlencode_postdata, variadic, ) class BilibiliBaseIE(InfoExtractor): _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?') def extract_formats(self, play_info): format_names = { r['quality']: traverse_obj(r, 'new_description', 'display_desc') for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality'])) } audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict})) flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio')) if flac_audio: audios.append(flac_audio) formats = [{ 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'), 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')), 'acodec': traverse_obj(audio, ('codecs', {str.lower})), 'vcodec': 'none', 'tbr': float_or_none(audio.get('bandwidth'), scale=1000), 'filesize': int_or_none(audio.get('size')), 'format_id': str_or_none(audio.get('id')), } for audio in audios] formats.extend({ 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'), 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')), 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')), 'width': int_or_none(video.get('width')), 'height': int_or_none(video.get('height')), 'vcodec': video.get('codecs'), 'acodec': 'none' if audios else None, 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))), 'tbr': float_or_none(video.get('bandwidth'), scale=1000), 'filesize': int_or_none(video.get('size')), 'quality': int_or_none(video.get('id')), 'format_id': traverse_obj( video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1), ('id', {str_or_none}), get_all=False), 'format': format_names.get(video.get('id')), } for video in traverse_obj(play_info, ('dash', 'video', ...))) missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality'))) if missing_formats: self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; ' f'you have to login or become premium member to download them. {self._login_hint()}') return formats def _download_playinfo(self, video_id, cid): return self._download_json( 'https://api.bilibili.com/x/player/playurl', video_id, query={'bvid': video_id, 'cid': cid, 'fnval': 4048}, note=f'Downloading video formats for cid {cid}')['data'] def json2srt(self, json_data): srt_data = '' for idx, line in enumerate(json_data.get('body') or []): srt_data += (f'{idx + 1}\n' f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n' f'{line["content"]}\n\n') return srt_data def _get_subtitles(self, video_id, cid, aid=None): subtitles = { 'danmaku': [{ 'ext': 'xml', 'url': f'https://comment.bilibili.com/{cid}.xml', }] } subtitle_info = traverse_obj(self._download_json( 'https://api.bilibili.com/x/player/v2', video_id, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, note=f'Extracting subtitle info {cid}'), ('data', 'subtitle')) subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan'])) if not subs_list and traverse_obj(subtitle_info, 'allow_submit'): if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True) for s in subs_list: subtitles.setdefault(s['lan'], []).append({ 'ext': 'srt', 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)) }) return subtitles def _get_chapters(self, aid, cid): chapters = aid and cid and self._download_json( 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid}, note='Extracting chapters', fatal=False) return traverse_obj(chapters, ('data', 'view_points', ..., { 'title': 'content', 'start_time': 'from', 'end_time': 'to', })) or None def _get_comments(self, aid): for idx in itertools.count(1): replies = traverse_obj( self._download_json( f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685', aid, note=f'Extracting comments from page {idx}', fatal=False), ('data', 'replies')) if not replies: return for children in map(self._get_all_children, replies): yield from children def _get_all_children(self, reply): yield { 'author': traverse_obj(reply, ('member', 'uname')), 'author_id': traverse_obj(reply, ('member', 'mid')), 'id': reply.get('rpid'), 'text': traverse_obj(reply, ('content', 'message')), 'timestamp': reply.get('ctime'), 'parent': reply.get('parent') or 'root', } for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))): yield from children def _get_episodes_from_season(self, ss_id, url): season_info = self._download_json( 'https://api.bilibili.com/pgc/web/season/section', ss_id, note='Downloading season info', query={'season_id': ss_id}, headers={'Referer': url, **self.geo_verification_headers()}) for entry in traverse_obj(season_info, ( 'result', 'main_section', 'episodes', lambda _, v: url_or_none(v['share_url']) and v['id'])): yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id'))) def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None): cid_edges = cid_edges or {} division_data = self._download_json( 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id, query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id}, note=f'Extracting divisions from edge {edge_id}') edges.setdefault(edge_id, {}).update( traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, { 'title': ('title', {str}), 'cid': ('cid', {int_or_none}), }), get_all=False)) edges[edge_id].update(traverse_obj(division_data, ('data', { 'title': ('title', {str}), 'choices': ('edges', 'questions', ..., 'choices', ..., { 'edge_id': ('id', {int_or_none}), 'cid': ('cid', {int_or_none}), 'text': ('option', {str}), }), }))) # use dict to combine edges that use the same video section (same cid) cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id] for choice in traverse_obj(edges, (edge_id, 'choices', ...)): if choice['edge_id'] not in edges: edges[choice['edge_id']] = {'cid': choice['cid']} self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges) return cid_edges def _get_interactive_entries(self, video_id, cid, metainfo): graph_version = traverse_obj( self._download_json( 'https://api.bilibili.com/x/player/wbi/v2', video_id, 'Extracting graph version', query={'bvid': video_id, 'cid': cid}), ('data', 'interaction', 'graph_version', {int_or_none})) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) for cid, edges in cid_edges.items(): play_info = self._download_playinfo(video_id, cid) yield { **metainfo, 'id': f'{video_id}_{cid}', 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}', 'formats': self.extract_formats(play_info), 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}', 'duration': float_or_none(play_info.get('timelength'), scale=1000), 'subtitles': self.extract_subtitles(video_id, cid), } class BiliBiliIE(BilibiliBaseIE): _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bilibili.com/video/BV13x41117TL', 'info_dict': { 'id': 'BV13x41117TL', 'title': '阿滴英文|英文歌分享#6 "Closer', 'ext': 'mp4', 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文', 'uploader_id': '65880958', 'uploader': '阿滴英文', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'duration': 554.117, 'tags': list, 'comment_count': int, 'upload_date': '20170301', 'timestamp': 1488353834, 'like_count': int, 'view_count': int, }, }, { 'note': 'old av URL version', 'url': 'http://www.bilibili.com/video/av1074402/', 'info_dict': { 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', 'ext': 'mp4', 'uploader': '菊子桑', 'uploader_id': '156160', 'id': 'BV11x411K7CN', 'title': '【金坷垃】金泡沫', 'duration': 308.36, 'upload_date': '20140420', 'timestamp': 1397983878, 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', 'like_count': int, 'comment_count': int, 'view_count': int, 'tags': list, }, 'params': {'skip_download': True}, }, { 'note': 'Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797', 'info_dict': { 'id': 'BV1bK411W797', 'title': '物语中的人物是如何吐槽自己的OP的' }, 'playlist_count': 18, 'playlist': [{ 'info_dict': { 'id': 'BV1bK411W797_p1', 'ext': 'mp4', 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川', 'tags': 'count:10', 'timestamp': 1589601697, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'uploader': '打牌还是打桩', 'uploader_id': '150259984', 'like_count': int, 'comment_count': int, 'upload_date': '20200516', 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, } }] }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', 'info_dict': { 'id': 'BV1bK411W797_p1', 'ext': 'mp4', 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川', 'tags': 'count:10', 'timestamp': 1589601697, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'uploader': '打牌还是打桩', 'uploader_id': '150259984', 'like_count': int, 'comment_count': int, 'upload_date': '20200516', 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, } }, { 'note': 'video has subtitles', 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', 'info_dict': { 'id': 'BV12N4y1M7rh', 'ext': 'mp4', 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', 'tags': list, 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', 'duration': 313.557, 'upload_date': '20220709', 'uploader': '小夫太渴', 'timestamp': 1657347907, 'uploader_id': '1326814124', 'comment_count': int, 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'subtitles': 'count:2' }, 'params': {'listsubtitles': True}, }, { 'url': 'https://www.bilibili.com/video/av8903802/', 'info_dict': { 'id': 'BV13x41117TL', 'ext': 'mp4', 'title': '阿滴英文|英文歌分享#6 "Closer', 'upload_date': '20170301', 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a', 'timestamp': 1488353834, 'uploader_id': '65880958', 'uploader': '阿滴英文', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'duration': 554.117, 'tags': list, 'comment_count': int, 'view_count': int, 'like_count': int, }, 'params': { 'skip_download': True, }, }, { 'note': 'video has chapter', 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/', 'info_dict': { 'id': 'BV1vL411G7N7', 'ext': 'mp4', 'title': '如何为你的B站视频添加进度条分段', 'timestamp': 1634554558, 'upload_date': '20211018', 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d', 'tags': list, 'uploader': '爱喝咖啡的当麻', 'duration': 669.482, 'uploader_id': '1680903', 'chapters': 'count:6', 'comment_count': int, 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'params': {'skip_download': True}, }, { 'note': 'video redirects to festival page', 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h', 'info_dict': { 'id': 'BV1wP4y1P72h', 'ext': 'mp4', 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】', 'timestamp': 1643947497, 'upload_date': '20220204', 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6', 'uploader': '叨叨冯聊音乐', 'duration': 246.719, 'uploader_id': '528182630', 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'params': {'skip_download': True}, }, { 'note': 'newer festival video', 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f', 'info_dict': { 'id': 'BV1ay4y1d77f', 'ext': 'mp4', 'title': '【崩坏3新春剧场】为特别的你送上祝福!', 'timestamp': 1674273600, 'upload_date': '20230121', 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8', 'uploader': '果蝇轰', 'duration': 1111.722, 'uploader_id': '8469526', 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'params': {'skip_download': True}, }, { 'note': 'interactive/split-path video', 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/', 'info_dict': { 'id': 'BV1af4y1H7ga', 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!', 'timestamp': 1630500414, 'upload_date': '20210901', 'description': 'md5:01113e39ab06e28042d74ac356a08786', 'tags': list, 'uploader': '钉宫妮妮Ninico', 'duration': 1503, 'uploader_id': '8881297', 'comment_count': int, 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'playlist_count': 33, 'playlist': [{ 'info_dict': { 'id': 'BV1af4y1H7ga_400950101', 'ext': 'mp4', 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~', 'timestamp': 1630500414, 'upload_date': '20210901', 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2', 'tags': list, 'uploader': '钉宫妮妮Ninico', 'duration': 11.605, 'uploader_id': '8881297', 'comment_count': int, 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }, { 'note': '301 redirect to bangumi link', 'url': 'https://www.bilibili.com/video/BV1TE411f7f1', 'info_dict': { 'id': '288525', 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?', 'ext': 'mp4', 'series': '我和我的祖国', 'series_id': '4780', 'season': '幕后纪实', 'season_id': '28609', 'season_number': 1, 'episode': '钱学森弹道和乘波体飞行器是什么?', 'episode_id': '288525', 'episode_number': 105, 'duration': 1183.957, 'timestamp': 1571648124, 'upload_date': '20191021', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }, { 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/', 'info_dict': { 'id': 'BV1jL41167ZG', 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!', 'ext': 'mp4', }, 'skip': 'supporter-only video', }, { 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/', 'info_dict': { 'id': 'BV1Ks411f7aQ', 'title': '【BD1080P】狼与香辛料I【华盟】', 'ext': 'mp4', }, 'skip': 'login required', }, { 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/', 'info_dict': { 'id': 'BV1GJ411x7h7', 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley', 'ext': 'mp4', }, 'skip': 'geo-restricted', }] def _real_extract(self, url): video_id = self._match_id(url) webpage, urlh = self._download_webpage_handle(url, video_id) if not self._match_valid_url(urlh.url): return self.url_result(urlh.url) initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) is_festival = 'videoData' not in initial_state if is_festival: video_data = initial_state['videoInfo'] else: play_info_obj = self._search_json( r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False) if not play_info_obj: if traverse_obj(initial_state, ('error', 'trueCode')) == -403: self.raise_login_required() if traverse_obj(initial_state, ('error', 'trueCode')) == -404: raise ExtractorError( 'This video may be deleted or geo-restricted. ' 'You might want to try a VPN or a proxy server (with --proxy)', expected=True) play_info = traverse_obj(play_info_obj, ('data', {dict})) if not play_info: if traverse_obj(play_info_obj, 'code') == 87007: toast = get_element_by_class('tips-toast', webpage) or '' msg = clean_html( f'{get_element_by_class("belongs-to", toast) or ""},' + (get_element_by_class('level', toast) or '')) raise ExtractorError( f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True) raise ExtractorError('Failed to extract play info') video_data = initial_state['videoData'] video_id, title = video_data['bvid'], video_data.get('title') # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. page_list_json = not is_festival and traverse_obj( self._download_json( 'https://api.bilibili.com/x/player/pagelist', video_id, fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'}, note='Extracting videos in anthology'), 'data', expected_type=list) or [] is_anthology = len(page_list_json) > 1 part_id = int_or_none(parse_qs(url).get('p', [None])[-1]) if is_anthology and not part_id and self._yes_playlist(video_id, video_id): return self.playlist_from_matches( page_list_json, video_id, title, ie=BiliBiliIE, getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}') if is_anthology: part_id = part_id or 1 title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}' aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') festival_info = {} if is_festival: play_info = self._download_playinfo(video_id, cid) festival_info = traverse_obj(initial_state, { 'uploader': ('videoInfo', 'upName'), 'uploader_id': ('videoInfo', 'upMid', {str_or_none}), 'like_count': ('videoStatus', 'like', {int_or_none}), 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'), }, get_all=False) metainfo = { **traverse_obj(initial_state, { 'uploader': ('upData', 'name'), 'uploader_id': ('upData', 'mid', {str_or_none}), 'like_count': ('videoData', 'stat', 'like', {int_or_none}), 'tags': ('tags', ..., 'tag_name'), 'thumbnail': ('videoData', 'pic', {url_or_none}), }), **festival_info, **traverse_obj(video_data, { 'description': 'desc', 'timestamp': ('pubdate', {int_or_none}), 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}), 'comment_count': ('stat', 'reply', {int_or_none}), }, get_all=False), 'id': f'{video_id}{format_field(part_id, None, "_p%d")}', '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None, 'title': title, 'http_headers': {'Referer': url}, } is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate')) if is_interactive: return self.playlist_result( self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{ 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), '__post_extractor': self.extract_comments(aid), }) else: return { **metainfo, 'duration': float_or_none(play_info.get('timelength'), scale=1000), 'chapters': self._get_chapters(aid, cid), 'subtitles': self.extract_subtitles(video_id, cid), 'formats': self.extract_formats(play_info), '__post_extractor': self.extract_comments(aid), } class BiliBiliBangumiIE(BilibiliBaseIE): _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/bangumi/play/ep21495/', 'info_dict': { 'id': '21495', 'ext': 'mp4', 'series': '悠久之翼', 'series_id': '774', 'season': '第二季', 'season_id': '1182', 'season_number': 2, 'episode': 'forever/ef', 'episode_id': '21495', 'episode_number': 12, 'title': '12 forever/ef', 'duration': 1420.791, 'timestamp': 1320412200, 'upload_date': '20111104', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }, { 'url': 'https://www.bilibili.com/bangumi/play/ep267851', 'info_dict': { 'id': '267851', 'ext': 'mp4', 'series': '鬼灭之刃', 'series_id': '4358', 'season': '立志篇', 'season_id': '26801', 'season_number': 1, 'episode': '残酷', 'episode_id': '267851', 'episode_number': 1, 'title': '1 残酷', 'duration': 1425.256, 'timestamp': 1554566400, 'upload_date': '20190406', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' }, 'skip': 'Geo-restricted', }, { 'note': 'a making-of which falls outside main section', 'url': 'https://www.bilibili.com/bangumi/play/ep345120', 'info_dict': { 'id': '345120', 'ext': 'mp4', 'series': '鬼灭之刃', 'series_id': '4358', 'season': '立志篇', 'season_id': '26801', 'season_number': 1, 'episode': '炭治郎篇', 'episode_id': '345120', 'episode_number': 27, 'title': '#1 炭治郎篇', 'duration': 1922.129, 'timestamp': 1602853860, 'upload_date': '20201016', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' }, }] def _real_extract(self, url): episode_id = self._match_id(url) webpage = self._download_webpage(url, episode_id) if '您所在的地区无法观看本片' in webpage: raise GeoRestrictedError('This video is restricted') elif '正在观看预览,大会员免费看全片' in webpage: self.raise_login_required('This video is for premium members only') headers = {'Referer': url, **self.geo_verification_headers()} play_info = self._download_json( 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, headers=headers) premium_only = play_info.get('code') == -10403 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} formats = self.extract_formats(play_info) if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage): self.raise_login_required('This video is for premium members only') bangumi_info = self._download_json( 'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details', query={'ep_id': episode_id}, headers=headers)['result'] episode_number, episode_info = next(( (idx, ep) for idx, ep in enumerate(traverse_obj( bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1) if str_or_none(ep.get('id')) == episode_id), (1, {})) season_id = bangumi_info.get('season_id') season_number, season_title = season_id and next(( (idx + 1, e.get('season_title')) for idx, e in enumerate( traverse_obj(bangumi_info, ('seasons', ...))) if e.get('season_id') == season_id ), (None, None)) aid = episode_info.get('aid') return { 'id': episode_id, 'formats': formats, **traverse_obj(bangumi_info, { 'series': ('series', 'series_title', {str}), 'series_id': ('series', 'series_id', {str_or_none}), 'thumbnail': ('square_cover', {url_or_none}), }), **traverse_obj(episode_info, { 'episode': ('long_title', {str}), 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}), 'timestamp': ('pub_time', {int_or_none}), 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)}, }), 'episode_id': episode_id, 'season': str_or_none(season_title), 'season_id': str_or_none(season_id), 'season_number': season_number, 'duration': float_or_none(play_info.get('timelength'), scale=1000), 'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid), '__post_extractor': self.extract_comments(aid), 'http_headers': headers, } class BiliBiliBangumiMediaIE(BilibiliBaseIE): _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/bangumi/media/md24097891', 'info_dict': { 'id': '24097891', 'title': 'CAROLE & TUESDAY', 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829', }, 'playlist_mincount': 25, }, { 'url': 'https://www.bilibili.com/bangumi/media/md1565/', 'info_dict': { 'id': '1565', 'title': '攻壳机动队 S.A.C. 2nd GIG', 'description': 'md5:46cac00bafd645b97f4d6df616fc576d', }, 'playlist_count': 26, 'playlist': [{ 'info_dict': { 'id': '68540', 'ext': 'mp4', 'series': '攻壳机动队', 'series_id': '1077', 'season': '第二季', 'season_id': '1565', 'season_number': 2, 'episode': '再启动 REEMBODY', 'episode_id': '68540', 'episode_number': 1, 'title': '1 再启动 REEMBODY', 'duration': 1525.777, 'timestamp': 1425074413, 'upload_date': '20150227', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' }, }], }] def _real_extract(self, url): media_id = self._match_id(url) webpage = self._download_webpage(url, media_id) initial_state = self._search_json( r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id) ss_id = initial_state['mediaInfo']['season_id'] return self.playlist_result( self._get_episodes_from_season(ss_id, url), media_id, **traverse_obj(initial_state, ('mediaInfo', { 'title': ('title', {str}), 'description': ('evaluate', {str}), }))) class BiliBiliBangumiSeasonIE(BilibiliBaseIE): _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/bangumi/play/ss26801', 'info_dict': { 'id': '26801', 'title': '鬼灭之刃', 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b', }, 'playlist_mincount': 26 }, { 'url': 'https://www.bilibili.com/bangumi/play/ss2251', 'info_dict': { 'id': '2251', 'title': '玲音', 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4', }, 'playlist_count': 13, 'playlist': [{ 'info_dict': { 'id': '50188', 'ext': 'mp4', 'series': '玲音', 'series_id': '1526', 'season': 'TV', 'season_id': '2251', 'season_number': 1, 'episode': 'WEIRD', 'episode_id': '50188', 'episode_number': 1, 'title': '1 WEIRD', 'duration': 1436.992, 'timestamp': 1343185080, 'upload_date': '20120725', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' }, }], }] def _real_extract(self, url): ss_id = self._match_id(url) webpage = self._download_webpage(url, ss_id) metainfo = traverse_obj( self._search_json(r']+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id), ('itemListElement', ..., { 'title': ('name', {str}), 'description': ('description', {str}), }), get_all=False) return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo) class BilibiliCheeseBaseIE(BilibiliBaseIE): _HEADERS = {'Referer': 'https://www.bilibili.com/'} def _extract_episode(self, season_info, ep_id): episode_info = traverse_obj(season_info, ( 'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False) aid, cid = episode_info['aid'], episode_info['cid'] if traverse_obj(episode_info, 'ep_status') == -1: raise ExtractorError('This course episode is not yet available.', expected=True) if not traverse_obj(episode_info, 'playable'): self.raise_login_required('You need to purchase the course to download this episode') play_info = self._download_json( 'https://api.bilibili.com/pugv/player/web/playurl', ep_id, query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1}, headers=self._HEADERS, note='Downloading playinfo')['data'] return { 'id': str_or_none(ep_id), 'episode_id': str_or_none(ep_id), 'formats': self.extract_formats(play_info), 'extractor_key': BilibiliCheeseIE.ie_key(), 'extractor': BilibiliCheeseIE.IE_NAME, 'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}', **traverse_obj(episode_info, { 'episode': ('title', {str}), 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)}, 'alt_title': ('subtitle', {str}), 'duration': ('duration', {int_or_none}), 'episode_number': ('index', {int_or_none}), 'thumbnail': ('cover', {url_or_none}), 'timestamp': ('release_date', {int_or_none}), 'view_count': ('play', {int_or_none}), }), **traverse_obj(season_info, { 'uploader': ('up_info', 'uname', {str}), 'uploader_id': ('up_info', 'mid', {str_or_none}), }), 'subtitles': self.extract_subtitles(ep_id, cid, aid=aid), '__post_extractor': self.extract_comments(aid), 'http_headers': self._HEADERS, } def _download_season_info(self, query_key, video_id): return self._download_json( f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id, headers=self._HEADERS, note='Downloading season info')['data'] class BilibiliCheeseIE(BilibiliCheeseBaseIE): _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/cheese/play/ep229832', 'info_dict': { 'id': '229832', 'ext': 'mp4', 'title': '1 - 课程先导片', 'alt_title': '视频课 · 3分41秒', 'uploader': '马督工', 'uploader_id': '316568752', 'episode': '课程先导片', 'episode_id': '229832', 'episode_number': 1, 'duration': 221, 'timestamp': 1695549606, 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, } }] def _real_extract(self, url): ep_id = self._match_id(url) return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id) class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE): _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/cheese/play/ss5918', 'info_dict': { 'id': '5918', 'title': '【限时五折】新闻系学不到:马督工教你做自媒体', 'description': '帮普通人建立世界模型,降低人与人的沟通门槛', }, 'playlist': [{ 'info_dict': { 'id': '229832', 'ext': 'mp4', 'title': '1 - 课程先导片', 'alt_title': '视频课 · 3分41秒', 'uploader': '马督工', 'uploader_id': '316568752', 'episode': '课程先导片', 'episode_id': '229832', 'episode_number': 1, 'duration': 221, 'timestamp': 1695549606, 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, } }], 'params': {'playlist_items': '1'}, }, { 'url': 'https://www.bilibili.com/cheese/play/ss5918', 'info_dict': { 'id': '5918', 'title': '【限时五折】新闻系学不到:马督工教你做自媒体', 'description': '帮普通人建立世界模型,降低人与人的沟通门槛', }, 'playlist_mincount': 5, 'skip': 'paid video in list', }] def _get_cheese_entries(self, season_info): for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')): yield self._extract_episode(season_info, ep_id) def _real_extract(self, url): season_id = self._match_id(url) season_info = self._download_season_info('season_id', season_id) return self.playlist_result( self._get_cheese_entries(season_info), season_id, **traverse_obj(season_info, { 'title': ('title', {str}), 'description': ('subtitle', {str}), })) class BilibiliSpaceBaseIE(InfoExtractor): def _extract_playlist(self, fetch_page, get_metadata, get_entries): first_page = fetch_page(0) metadata = get_metadata(first_page) paged_list = InAdvancePagedList( lambda idx: get_entries(fetch_page(idx) if idx else first_page), metadata['page_count'], metadata['page_size']) return metadata, paged_list class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): _VALID_URL = r'https?://space\.bilibili\.com/(?P\d+)(?P