[extractor/BilibiliSpace] Fix extractor, better error message (#5043)

Closes #5038
Authored by: lockmatrix
This commit is contained in:
Locke 2022-10-04 10:59:05 +08:00 committed by GitHub
parent 0d887f273a
commit 12f153a827
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4,6 +4,7 @@
import functools import functools
import math import math
import re import re
import urllib
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..compat import ( from ..compat import (
@ -508,11 +509,11 @@ def _real_extract(self, url):
class BilibiliSpaceBaseIE(InfoExtractor): class BilibiliSpaceBaseIE(InfoExtractor):
def _extract_playlist(self, fetch_page, get_metadata, get_entries): def _extract_playlist(self, fetch_page, get_metadata, get_entries):
first_page = fetch_page(1) first_page = fetch_page(0)
metadata = get_metadata(first_page) metadata = get_metadata(first_page)
paged_list = InAdvancePagedList( paged_list = InAdvancePagedList(
lambda idx: get_entries(fetch_page(idx) if idx > 1 else first_page), lambda idx: get_entries(fetch_page(idx) if idx else first_page),
metadata['page_count'], metadata['page_size']) metadata['page_count'], metadata['page_size'])
return metadata, paged_list return metadata, paged_list
@ -535,10 +536,19 @@ def _real_extract(self, url):
'To download audios, add a "/audio" to the URL') 'To download audios, add a "/audio" to the URL')
def fetch_page(page_idx): def fetch_page(page_idx):
return self._download_json( try:
'https://api.bilibili.com/x/space/arc/search', playlist_id, response = self._download_json('https://api.bilibili.com/x/space/arc/search',
note=f'Downloading page {page_idx}', playlist_id, note=f'Downloading page {page_idx}',
query={'mid': playlist_id, 'pn': page_idx, 'jsonp': 'jsonp'})['data'] query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise
if response['code'] == -401:
raise ExtractorError(
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
return response['data']
def get_metadata(page_data): def get_metadata(page_data):
page_size = page_data['page']['ps'] page_size = page_data['page']['ps']
@ -573,7 +583,7 @@ def fetch_page(page_idx):
return self._download_json( return self._download_json(
'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id, 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
note=f'Downloading page {page_idx}', note=f'Downloading page {page_idx}',
query={'uid': playlist_id, 'pn': page_idx, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data'] query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
def get_metadata(page_data): def get_metadata(page_data):
return { return {
@ -608,7 +618,7 @@ def fetch_page(page_idx):
return self._download_json( return self._download_json(
'https://api.bilibili.com/x/polymer/space/seasons_archives_list', 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
playlist_id, note=f'Downloading page {page_idx}', playlist_id, note=f'Downloading page {page_idx}',
query={'mid': mid, 'season_id': sid, 'page_num': page_idx, 'page_size': 30})['data'] query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
def get_metadata(page_data): def get_metadata(page_data):
page_size = page_data['page']['page_size'] page_size = page_data['page']['page_size']