mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 10:31:29 +00:00
[extractor/BilibiliSpace] Fix extractor, better error message (#5043)
Closes #5038. Authored by: lockmatrix
This commit is contained in:
parent
0d887f273a
commit
12f153a827
|
@ -4,6 +4,7 @@
|
||||||
import functools
|
import functools
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
|
import urllib
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
@ -508,11 +509,11 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||||
first_page = fetch_page(1)
|
first_page = fetch_page(0)
|
||||||
metadata = get_metadata(first_page)
|
metadata = get_metadata(first_page)
|
||||||
|
|
||||||
paged_list = InAdvancePagedList(
|
paged_list = InAdvancePagedList(
|
||||||
lambda idx: get_entries(fetch_page(idx) if idx > 1 else first_page),
|
lambda idx: get_entries(fetch_page(idx) if idx else first_page),
|
||||||
metadata['page_count'], metadata['page_size'])
|
metadata['page_count'], metadata['page_size'])
|
||||||
|
|
||||||
return metadata, paged_list
|
return metadata, paged_list
|
||||||
|
@ -535,10 +536,19 @@ def _real_extract(self, url):
|
||||||
'To download audios, add a "/audio" to the URL')
|
'To download audios, add a "/audio" to the URL')
|
||||||
|
|
||||||
def fetch_page(page_idx):
|
def fetch_page(page_idx):
|
||||||
return self._download_json(
|
try:
|
||||||
'https://api.bilibili.com/x/space/arc/search', playlist_id,
|
response = self._download_json('https://api.bilibili.com/x/space/arc/search',
|
||||||
note=f'Downloading page {page_idx}',
|
playlist_id, note=f'Downloading page {page_idx}',
|
||||||
query={'mid': playlist_id, 'pn': page_idx, 'jsonp': 'jsonp'})['data']
|
query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||||
|
raise
|
||||||
|
if response['code'] == -401:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||||
|
return response['data']
|
||||||
|
|
||||||
def get_metadata(page_data):
|
def get_metadata(page_data):
|
||||||
page_size = page_data['page']['ps']
|
page_size = page_data['page']['ps']
|
||||||
|
@ -573,7 +583,7 @@ def fetch_page(page_idx):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
|
'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
|
||||||
note=f'Downloading page {page_idx}',
|
note=f'Downloading page {page_idx}',
|
||||||
query={'uid': playlist_id, 'pn': page_idx, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
|
query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
|
||||||
|
|
||||||
def get_metadata(page_data):
|
def get_metadata(page_data):
|
||||||
return {
|
return {
|
||||||
|
@ -608,7 +618,7 @@ def fetch_page(page_idx):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
|
'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
|
||||||
playlist_id, note=f'Downloading page {page_idx}',
|
playlist_id, note=f'Downloading page {page_idx}',
|
||||||
query={'mid': mid, 'season_id': sid, 'page_num': page_idx, 'page_size': 30})['data']
|
query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
|
||||||
|
|
||||||
def get_metadata(page_data):
|
def get_metadata(page_data):
|
||||||
page_size = page_data['page']['page_size']
|
page_size = page_data['page']['page_size']
|
||||||
|
|
Loading…
Reference in a new issue