[bandcamp] Fix user URLs (#2800)

Authored by: lyz-code
This commit is contained in:
lyz-code 2022-02-16 15:56:17 +00:00 committed by GitHub
parent 07ea0014ae
commit 85a0ad0117
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 42 additions and 28 deletions

View file

@@ -212,7 +212,7 @@ def _real_extract(self, url):
class BandcampAlbumIE(BandcampIE): class BandcampAlbumIE(BandcampIE):
IE_NAME = 'Bandcamp:album' IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?' _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -257,14 +257,6 @@ class BandcampAlbumIE(BandcampIE):
'id': 'hierophany-of-the-open-grave', 'id': 'hierophany-of-the-open-grave',
}, },
'playlist_mincount': 9, 'playlist_mincount': 9,
}, {
'url': 'http://dotscale.bandcamp.com',
'info_dict': {
'title': 'Loom',
'id': 'dotscale',
'uploader_id': 'dotscale',
},
'playlist_mincount': 7,
}, { }, {
# with escaped quote in title # with escaped quote in title
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
@@ -391,41 +383,63 @@ def _real_extract(self, url):
} }
class BandcampMusicIE(InfoExtractor): class BandcampUserIE(InfoExtractor):
_VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music' IE_NAME = 'Bandcamp:user'
_VALID_URL = r'https?://(?!www\.)(?P<id>[^.]+)\.bandcamp\.com(?:/music)?/?(?:[#?]|$)'
_TESTS = [{ _TESTS = [{
# Type 1 Bandcamp user page.
'url': 'https://adrianvonziegler.bandcamp.com',
'info_dict': {
'id': 'adrianvonziegler',
'title': 'Discography of adrianvonziegler',
},
'playlist_mincount': 23,
}, {
# Bandcamp user page with only one album
'url': 'http://dotscale.bandcamp.com',
'info_dict': {
'id': 'dotscale',
'title': 'Discography of dotscale'
},
'playlist_count': 1,
}, {
# Type 2 Bandcamp user page.
'url': 'https://nightcallofficial.bandcamp.com',
'info_dict': {
'id': 'nightcallofficial',
'title': 'Discography of nightcallofficial',
},
'playlist_count': 4,
}, {
'url': 'https://steviasphere.bandcamp.com/music', 'url': 'https://steviasphere.bandcamp.com/music',
'playlist_mincount': 47, 'playlist_mincount': 47,
'info_dict': { 'info_dict': {
'id': 'steviasphere', 'id': 'steviasphere',
'title': 'Discography of steviasphere',
}, },
}, { }, {
'url': 'https://coldworldofficial.bandcamp.com/music', 'url': 'https://coldworldofficial.bandcamp.com/music',
'playlist_mincount': 10, 'playlist_mincount': 10,
'info_dict': { 'info_dict': {
'id': 'coldworldofficial', 'id': 'coldworldofficial',
'title': 'Discography of coldworldofficial',
}, },
}, { }, {
'url': 'https://nuclearwarnowproductions.bandcamp.com/music', 'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
'playlist_mincount': 399, 'playlist_mincount': 399,
'info_dict': { 'info_dict': {
'id': 'nuclearwarnowproductions', 'id': 'nuclearwarnowproductions',
'title': 'Discography of nuclearwarnowproductions',
}, },
} }]
]
_TYPE_IE_DICT = {
'album': BandcampAlbumIE.ie_key(),
'track': BandcampIE.ie_key()
}
def _real_extract(self, url): def _real_extract(self, url):
id = self._match_id(url) uploader = self._match_id(url)
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, uploader)
items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
entries = [ discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
self.url_result( or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
f'https://{id}.bandcamp.com/{item[0]}',
ie=self._TYPE_IE_DICT[item[1]]) return self.playlist_from_matches(
for item in items] discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
return self.playlist_result(entries, id)

View file

@@ -118,7 +118,7 @@
BandcampIE, BandcampIE,
BandcampAlbumIE, BandcampAlbumIE,
BandcampWeeklyIE, BandcampWeeklyIE,
BandcampMusicIE, BandcampUserIE,
) )
from .bannedvideo import BannedVideoIE from .bannedvideo import BannedVideoIE
from .bbc import ( from .bbc import (