mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-07 08:31:17 +00:00
[soundcloud:user] Rework extractor (Closes #6399)
This commit is contained in:
parent
1f04873517
commit
80fb6d4aa4
|
@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)^(?:https?://)?
|
_VALID_URL = r'''(?x)^(?:https?://)?
|
||||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||||
(?P<uploader>[\w\d-]+)/
|
(?P<uploader>[\w\d-]+)/
|
||||||
(?!sets/|(?:likes|tracks)/?(?:$|[?#]))
|
(?!(?:tracks|sets|reposts|likes|spotlight)/?(?:$|[?#]))
|
||||||
(?P<title>[\w\d-]+)/?
|
(?P<title>[\w\d-]+)/?
|
||||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||||
|
@ -293,60 +293,131 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class SoundcloudUserIE(SoundcloudIE):
|
class SoundcloudUserIE(SoundcloudIE):
|
||||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
|
_VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|sets|reposts|likes|spotlight)/?)?(\?.*)?$'
|
||||||
IE_NAME = 'soundcloud:user'
|
IE_NAME = 'soundcloud:user'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://soundcloud.com/the-concept-band',
|
'url': 'https://soundcloud.com/the-akashic-chronicler',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9615865',
|
'id': '114582580',
|
||||||
'title': 'The Royal Concept',
|
'title': 'The Akashic Chronicler (All)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 12
|
'playlist_mincount': 112,
|
||||||
}, {
|
|
||||||
'url': 'https://soundcloud.com/the-concept-band/likes',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '9615865',
|
|
||||||
'title': 'The Royal Concept',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 1,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
|
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': '114582580',
|
||||||
|
'title': 'The Akashic Chronicler (Tracks)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 50,
|
||||||
|
}, {
|
||||||
|
'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '114582580',
|
||||||
|
'title': 'The Akashic Chronicler (Playlists)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
}, {
|
||||||
|
'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '114582580',
|
||||||
|
'title': 'The Akashic Chronicler (Reposts)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 9,
|
||||||
|
}, {
|
||||||
|
'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '114582580',
|
||||||
|
'title': 'The Akashic Chronicler (Likes)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 333,
|
||||||
|
}, {
|
||||||
|
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7098329',
|
||||||
|
'title': 'Grynpyret (Spotlight)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_API_BASE = 'https://api.soundcloud.com'
|
||||||
|
_API_V2_BASE = 'https://api-v2.soundcloud.com'
|
||||||
|
|
||||||
|
_BASE_URL_MAP = {
|
||||||
|
'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
|
||||||
|
'tracks': '%s/users/%%s/tracks' % _API_BASE,
|
||||||
|
'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
|
||||||
|
'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
|
||||||
|
'likes': '%s/users/%%s/likes' % _API_V2_BASE,
|
||||||
|
'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
|
||||||
|
}
|
||||||
|
|
||||||
|
_TITLE_MAP = {
|
||||||
|
'all': 'All',
|
||||||
|
'tracks': 'Tracks',
|
||||||
|
'sets': 'Playlists',
|
||||||
|
'reposts': 'Reposts',
|
||||||
|
'likes': 'Likes',
|
||||||
|
'spotlight': 'Spotlight',
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
uploader = mobj.group('user')
|
uploader = mobj.group('user')
|
||||||
resource = mobj.group('rsrc')
|
|
||||||
if resource is None:
|
|
||||||
resource = 'tracks'
|
|
||||||
elif resource == 'likes':
|
|
||||||
resource = 'favorites'
|
|
||||||
|
|
||||||
url = 'http://soundcloud.com/%s/' % uploader
|
url = 'http://soundcloud.com/%s/' % uploader
|
||||||
resolv_url = self._resolv_url(url)
|
resolv_url = self._resolv_url(url)
|
||||||
user = self._download_json(
|
user = self._download_json(
|
||||||
resolv_url, uploader, 'Downloading user info')
|
resolv_url, uploader, 'Downloading user info')
|
||||||
base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource)
|
|
||||||
|
resource = mobj.group('rsrc') or 'all'
|
||||||
|
base_url = self._BASE_URL_MAP[resource] % user['id']
|
||||||
|
|
||||||
|
next_href = None
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for i in itertools.count():
|
for i in itertools.count():
|
||||||
data = compat_urllib_parse.urlencode({
|
if not next_href:
|
||||||
'offset': i * 50,
|
data = compat_urllib_parse.urlencode({
|
||||||
'limit': 50,
|
'offset': i * 50,
|
||||||
'client_id': self._CLIENT_ID,
|
'limit': 50,
|
||||||
})
|
'client_id': self._CLIENT_ID,
|
||||||
new_entries = self._download_json(
|
'linked_partitioning': '1',
|
||||||
base_url + data, uploader, 'Downloading track page %s' % (i + 1))
|
'representation': 'speedy',
|
||||||
if len(new_entries) == 0:
|
})
|
||||||
|
next_href = base_url + '?' + data
|
||||||
|
|
||||||
|
response = self._download_json(
|
||||||
|
next_href, uploader, 'Downloading track page %s' % (i + 1))
|
||||||
|
|
||||||
|
collection = response['collection']
|
||||||
|
|
||||||
|
if not collection:
|
||||||
self.to_screen('%s: End page received' % uploader)
|
self.to_screen('%s: End page received' % uploader)
|
||||||
break
|
break
|
||||||
entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)
|
|
||||||
|
def resolve_permalink_url(candidates):
|
||||||
|
for cand in candidates:
|
||||||
|
if isinstance(cand, dict):
|
||||||
|
permalink_url = cand.get('permalink_url')
|
||||||
|
if permalink_url and permalink_url.startswith('http'):
|
||||||
|
return permalink_url
|
||||||
|
|
||||||
|
for e in collection:
|
||||||
|
permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
|
||||||
|
if permalink_url:
|
||||||
|
entries.append(self.url_result(permalink_url))
|
||||||
|
|
||||||
|
if 'next_href' in response:
|
||||||
|
next_href = response['next_href']
|
||||||
|
if not next_href:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
next_href = None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'id': compat_str(user['id']),
|
'id': compat_str(user['id']),
|
||||||
'title': user['username'],
|
'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue