[soundcloud] Restore previews extraction (closes #23739)

This commit is contained in:
Sergey M․ 2020-01-15 04:13:10 +07:00
parent e8cf0dbdd8
commit e4e5fa6e3c
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -96,7 +96,7 @@ class SoundcloudIE(InfoExtractor):
'repost_count': int, 'repost_count': int,
} }
}, },
# not streamable song # not streamable song, preview
{ {
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': { 'info_dict': {
@ -119,7 +119,6 @@ class SoundcloudIE(InfoExtractor):
# rtmp # rtmp
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Preview',
}, },
# private link # private link
{ {
@ -346,9 +345,9 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2
}) })
def invalid_url(url): def invalid_url(url):
return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url) return not url or url in format_urls
def add_format(f, protocol): def add_format(f, protocol, is_preview=False):
mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url) mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
if mobj: if mobj:
for k, v in mobj.groupdict().items(): for k, v in mobj.groupdict().items():
@ -361,12 +360,16 @@ def add_format(f, protocol):
v = f.get(k) v = f.get(k)
if v: if v:
format_id_list.append(v) format_id_list.append(v)
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
if preview:
format_id_list.append('preview')
abr = f.get('abr') abr = f.get('abr')
if abr: if abr:
f['abr'] = int(abr) f['abr'] = int(abr)
f.update({ f.update({
'format_id': '_'.join(format_id_list), 'format_id': '_'.join(format_id_list),
'protocol': 'm3u8_native' if protocol == 'hls' else 'http', 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
'preference': -10 if preview else None,
}) })
formats.append(f) formats.append(f)
@ -377,7 +380,7 @@ def add_format(f, protocol):
if not isinstance(t, dict): if not isinstance(t, dict):
continue continue
format_url = url_or_none(t.get('url')) format_url = url_or_none(t.get('url'))
if not format_url or t.get('snipped') or '/preview/' in format_url: if not format_url:
continue continue
stream = self._download_json( stream = self._download_json(
format_url, track_id, query=query, fatal=False) format_url, track_id, query=query, fatal=False)
@ -400,7 +403,8 @@ def add_format(f, protocol):
add_format({ add_format({
'url': stream_url, 'url': stream_url,
'ext': ext, 'ext': ext,
}, 'http' if protocol == 'progressive' else protocol) }, 'http' if protocol == 'progressive' else protocol,
t.get('snipped') or '/preview/' in format_url)
if not formats: if not formats:
# Old API, does not work for some tracks (e.g. # Old API, does not work for some tracks (e.g.