mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 12:01:28 +00:00
[extractor/npr] Use stream url from json-ld (#3455)
Closes #1934
Authored by: r5d
This commit is contained in:
parent
09d02ea429
commit
e50c3500b4
|
@ -1461,7 +1461,7 @@ def extract_video_object(e):
|
||||||
assert e['@type'] == 'VideoObject'
|
assert e['@type'] == 'VideoObject'
|
||||||
author = e.get('author')
|
author = e.get('author')
|
||||||
info.update({
|
info.update({
|
||||||
'url': url_or_none(e.get('contentUrl')),
|
'url': traverse_obj(e, 'contentUrl', 'embedUrl', expected_type=url_or_none),
|
||||||
'title': unescapeHTML(e.get('name')),
|
'title': unescapeHTML(e.get('name')),
|
||||||
'description': unescapeHTML(e.get('description')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
'thumbnails': [{'url': url}
|
'thumbnails': [{'url': url}
|
||||||
|
@ -1529,6 +1529,8 @@ def traverse_json_ld(json_ld, at_top_level=True):
|
||||||
})
|
})
|
||||||
if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':
|
if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':
|
||||||
extract_video_object(e['video'][0])
|
extract_video_object(e['video'][0])
|
||||||
|
elif traverse_obj(e, ('subjectOf', 0, '@type')) == 'VideoObject':
|
||||||
|
extract_video_object(e['subjectOf'][0])
|
||||||
elif item_type == 'VideoObject':
|
elif item_type == 'VideoObject':
|
||||||
extract_video_object(e)
|
extract_video_object(e)
|
||||||
if expected_type is None:
|
if expected_type is None:
|
||||||
|
|
|
@ -51,6 +51,15 @@ class NprIE(InfoExtractor):
|
||||||
# multimedia, no formats, stream
|
# multimedia, no formats, stream
|
||||||
'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
|
'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.npr.org/2022/03/15/1084896560/bonobo-tiny-desk-home-concert',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1086468851',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Bonobo: Tiny Desk (Home) Concert',
|
||||||
|
'duration': 1061,
|
||||||
|
'thumbnail': r're:^https?://media.npr.org/assets/img/.*\.jpg$',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -65,6 +74,10 @@ def _real_extract(self, url):
|
||||||
})['list']['story'][0]
|
})['list']['story'][0]
|
||||||
playlist_title = story.get('title', {}).get('$text')
|
playlist_title = story.get('title', {}).get('$text')
|
||||||
|
|
||||||
|
# Fetch the JSON-LD from the npr page.
|
||||||
|
json_ld = self._search_json_ld(
|
||||||
|
self._download_webpage(url, playlist_id), playlist_id, 'NewsArticle', fatal=False)
|
||||||
|
|
||||||
KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
|
KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
|
||||||
quality = qualities(KNOWN_FORMATS)
|
quality = qualities(KNOWN_FORMATS)
|
||||||
|
|
||||||
|
@ -110,6 +123,10 @@ def _real_extract(self, url):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
stream_url, stream_id, 'mp4', 'm3u8_native',
|
stream_url, stream_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
if not formats and json_ld.get('url'):
|
||||||
|
formats.extend(self._extract_m3u8_formats(json_ld['url'], media_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
|
|
Loading…
Reference in a new issue