mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-04 23:43:20 +00:00
[nova:embed] Fix extraction (closes #24700)
This commit is contained in:
parent
dcc8522fdb
commit
5caf88ccb4
|
@ -6,6 +6,7 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
qualities,
|
qualities,
|
||||||
|
@ -33,6 +34,40 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
duration = None
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
player = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
|
||||||
|
webpage, 'player', default='{}'), video_id, fatal=False)
|
||||||
|
if player:
|
||||||
|
for format_id, format_list in player['tracks'].items():
|
||||||
|
if not isinstance(format_list, list):
|
||||||
|
format_list = [format_list]
|
||||||
|
for format_dict in format_list:
|
||||||
|
if not isinstance(format_dict, dict):
|
||||||
|
continue
|
||||||
|
format_url = url_or_none(format_dict.get('src'))
|
||||||
|
format_type = format_dict.get('type')
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if (format_type == 'application/x-mpegURL'
|
||||||
|
or format_id == 'HLS' or ext == 'm3u8'):
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||||
|
fatal=False))
|
||||||
|
elif (format_type == 'application/dash+xml'
|
||||||
|
or format_id == 'DASH' or ext == 'mpd'):
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
})
|
||||||
|
duration = int_or_none(player.get('duration'))
|
||||||
|
else:
|
||||||
|
# Old path, not actual as of 08.04.2020
|
||||||
bitrates = self._parse_json(
|
bitrates = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||||
|
@ -41,7 +76,6 @@ def _real_extract(self, url):
|
||||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||||
quality_key = qualities(QUALITIES)
|
quality_key = qualities(QUALITIES)
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format_id, format_list in bitrates.items():
|
for format_id, format_list in bitrates.items():
|
||||||
if not isinstance(format_list, list):
|
if not isinstance(format_list, list):
|
||||||
format_list = [format_list]
|
format_list = [format_list]
|
||||||
|
@ -69,6 +103,7 @@ def _real_extract(self, url):
|
||||||
break
|
break
|
||||||
f['format_id'] = f_id
|
f['format_id'] = f_id
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._og_search_title(
|
title = self._og_search_title(
|
||||||
|
@ -81,7 +116,8 @@ def _real_extract(self, url):
|
||||||
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||||
'thumbnail', fatal=False, group='value')
|
'thumbnail', fatal=False, group='value')
|
||||||
duration = int_or_none(self._search_regex(
|
duration = int_or_none(self._search_regex(
|
||||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
|
||||||
|
default=duration))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
Loading…
Reference in a new issue