[ie/NovaEmbed] Fix extractor (#7910)

Closes #8025
Authored by: std-move
This commit is contained in:
std-move 2023-09-21 20:19:52 +02:00 committed by GitHub
parent a5e264d74b
commit 2269065ad6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -6,7 +6,6 @@
determine_ext, determine_ext,
int_or_none, int_or_none,
js_to_json, js_to_json,
qualities,
traverse_obj, traverse_obj,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
@ -49,77 +48,52 @@ def _real_extract(self, url):
duration = None duration = None
formats = [] formats = []
player = self._parse_json( def process_format_list(format_list, format_id=""):
self._search_regex( nonlocal formats, has_drm
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,', if not isinstance(format_list, list):
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'), format_list = [format_list]
webpage, 'player', default='{}', group='json'), video_id, fatal=False) for format_dict in format_list:
if player: if not isinstance(format_dict, dict):
for format_id, format_list in player['tracks'].items(): continue
if not isinstance(format_list, list): if (not self.get_param('allow_unplayable_formats')
format_list = [format_list] and traverse_obj(format_dict, ('drm', 'keySystem'))):
for format_dict in format_list: has_drm = True
if not isinstance(format_dict, dict): continue
continue format_url = url_or_none(format_dict.get('src'))
if (not self.get_param('allow_unplayable_formats') format_type = format_dict.get('type')
and traverse_obj(format_dict, ('drm', 'keySystem'))): ext = determine_ext(format_url)
has_drm = True if (format_type == 'application/x-mpegURL'
continue or format_id == 'HLS' or ext == 'm3u8'):
format_url = url_or_none(format_dict.get('src')) formats.extend(self._extract_m3u8_formats(
format_type = format_dict.get('type') format_url, video_id, 'mp4',
ext = determine_ext(format_url) entry_protocol='m3u8_native', m3u8_id='hls',
if (format_type == 'application/x-mpegURL' fatal=False))
or format_id == 'HLS' or ext == 'm3u8'): elif (format_type == 'application/dash+xml'
formats.extend(self._extract_m3u8_formats( or format_id == 'DASH' or ext == 'mpd'):
format_url, video_id, 'mp4', formats.extend(self._extract_mpd_formats(
entry_protocol='m3u8_native', m3u8_id='hls', format_url, video_id, mpd_id='dash', fatal=False))
fatal=False)) else:
elif (format_type == 'application/dash+xml' formats.append({
or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
duration = int_or_none(player.get('duration'))
else:
# Old path, not actual as of 08.04.2020
bitrates = self._parse_json(
self._search_regex(
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
format_list = [format_list]
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
continue
f = {
'url': format_url, 'url': format_url,
} })
f_id = format_id
for quality in QUALITIES: player = self._search_json(
if '%s.mp4' % quality in format_url: r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*</script>')
f_id += '-%s' % quality if player:
f.update({ for src in traverse_obj(player, ('lib', 'source', 'sources', ...)):
'quality': quality_key(quality), process_format_list(src)
'format_note': quality.upper(), duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none}))
}) if not formats and not has_drm:
break # older code path, in use before August 2023
f['format_id'] = f_id player = self._parse_json(
formats.append(f) self._search_regex(
(r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P<json>{.*?})\s*\)(?:\s*\))?\s*,',
r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'),
webpage, 'player', group='json'), video_id)
if player:
for format_id, format_list in player['tracks'].items():
process_format_list(format_list, format_id)
duration = int_or_none(player.get('duration'))
if not formats and has_drm: if not formats and has_drm:
self.report_drm(video_id) self.report_drm(video_id)