[extractor/artetv] Remove duplicate stream urls (#5047)

Closes #4510
Authored by: Grub4K
This commit is contained in:
Simon Sawicki 2022-09-26 18:43:54 +02:00 committed by GitHub
parent 0ca0f88121
commit 1534aba865
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -135,6 +135,7 @@ def _real_extract(self, url):
'Video is not available in this language edition of Arte or broadcast rights expired', expected=True)
formats, subtitles = [], {}
secondary_formats = []
for stream in config['data']['attributes']['streams']:
# official player contains code like `e.get("versions")[0].eStat.ml5`
stream_version = stream['versions'][0]
@ -152,14 +153,18 @@ def _real_extract(self, url):
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
)))
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
if stream['protocol'].startswith('HLS'):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
for fmt in fmts:
fmt.update({
'format_note': f'{stream_version.get("label", "unknown")} [{stream_version.get("shortLabel", "?")}]',
'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
'language_preference': lang_pref,
})
if any(map(short_label.startswith, ('cc', 'OGsub'))):
secondary_formats.extend(fmts)
else:
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
@ -167,7 +172,7 @@ def _real_extract(self, url):
formats.append({
'format_id': f'{stream["protocol"]}-{stream_version_code}',
'url': stream['url'],
'format_note': f'{stream_version.get("label", "unknown")} [{stream_version.get("shortLabel", "?")}]',
'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
'language_preference': lang_pref,
# 'ext': 'mp4', # XXX: may or may not be necessary, at least for HTTPS
})
@ -179,6 +184,8 @@ def _real_extract(self, url):
# The JS also looks for chapters in config['data']['attributes']['chapters'],
# but I am yet to find a video having those
formats.extend(secondary_formats)
self._remove_duplicate_formats(formats)
self._sort_formats(formats)
metadata = config['data']['attributes']['metadata']