Fix: avoid adding the same manifest URL twice

(Sometimes I hate it when technology is implemented like this in the first place: a link to an "alternative" that is really just the original.)
This commit is contained in:
spookyahell 2023-03-16 00:59:46 +01:00 committed by GitHub
parent 071670cbea
commit 1a6fdcef9c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -59,6 +59,9 @@ def _real_extract(self, url):
formats = [] formats = []
subtitles = {} subtitles = {}
# Track the manifest URLs already seen so that the same URL is not added a second time
avoid_duplicate_manifest_urls = []
# check if the metadata contains a direct URL to a file # check if the metadata contains a direct URL to a file
for kind, media in media_resource.items(): for kind, media in media_resource.items():
@ -74,10 +77,15 @@ def _real_extract(self, url):
continue continue
if not isinstance(media, dict): if not isinstance(media, dict):
continue continue
for tag_name, medium_url in media.items(): for tag_name, medium_url in media.items():
if tag_name not in ('videoURL', 'audioURL'): if tag_name not in ('videoURL', 'audioURL'):
continue continue
if medium_url not in avoid_duplicate_manifest_urls:
avoid_duplicate_manifest_urls.append(medium_url)
else:
continue
ext = determine_ext(medium_url) ext = determine_ext(medium_url)
if ext == 'm3u8': if ext == 'm3u8':
@ -164,7 +172,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
'ext': 'mp3', 'ext': 'mp3',
'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100', 'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100',
'title': 'Schriftstellerin Juli Zeh', 'title': 'Schriftstellerin Juli Zeh',
'alt_title': 'WDR 3 Gespräch am Samstag', 'alt_title': 'WDR 3 Gespräch am Samstag',
'upload_date': '20160312', 'upload_date': '20160312',
'description': 'md5:e127d320bc2b1f149be697ce044a3dd7', 'description': 'md5:e127d320bc2b1f149be697ce044a3dd7',
'is_live': False, 'is_live': False,
@ -232,7 +240,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
'info_dict': { 'info_dict': {
'id': 'mdb-1556012', 'id': 'mdb-1556012',
'ext': 'mp4', 'ext': 'mp4',
'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"', 'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"',
'upload_date': '20180111', 'upload_date': '20180111',
}, },
'params': { 'params': {