From 685f96a0527e348c64f7c7848e3d111a62646bb2 Mon Sep 17 00:00:00 2001 From: eldonad Date: Tue, 31 Dec 2024 12:53:02 +0100 Subject: [PATCH] additionnal fallback when deliver_info not available --- yt_dlp/extractor/digiteka.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py index 7bab8fd45..e70c34201 100644 --- a/yt_dlp/extractor/digiteka.py +++ b/yt_dlp/extractor/digiteka.py @@ -28,6 +28,27 @@ class DigitekaIE(InfoExtractor): {'url': 'https://www.boursorama.com/bourse/actualites/le-retour-des-taux-negatifs-est-il-possible-169e3e0cf337df132285b41e124dc98e'} # from an embed ] + def _fallback_to_iframe_content(self, url, video_id): + iframe_content = self._download_webpage(url, video_id) + VIDEO_URL_REGEX = '' + VIDEO_TITLE_REGEX = '' + VIDEO_THUMBNAIL_REGEX = '' + + video_url = self._search_regex(VIDEO_URL_REGEX, iframe_content, 'url') + video_format = video_url.split('.')[-1] + video_title = self._search_regex(VIDEO_TITLE_REGEX, iframe_content, 'title') + video_thumbnail = self._search_regex(VIDEO_THUMBNAIL_REGEX, iframe_content, 'thumbnail') + + return { + 'id': video_id, + 'title': video_title, + 'thumbnail': video_thumbnail, + 'formats': [{ + 'url': video_url, + 'ext': video_format, + }] + } + def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') @@ -38,8 +59,9 @@ def _real_extract(self, url): deliver_info = self._download_json( f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}', video_id) - - + if not deliver_info: + # Apparently some video's deliver_info are not accessible this way anymore + return self._fallback_to_iframe_content(url, video_id) yt_id = deliver_info.get('yt_id') if yt_id: return self.url_result(yt_id, 'Youtube') @@ -58,10 +80,7 @@ def _real_extract(self, url): if len(formats) == 0: # the file urls are not available from the json directly anymore, but # can be found in the iframe content - iframe_content = self._download_webpage(url, video_id) - IFRAME_REGEX = '' - video_url = self._search_regex(IFRAME_REGEX, iframe_content, 'url') - video_format = video_url.split('.')[-1] + return self._fallback_to_iframe_content(url, video_id) formats.append({ 'url': video_url,