additionnal fallback when deliver_info not available

2025-01-07 08:31:17 +00:00 · 2024-12-31 12:53:02 +01:00 · 2024-12-31 12:53:02 +01:00 · 685f96a052
parent 1c2cc0dde8
commit 685f96a052
1 changed files with 25 additions and 6 deletions
--- a/yt_dlp/extractor/digiteka.py
+++ b/yt_dlp/extractor/digiteka.py
@ -28,6 +28,27 @@ class DigitekaIE(InfoExtractor):
        {'url': 'https://www.boursorama.com/bourse/actualites/le-retour-des-taux-negatifs-est-il-possible-169e3e0cf337df132285b41e124dc98e'} # from an embed
    ]

+    def _fallback_to_iframe_content(self, url, video_id):
+        iframe_content = self._download_webpage(url, video_id)
+        VIDEO_URL_REGEX = '<meta property="og:video" content="(?P<url>.*)"/>'
+        VIDEO_TITLE_REGEX = '<meta property="og:title" content="(?P<title>.*)"/>'
+        VIDEO_THUMBNAIL_REGEX = '<meta property="og:image" content="(?P<thumbnail>.*)"/>'
+
+        video_url = self._search_regex(VIDEO_URL_REGEX, iframe_content, 'url')
+        video_format = video_url.split('.')[-1]
+        video_title = self._search_regex(VIDEO_TITLE_REGEX, iframe_content, 'title')
+        video_thumbnail = self._search_regex(VIDEO_THUMBNAIL_REGEX, iframe_content, 'thumbnail')
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
+            'formats': [{
+                'url': video_url,
+                'ext': video_format,
+            }]
+        }
+
    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
@ -38,8 +59,9 @@ def _real_extract(self, url):
        deliver_info = self._download_json(
            f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
            video_id)
-
-
+        if not deliver_info:
+            # Apparently some video's deliver_info are not accessible this way anymore
+            return self._fallback_to_iframe_content(url, video_id)
        yt_id = deliver_info.get('yt_id')
        if yt_id:
            return self.url_result(yt_id, 'Youtube')
@ -58,10 +80,7 @@ def _real_extract(self, url):
        if len(formats) == 0:
            # the file urls are not available from the json directly anymore, but
            # can be found in the iframe content
-            iframe_content = self._download_webpage(url, video_id)
-            IFRAME_REGEX = '<meta property="og:video" content="(?P<url>.*)"/>'
-            video_url = self._search_regex(IFRAME_REGEX, iframe_content, 'url')
-            video_format = video_url.split('.')[-1]
+            return self._fallback_to_iframe_content(url, video_id)

        formats.append({
            'url': video_url,