[CBS] Add fallback (#579)

Related: https://github.com/ytdl-org/youtube-dl/issues/29564
Authored-by: llacb47, pukkandan
2024-11-27 10:31:29 +00:00 · 2021-08-01 22:16:12 -04:00 · 2021-08-01 22:16:12 -04:00 · 68f5867cf0
parent 605cad0be7
commit 68f5867cf0
2 changed files with 84 additions and 13 deletions
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@ -53,6 +53,54 @@ class CBSIE(CBSBaseIE):
            'skip_download': True,
        },
        '_skip': 'Blocked outside the US',
+    }, {
+        'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/',
+        'info_dict': {
+            'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
+            'ext': 'mp4',
+            'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
+            'description': 'md5:7ac835000645a69933df226940e3c859',
+            'duration': 1418,
+            'timestamp': 920264400,
+            'upload_date': '19990301',
+            'uploader': 'CBSI-NEW',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+        '_skip': 'Blocked outside the US',
+    }, {
+        'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/',
+        'info_dict': {
+            'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
+            'ext': 'mp4',
+            'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
+            'description': 'md5:f4adcea3e8b106192022e121f1565bae',
+            'duration': 2506,
+            'timestamp': 1627063200,
+            'upload_date': '20210723',
+            'uploader': 'CBSI-NEW',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+        '_skip': 'Blocked outside the US',
+    }, {
+        'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
+        'info_dict': {
+            'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2',
+            'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)',
+            'timestamp': 1624507140,
+            'description': 'md5:e01af24e95c74d55e8775aef86117b95',
+            'uploader': 'CBSI-NEW',
+            'upload_date': '20210624',
+        },
+        'params': {
+            'ignore_no_formats_error': True,
+            'skip_download': True,
+        },
+        'expected_warnings': [
+            'This content expired on', 'No video formats found', 'Requested format is not available'],
    }, {
        'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
        'only_matching': True,
@ -79,17 +127,26 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
        asset_types = []
        subtitles = {}
        formats = []
+        useXMLmetadata = True
        last_e = None
        for item in items_data.findall('.//item'):
            asset_type = xpath_text(item, 'assetType')
-            if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
-                continue
-            asset_types.append(asset_type)
            query = {
                'mbr': 'true',
                'assetTypes': asset_type,
            }
-            if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
+            if not asset_type:
+                # fallback for content_ids that videoPlayerService doesn't return anything for
+                useXMLmetadata = False
+                asset_type = 'fallback'
+                query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'
+                del query['assetTypes']
+            elif asset_type in asset_types:
+                continue
+            elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
+                continue
+            asset_types.append(asset_type)
+            if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
                query['formats'] = 'MPEG4,M3U'
            elif asset_type in ('RTMP', 'WIFI', '3G'):
                query['formats'] = 'MPEG4,FLV'
@ -97,26 +154,38 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    update_url_query(tp_release_url, query), content_id,
                    'Downloading %s SMIL data' % asset_type)
+            except ExtractorError as e:
+                last_e = e
+                if useXMLmetadata:
+                    continue
+                query['formats'] = ''  # blank query to check if expired
+                try:
+                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                        update_url_query(tp_release_url, query), content_id,
+                        'Downloading %s SMIL data, trying again with another format' % asset_type)
                except ExtractorError as e:
                    last_e = e
                    continue
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
        if last_e and not formats:
-            raise last_e
+            self.raise_no_formats(last_e, True, content_id)
        self._sort_formats(formats)

        info = self._extract_theplatform_metadata(tp_path, content_id)
        info.update({
-            'id': content_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'id': content_id
+        })
+        if useXMLmetadata:
+            info.update({
                'title': title,
                'series': xpath_text(video_data, 'seriesTitle'),
                'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
                'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
                'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
-            'thumbnail': xpath_text(video_data, 'previewImageURL'),
-            'formats': formats,
-            'subtitles': subtitles,
+                'thumbnail': xpath_text(video_data, 'previewImageURL')
            })
        return info

--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -1052,6 +1052,8 @@ def raise_geo_restricted(
    def raise_no_formats(self, msg, expected=False, video_id=None):
        if expected and self.get_param('ignore_no_formats_error'):
            self.report_warning(msg, video_id)
+        elif isinstance(msg, ExtractorError):
+            raise msg
        else:
            raise ExtractorError(msg, expected=expected, video_id=video_id)