[CBS] Add fallback (#579)

Related: https://github.com/ytdl-org/youtube-dl/issues/29564 Authored-by: llacb47, pukkandan
2024-11-30 12:01:28 +00:00 · 2021-08-01 22:16:12 -04:00 · 2021-08-01 22:16:12 -04:00 · 68f5867cf0
parent 605cad0be7
commit 68f5867cf0
2 changed files with 84 additions and 13 deletions
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@ -53,6 +53,54 @@ class CBSIE(CBSBaseIE):
            'skip_download': True,
        },
        '_skip': 'Blocked outside the US',
    }, {
        'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/',
        'info_dict': {
            'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
            'ext': 'mp4',
            'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
            'description': 'md5:7ac835000645a69933df226940e3c859',
            'duration': 1418,
            'timestamp': 920264400,
            'upload_date': '19990301',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        '_skip': 'Blocked outside the US',
    }, {
        'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/',
        'info_dict': {
            'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
            'ext': 'mp4',
            'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
            'description': 'md5:f4adcea3e8b106192022e121f1565bae',
            'duration': 2506,
            'timestamp': 1627063200,
            'upload_date': '20210723',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        '_skip': 'Blocked outside the US',
    }, {
        'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
        'info_dict': {
            'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2',
            'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)',
            'timestamp': 1624507140,
            'description': 'md5:e01af24e95c74d55e8775aef86117b95',
            'uploader': 'CBSI-NEW',
            'upload_date': '20210624',
        },
        'params': {
            'ignore_no_formats_error': True,
            'skip_download': True,
        },
        'expected_warnings': [
            'This content expired on', 'No video formats found', 'Requested format is not available'],
    }, {
        'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
        'only_matching': True,
@ -79,17 +127,26 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
        asset_types = []
        subtitles = {}
        formats = []
        useXMLmetadata = True
        last_e = None
        for item in items_data.findall('.//item'):
            asset_type = xpath_text(item, 'assetType')
            if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
                continue
            asset_types.append(asset_type)
            query = {
                'mbr': 'true',
                'assetTypes': asset_type,
            }
-            if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
+            if not asset_type:
                # fallback for content_ids that videoPlayerService doesn't return anything for
                useXMLmetadata = False
                asset_type = 'fallback'
                query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'
                del query['assetTypes']
            elif asset_type in asset_types:
                continue
            elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
                continue
            asset_types.append(asset_type)
            if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
                query['formats'] = 'MPEG4,M3U'
            elif asset_type in ('RTMP', 'WIFI', '3G'):
                query['formats'] = 'MPEG4,FLV'
@ -99,25 +156,37 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
                    'Downloading %s SMIL data' % asset_type)
            except ExtractorError as e:
                last_e = e
-                continue
+                if useXMLmetadata:
                    continue
                query['formats'] = ''  # blank query to check if expired
                try:
                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
                        update_url_query(tp_release_url, query), content_id,
                        'Downloading %s SMIL data, trying again with another format' % asset_type)
                except ExtractorError as e:
                    last_e = e
                    continue
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
        if last_e and not formats:
-            raise last_e
+            self.raise_no_formats(last_e, True, content_id)
        self._sort_formats(formats)
        info = self._extract_theplatform_metadata(tp_path, content_id)
        info.update({
            'id': content_id,
            'title': title,
            'series': xpath_text(video_data, 'seriesTitle'),
            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
            'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
            'thumbnail': xpath_text(video_data, 'previewImageURL'),
            'formats': formats,
            'subtitles': subtitles,
            'id': content_id
        })
        if useXMLmetadata:
            info.update({
                'title': title,
                'series': xpath_text(video_data, 'seriesTitle'),
                'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
                'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
                'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
                'thumbnail': xpath_text(video_data, 'previewImageURL')
            })
        return info
    def _real_extract(self, url):
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -1052,6 +1052,8 @@ def raise_geo_restricted(
    def raise_no_formats(self, msg, expected=False, video_id=None):
        """Emit a warning or raise an error for the given message.

        When ``expected`` is truthy and the ``ignore_no_formats_error``
        param is enabled, ``msg`` is passed to ``report_warning`` together
        with ``video_id`` and the method returns normally.

        Otherwise an error is raised: ``msg`` itself if it is already an
        ``ExtractorError`` instance, else a new ``ExtractorError`` built
        from ``msg``, ``expected`` and ``video_id``.
        """
        downgrade_to_warning = expected and self.get_param('ignore_no_formats_error')
        if downgrade_to_warning:
            self.report_warning(msg, video_id)
            return

        # An ExtractorError handed in by the caller is re-raised as-is so
        # that its original attributes are preserved.
        if isinstance(msg, ExtractorError):
            error = msg
        else:
            error = ExtractorError(msg, expected=expected, video_id=video_id)
        raise error