Merge a376ab99f8 into 96da952504

[core] Warn if lack of ffmpeg alters format selection (#9805)
Authored by: seproDev, pukkandan
2024-05-05 10:21:44 +05:30 · 2024-05-05 00:44:08 +02:00 · 2024-04-01 20:29:15 +05:30 · 2024-04-01 20:26:45 +05:30 · 2024-04-01 20:17:23 +05:30 · 2024-04-01 07:51:11 +05:30
8 changed files with 63 additions and 37 deletions
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -15,7 +15,7 @@ from devscripts.utils import get_filename_args, read_file, write_file
 NO_ATTR = object()
 STATIC_CLASS_PROPERTIES = [
    'IE_NAME', '_ENABLED', '_VALID_URL',  # Used for URL matching
-    '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY',  # Used for --extractor-descriptions
+    '_REPORTED_BROKEN', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY',  # Used for --extractor-descriptions
    'age_limit',  # Used for --age-limit (evaluated)
    '_RETURN_TYPE',  # Accessed in CLI only with instance (evaluated)
 ]
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -304,10 +304,10 @@
 - **CrowdBunker**
 - **CrowdBunkerChannel**
 - **Crtvg**
- - **crunchyroll**: [*crunchyroll*](## "netrc machine")
- - **crunchyroll:artist**: [*crunchyroll*](## "netrc machine")
- - **crunchyroll:music**: [*crunchyroll*](## "netrc machine")
- - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine")
+ - **crunchyroll**: [*crunchyroll*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9453))
+ - **crunchyroll:artist**: [*crunchyroll*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9453))
+ - **crunchyroll:music**: [*crunchyroll*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9453))
+ - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9453))
 - **CSpan**: C-SPAN
 - **CSpanCongress**
 - **CtsNews**: 華視新聞
@@ -553,7 +553,7 @@
 - **hgtv.com:show**
 - **HGTVDe**
 - **HGTVUsa**
- - **HiDive**: [*hidive*](## "netrc machine")
+ - **HiDive**: [*hidive*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9385))
 - **HistoricFilms**
 - **history:player**
 - **history:topic**: History.com Topic
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -98,7 +98,7 @@ def generator(test_case, tname):
            self.skipTest(reason)

        if not ie.working():
-            print_skipping('IE marked as not _WORKING')
+            print_skipping('IE is _REPORTED_BROKEN')

        for tc in test_cases:
            if tc.get('expected_exception'):
@@ -117,7 +117,7 @@ def generator(test_case, tname):

        for other_ie in other_ies:
            if not other_ie.working():
-                print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                print_skipping(f'test depends on {other_ie.ie_key()}IE, is _REPORTED_BROKEN')

        params = get_params(test_case.get('params', {}))
        params['outtmpl'] = tname + '_' + params['outtmpl']
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -40,8 +40,8 @@ class BaseTestSubtitles(unittest.TestCase):
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)
        if not self.IE.working():
-            print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
-            self.skipTest('IE marked as not _WORKING')
+            print(f'Skipping: {self.IE.ie_key()} is _REPORTED_BROKEN')
+            self.skipTest('IE is _REPORTED_BROKEN')

    def getInfoDict(self):
        info_dict = self.DL.extract_info(self.url, download=False)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1582,8 +1582,12 @@ class YoutubeDL:
                continue

            if not ie.working():
-                self.report_warning('The program functionality for this site has been marked as broken, '
-                                    'and will probably not work.')
+                self.report_warning(join_nonempty(
+                    f'[{ie.IE_NAME}] The program\'s functionality for this site has been marked as '
+                    f'{self._format_err("BROKEN", self.Styles.ERROR)}, and will probably not work.',
+                    format_field(ie._REPORTED_BROKEN, None, f'See  {self._format_err("%s", self.Styles.EMPHASIS)}'
+                                 '  for more information. Do NOT open a new issue for this.'),
+                    delim='\n         '))

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
@@ -2136,6 +2140,11 @@ class YoutubeDL:

    def _check_formats(self, formats):
        for f in formats:
+            working = f.get('__working')
+            if working is not None:
+                if working:
+                    yield f
+                continue
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
@@ -2152,33 +2161,44 @@ class YoutubeDL:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+            f['__working'] = success
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

+    def _select_formats(self, formats, selector):
+        return list(selector({
+            'formats': formats,
+            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
+            'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
+                                   or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
+        }))
+
    def _default_format_spec(self, info_dict, download=True):
+        download = download and not self.params.get('simulate')
+        prefer_best = download and (
+            self.params['outtmpl']['default'] == '-'
+            or info_dict.get('is_live') and not self.params.get('live_from_start'))

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

-        prefer_best = (
-            not self.params.get('simulate')
-            and download
-            and (
-                not can_merge()
-                or info_dict.get('is_live') and not self.params.get('live_from_start')
-                or self.params['outtmpl']['default'] == '-'))
-        compat = (
-            prefer_best
-            or self.params.get('allow_multiple_audio_streams', False)
-            or 'format-spec' in self.params['compat_opts'])
+        if not prefer_best and download and not can_merge():
+            prefer_best = True
+            formats = self._get_formats(info_dict)
+            evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
+            if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
+                self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
+                                    'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')

-        return (
-            'best/bestvideo+bestaudio' if prefer_best
-            else 'bestvideo*+bestaudio/best' if not compat
-            else 'bestvideo+bestaudio/best')
+        compat = (self.params.get('allow_multiple_audio_streams')
+                  or 'format-spec' in self.params['compat_opts'])
+
+        return ('best/bestvideo+bestaudio' if prefer_best
+                else 'bestvideo+bestaudio/best' if compat
+                else 'bestvideo*+bestaudio/best')

    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
@@ -2928,12 +2948,7 @@ class YoutubeDL:
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

-            formats_to_download = list(format_selector({
-                'formats': formats,
-                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
-                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
-                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
-            }))
+            formats_to_download = self._select_formats(formats, format_selector)
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -546,8 +546,9 @@ class InfoExtractor:
    The _ENABLED attribute should be set to False for IEs that
    are disabled by default and must be explicitly enabled.

-    The _WORKING attribute should be set to False for broken IEs
+    For broken extractors, the _REPORTED_BROKEN attribute can be set to the issue URL
    in order to warn the users and skip the tests.
+    [Deprecated] If there is no open issue, set _WORKING = False instead.
    """

    _ready = False
@@ -613,10 +614,13 @@ class InfoExtractor:
        except (IndexError, AttributeError):
            return None

+    @classproperty(cache=True)
+    def _REPORTED_BROKEN(cls):
+        return not cls._WORKING and ''
+
    @classmethod
    def working(cls):
-        """Getter method for _WORKING."""
-        return cls._WORKING
+        return cls._REPORTED_BROKEN is False

    @classmethod
    def supports_login(cls):
@@ -3674,7 +3678,12 @@ class InfoExtractor:
                _COUNTS = ('', '5', '10', 'all')
                desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
        if not cls.working():
-            desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
+            msg = 'Currently broken'
+            if markdown:
+                msg = f'**{msg}**'
+                if cls._REPORTED_BROKEN:
+                    msg = f'[{msg}]({cls._REPORTED_BROKEN})'
+            desc += f' ({msg})'

        # Escape emojis. Ref: https://github.com/github/markup/issues/1153
        name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -21,6 +21,7 @@ from ..utils import (


 class CrunchyrollBaseIE(InfoExtractor):
+    _REPORTED_BROKEN = 'https://github.com/yt-dlp/yt-dlp/issues/9453'
    _BASE_URL = 'https://www.crunchyroll.com'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
--- a/yt_dlp/extractor/hidive.py
+++ b/yt_dlp/extractor/hidive.py
@@ -9,6 +9,7 @@ from ..utils import (


 class HiDiveIE(InfoExtractor):
+    _REPORTED_BROKEN = 'https://github.com/yt-dlp/yt-dlp/issues/9385'
    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
    # so disabling geo bypass completely
Author	SHA1	Message	Date
pukkandan	c9da0e43a8	Merge `a376ab99f8` into `96da952504`	2024-05-05 10:21:44 +05:30
sepro	96da952504	[core] Warn if lack of ffmpeg alters format selection (#9805) Authored by: seproDev, pukkandan	2024-05-05 00:44:08 +02:00
pukkandan	a376ab99f8	fstrings need f	2024-04-01 20:29:15 +05:30
pukkandan	68b74d52ab	fstrings Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>	2024-04-01 20:26:45 +05:30
pukkandan	c414c3d406	Rename	2024-04-01 20:17:23 +05:30
pukkandan	93e83fa261	lint	2024-04-01 07:51:11 +05:30
pukkandan	bce376140f	`make supportedsites`	2024-04-01 07:43:02 +05:30
pukkandan	93efacd098	[ie] Add `_BROKEN_ISSUE`	2024-04-01 07:42:08 +05:30