[extractor] Add a way to distinguish IEs that return only videos

2025-01-05 23:54:24 +00:00 · 2022-11-13 10:56:04 +05:30 · 2022-11-13 10:56:04 +05:30 · 171a31dbe8
parent 83cc7b8aae
commit 171a31dbe8
2 changed files with 20 additions and 0 deletions
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -3702,6 +3702,24 @@ def age_limit(cls):
            (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
            (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])

+    @classproperty(cache=True)
+    def _RETURN_TYPE(cls):
+        """What the extractor returns: "video", "playlist", "any", or None (Unknown)"""
+        tests = tuple(cls.get_testcases(include_onlymatching=False))  # tuple() so the cases can be scanned more than once below
+        if not tests:  # nothing to infer the type from
+            return None
+        elif not any(k.startswith('playlist') for test in tests for k in test):  # no test has a key starting with "playlist"
+            return 'video'
+        elif all(any(k.startswith('playlist') for k in test) for test in tests):  # every test has at least one such key
+            return 'playlist'
+        return 'any'  # some tests have a "playlist*" key and some do not
+
+    @classmethod
+    def is_single_video(cls, url):
+        """Returns whether the URL is of a single video, None if unknown"""
+        assert cls.suitable(url), 'The URL must be suitable for the extractor'  # NOTE(review): precondition only; asserts are skipped under `python -O`
+        return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)  # 'any' and None both yield None; url is not otherwise inspected
+
    @classmethod
    def is_suitable(cls, age_limit):
        """Test whether the extractor is generally suitable for the given age limit"""
@ -3953,6 +3971,7 @@ class SearchInfoExtractor(InfoExtractor):
    """

    _MAX_RESULTS = float('inf')
+    _RETURN_TYPE = 'playlist'

    @classproperty
    def _VALID_URL(cls):
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -1050,6 +1050,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
            \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
    ]
+    _RETURN_TYPE = 'video'  # While there are "multifeed" test cases, they don't seem to actually exist anymore

    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',