[extractor] Support multiple _VALID_URLs (#5812)

Authored by: nixxo
2025-01-05 23:54:24 +00:00 · 2023-06-22 02:57:00 +05:30 · 2023-06-22 02:57:00 +05:30 · 5fd8367496
parent 0dff8e4d1e
commit 5fd8367496
2 changed files with 5 additions and 4 deletions
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -6,6 +6,7 @@
    age_restricted,
    bug_reports_message,
    classproperty,
+    variadic,
    write_string,
 )

--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -475,8 +475,8 @@ class InfoExtractor:


    Subclasses of this should also be added to the list of extractors and
-    should define a _VALID_URL regexp and, re-define the _real_extract() and
-    (optionally) _real_initialize() methods.
+    should define _VALID_URL as a regexp or a Sequence of regexps, and
+    re-define the _real_extract() and (optionally) _real_initialize() methods.

    Subclasses may also override suitable() if necessary, but ensure the function
    signature is preserved and that this function imports everything it needs
@@ -566,8 +566,8 @@ def _match_valid_url(cls, url):
        # we have cached the regexp for *this* class, whereas getattr would also
        # match the superclass
        if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        return cls._VALID_URL_RE.match(url)
+            cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
+        return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None)

    @classmethod
    def suitable(cls, url):