[extractor] Fix pre-checking archive for some extractors

The `id` regex group must be present for `_match_id` and pre-checking archive to work correctly
2025-01-22 00:36:39 +00:00 · 2021-06-06 15:05:07 +05:30 · 2021-06-06 15:05:07 +05:30 · d3d8d8184a
parent e85a39717a
commit d3d8d8184a
4 changed files with 6 additions and 6 deletions
--- a/yt_dlp/extractor/awaan.py
+++ b/yt_dlp/extractor/awaan.py
@ -19,7 +19,7 @@


 class AWAANIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
+    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@ -120,7 +120,7 @@ def _add_skip_wall(url):

 class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
    IE_NAME = 'crunchyroll'
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)'
    _TESTS = [{
        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
        'info_dict': {
@ -413,7 +413,7 @@ def _get_subtitles(self, video_id, webpage):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
+        video_id = mobj.group('id')

        if mobj.group('prefix') == 'm':
            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
--- a/yt_dlp/extractor/metacafe.py
+++ b/yt_dlp/extractor/metacafe.py
@ -19,7 +19,7 @@


 class MetacafeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<video_id>[^/]+)/(?P<display_id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<id>[^/]+)/(?P<display_id>[^/?#]+)'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    IE_NAME = 'metacafe'
--- a/yt_dlp/extractor/sina.py
+++ b/yt_dlp/extractor/sina.py
@ -18,7 +18,7 @@
 class SinaIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
                        (?:
-                            (?:view/|.*\#)(?P<video_id>\d+)|
+                            (?:view/|.*\#)(?P<id>\d+)|
                            .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
                            # This is used by external sites like Weibo
                            api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
@ -58,7 +58,7 @@ class SinaIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

-        video_id = mobj.group('video_id')
+        video_id = mobj.group('id')
        if not video_id:
            if mobj.group('token') is not None:
                # The video id is in the redirected url