From d3d8d8184aafc22ed11edcd3ac175482fbb468e7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 6 Jun 2021 15:05:07 +0530 Subject: [PATCH] [extractor] Fix pre-checking archive for some extractors The `id` regex group must be present for `_match_id` and pre-checking archive to work correctly --- yt_dlp/extractor/awaan.py | 2 +- yt_dlp/extractor/crunchyroll.py | 4 ++-- yt_dlp/extractor/metacafe.py | 2 +- yt_dlp/extractor/sina.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index 3a7700cd4..822136dfb 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -19,7 +19,7 @@ class AWAANIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<id>\d+)/(?P<season_id>\d+))?' def _real_extract(self, url): show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index d6c3f4f93..ec76ad1b2 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -120,7 +120,7 @@ def _add_skip_wall(url): class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): IE_NAME = 'crunchyroll' - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)' + _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { @@ -413,7 +413,7 @@ def _get_subtitles(self, video_id, webpage): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = mobj.group('id') if mobj.group('prefix') == 'm': mobile_webpage =
self._download_webpage(url, video_id, 'Downloading mobile webpage') diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py index 9e92416d1..6366028d2 100644 --- a/yt_dlp/extractor/metacafe.py +++ b/yt_dlp/extractor/metacafe.py @@ -19,7 +19,7 @@ class MetacafeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<video_id>[^/]+)/(?P<display_id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<id>[^/]+)/(?P<display_id>[^/?#]+)' _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = 'metacafe' diff --git a/yt_dlp/extractor/sina.py b/yt_dlp/extractor/sina.py index 60f2dd053..408311418 100644 --- a/yt_dlp/extractor/sina.py +++ b/yt_dlp/extractor/sina.py @@ -18,7 +18,7 @@ class SinaIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/ (?: - (?:view/|.*\#)(?P<video_id>\d+)| + (?:view/|.*\#)(?P<id>\d+)| .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)| # This is used by external sites like Weibo api/sinawebApi/outplay.php/(?P<token>.+?)\.swf @@ -58,7 +58,7 @@ class SinaIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = mobj.group('id') if not video_id: if mobj.group('token') is not None: # The video id is in the redirected url