Fix lazy extractor bug in fe7866d0ed

and add test

Fixes https://github.com/yt-dlp/yt-dlp/pull/3234#issuecomment-1225347071
This commit is contained in:
pukkandan 2022-08-24 15:10:21 +05:30
parent b5e7a2e69d
commit e5458d1d88
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
4 changed files with 38 additions and 26 deletions

View file

@@ -11,13 +11,16 @@
# These bloat the lazy_extractors, so allow them to passthrough silently # These bloat the lazy_extractors, so allow them to passthrough silently
ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'} ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}
_WARNED = False
class LazyLoadMetaClass(type): class LazyLoadMetaClass(type):
def __getattr__(cls, name): def __getattr__(cls, name):
if '_real_class' not in cls.__dict__ and name not in ALLOWED_CLASSMETHODS: global _WARNED
write_string( if ('_real_class' not in cls.__dict__
'WARNING: Falling back to normal extractor since lazy extractor ' and name not in ALLOWED_CLASSMETHODS and not _WARNED):
_WARNED = True
write_string('WARNING: Falling back to normal extractor since lazy extractor '
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n') f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
return getattr(cls.real_class, name) return getattr(cls.real_class, name)

View file

@@ -12,7 +12,9 @@
from devscripts.utils import get_filename_args, read_file, write_file from devscripts.utils import get_filename_args, read_file, write_file
NO_ATTR = object() NO_ATTR = object()
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_NETRC_MACHINE', 'age_limit'] STATIC_CLASS_PROPERTIES = [
'IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_ENABLED', '_NETRC_MACHINE', 'age_limit'
]
CLASS_METHODS = [ CLASS_METHODS = [
'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable' 'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
] ]

View file

@@ -11,41 +11,46 @@
import contextlib import contextlib
import subprocess import subprocess
from yt_dlp.utils import encodeArgument from yt_dlp.utils import Popen
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py'
try:
_DEV_NULL = subprocess.DEVNULL
except AttributeError:
_DEV_NULL = open(os.devnull, 'wb')
class TestExecution(unittest.TestCase): class TestExecution(unittest.TestCase):
def test_import(self): def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )):
subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir) stdout, stderr, returncode = Popen.run(
[*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def test_module_exec(self): print(stderr, file=sys.stderr)
subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL) self.assertEqual(returncode, 0)
return stdout.strip(), stderr.strip()
def test_main_exec(self): def test_main_exec(self):
subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL) self.run_yt_dlp()
def test_import(self):
self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp'))
def test_module_exec(self):
self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp'))
def test_cmdline_umlauts(self): def test_cmdline_umlauts(self):
p = subprocess.Popen( _, stderr = self.run_yt_dlp(opts=('ä', '--version'))
[sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'],
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
_, stderr = p.communicate()
self.assertFalse(stderr) self.assertFalse(stderr)
def test_lazy_extractors(self): def test_lazy_extractors(self):
try: try:
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL) subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL) cwd=rootDir, stdout=subprocess.DEVNULL)
self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
_, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
self.assertFalse(stderr)
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
finally: finally:
with contextlib.suppress(OSError): with contextlib.suppress(OSError):
os.remove('yt_dlp/extractor/lazy_extractors.py') os.remove(LAZY_EXTRACTORS)
if __name__ == '__main__': if __name__ == '__main__':

View file

@@ -8,12 +8,14 @@ class TestURLIE(InfoExtractor):
""" Allows addressing of the test cases as test:yout.*be_1 """ """ Allows addressing of the test cases as test:yout.*be_1 """
IE_DESC = False # Do not list IE_DESC = False # Do not list
_VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$' _VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>[0-9]+))?$'
def _real_extract(self, url): def _real_extract(self, url):
from . import gen_extractor_classes from . import gen_extractor_classes
extractor_id, num = self._match_valid_url(url).group('extractor', 'num') extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
if not extractor_id:
return {'id': ':test', 'title': '', 'url': url}
rex = re.compile(extractor_id, flags=re.IGNORECASE) rex = re.compile(extractor_id, flags=re.IGNORECASE)
matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)] matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]