Fix lazy extractor bug in fe7866d0ed

and add test

Fixes https://github.com/yt-dlp/yt-dlp/pull/3234#issuecomment-1225347071
This commit is contained in:
pukkandan 2022-08-24 15:10:21 +05:30
parent b5e7a2e69d
commit e5458d1d88
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
4 changed files with 38 additions and 26 deletions

View file

@@ -11,13 +11,16 @@
# These bloat the lazy_extractors, so allow them to pass through silently
ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}
_WARNED = False
class LazyLoadMetaClass(type):
def __getattr__(cls, name):
if '_real_class' not in cls.__dict__ and name not in ALLOWED_CLASSMETHODS:
write_string(
'WARNING: Falling back to normal extractor since lazy extractor '
global _WARNED
if ('_real_class' not in cls.__dict__
and name not in ALLOWED_CLASSMETHODS and not _WARNED):
_WARNED = True
write_string('WARNING: Falling back to normal extractor since lazy extractor '
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
return getattr(cls.real_class, name)

View file

@@ -12,7 +12,9 @@
from devscripts.utils import get_filename_args, read_file, write_file
NO_ATTR = object()
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_NETRC_MACHINE', 'age_limit']
STATIC_CLASS_PROPERTIES = [
'IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_ENABLED', '_NETRC_MACHINE', 'age_limit'
]
CLASS_METHODS = [
'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
]

View file

@@ -11,41 +11,46 @@
import contextlib
import subprocess
from yt_dlp.utils import encodeArgument
from yt_dlp.utils import Popen
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
try:
_DEV_NULL = subprocess.DEVNULL
except AttributeError:
_DEV_NULL = open(os.devnull, 'wb')
LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py'
class TestExecution(unittest.TestCase):
def test_import(self):
subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir)
def test_module_exec(self):
subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )):
stdout, stderr, returncode = Popen.run(
[*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(stderr, file=sys.stderr)
self.assertEqual(returncode, 0)
return stdout.strip(), stderr.strip()
def test_main_exec(self):
subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
self.run_yt_dlp()
def test_import(self):
self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp'))
def test_module_exec(self):
self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp'))
def test_cmdline_umlauts(self):
p = subprocess.Popen(
[sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'],
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
_, stderr = p.communicate()
_, stderr = self.run_yt_dlp(opts=('ä', '--version'))
self.assertFalse(stderr)
def test_lazy_extractors(self):
try:
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
cwd=rootDir, stdout=subprocess.DEVNULL)
self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
_, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
self.assertFalse(stderr)
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
finally:
with contextlib.suppress(OSError):
os.remove('yt_dlp/extractor/lazy_extractors.py')
os.remove(LAZY_EXTRACTORS)
if __name__ == '__main__':

View file

@@ -8,12 +8,14 @@ class TestURLIE(InfoExtractor):
""" Allows addressing of the test cases as test:yout.*be_1 """
IE_DESC = False # Do not list
_VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'
_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>[0-9]+))?$'
def _real_extract(self, url):
from . import gen_extractor_classes
extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
if not extractor_id:
return {'id': ':test', 'title': '', 'url': url}
rex = re.compile(extractor_id, flags=re.IGNORECASE)
matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]