diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index 1d387bdfd..a6477faab 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -198,8 +198,13 @@ def _extract_urls(cls, webpage, origin_url): )+ )''' % {'quot_re': r'["\']', 'url_re': VALID_SRC} - for mobj in re.finditer(EMBED_RE, webpage): - mdict = mobj.groupdict() + for mtup in re.findall(EMBED_RE, webpage): + # re.finditer causes a memory spike. See https://github.com/yt-dlp/yt-dlp/issues/2512 + mdict = dict(zip(( + 'url', '_', + 'html_tag', '_', 'integration_html', '_', 'id_html', '_', 'glomex_player', + 'script_tag', '_', '_', 'integration_js', '_', 'id_js', + ), mtup)) if mdict.get('url'): url = unescapeHTML(mdict['url']) if not cls.suitable(url):