From 8b7491c8d1ce52af856c224c029b2d577323fe6a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 23 Aug 2021 05:26:45 +0530 Subject: [PATCH] Fix `add_info_extractor` when used via API Bug from: 251ae04e6a057167e4eafaf8b7b565a984b48405 --- devscripts/make_lazy_extractors.py | 2 +- yt_dlp/YoutubeDL.py | 28 +++++++++++++++++----------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index d313e68a98..e7b024490c 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -66,7 +66,7 @@ def build_lazy_ie(ie, name): if valid_url: s += f' _VALID_URL = {valid_url!r}\n' if not ie._WORKING: - s += f' _WORKING = False\n' + s += ' _WORKING = False\n' if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: s += f'\n{getsource(ie.suitable)}' if hasattr(ie, '_make_valid_url'): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5d40eb3337..422b26ffe9 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -461,7 +461,7 @@ class YoutubeDL(object): )) params = None - _ies = [] + _ies = {} _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} _printed_messages = set() _first_webpage_request = True @@ -475,7 +475,7 @@ def __init__(self, params=None, auto_init=True): """Create a FileDownloader object with the given options.""" if params is None: params = {} - self._ies = [] + self._ies = {} self._ies_instances = {} self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} self._printed_messages = set() @@ -631,11 +631,19 @@ def warn_if_short_id(self, argv): def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" - self._ies.append(ie) + ie_key = ie.ie_key() + self._ies[ie_key] = ie if not isinstance(ie, type): - self._ies_instances[ie.ie_key()] = ie + self._ies_instances[ie_key] = ie ie.set_downloader(self) + def _get_info_extractor_class(self, ie_key): + ie = self._ies.get(ie_key) + if ie is None: + ie = get_info_extractor(ie_key) + self.add_info_extractor(ie) + return ie + def get_info_extractor(self, ie_key): """ Get an instance of an IE with name ie_key, it will try to get one from @@ -1179,15 +1187,14 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}, ie_key = 'Generic' if ie_key: - ies = [get_info_extractor(ie_key)] + ies = {ie_key: self._get_info_extractor_class(ie_key)} else: ies = self._ies - for ie in ies: + for ie_key, ie in ies.items(): if not ie.suitable(url): continue - ie_key = ie.ie_key() if not ie.working(): self.report_warning('The program functionality for this site has been marked as broken, ' 'and will probably not work.') @@ -1197,8 +1204,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}, self.to_screen("[%s] %s: has already been recorded in archive" % ( ie_key, temp_id)) break - return self.__extract_info(url, self.get_info_extractor(ie.ie_key()), - download, extra_info, process) + return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process) else: self.report_error('no suitable InfoExtractor for URL %s' % url) @@ -3025,9 +3031,9 @@ def _make_archive_id(self, info_dict): if not url: return # Try to find matching extractor for the URL and take its ie_key - for ie in self._ies: + for ie_key, ie in self._ies.items(): if ie.suitable(url): - extractor = ie.ie_key() + extractor = ie_key break else: return