From 2eaf303b637361ecd27cceaab41eb86c540d1bfc Mon Sep 17 00:00:00 2001 From: Lucas Rademaker <44430780+lr4d@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:50:17 +0700 Subject: [PATCH] [ie/Zoom] gh-7784 fix password handling logic --- yt_dlp/extractor/zoom.py | 54 +++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py index a577af8181..12a4c0938c 100644 --- a/yt_dlp/extractor/zoom.py +++ b/yt_dlp/extractor/zoom.py @@ -7,6 +7,7 @@ parse_resolution, str_or_none, traverse_obj, + update_url, url_basename, urlencode_postdata, urljoin, @@ -62,39 +63,59 @@ def _get_page_data(self, webpage, video_id): return self._search_json( r'window\.__data__\s*=', webpage, 'data', video_id, transform_source=js_to_json) - def _get_real_webpage(self, url, base_url, video_id, url_type): - webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage') - try: - form = self._form_hidden_inputs('password_form', webpage) - except ExtractorError: - return webpage - + def _try_login(self, url, base_url, video_id, form): + # This will most likely only work for password-protected meetings password = self.get_param('videopassword') if not password: raise ExtractorError( 'This video is protected by a passcode, use the --video-password option', expected=True) + is_meeting = form.get('useWhichPasswd') == 'meeting' validation = self._download_json( - base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''), + base_url + 'nws/recording/1.0/validate%s-passwd' % ('-meeting' if is_meeting else ''), video_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({ - 'id': form[('meet' if is_meeting else 'file') + 'Id'], + 'id': form[('meeting' if is_meeting else 'file') + '_id'], 'passwd': password, 'action': form.get('action'), })) + if not validation.get('status'): raise ExtractorError(validation['errorMessage'], expected=True) - return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage') + + def _get_real_webpage(self, url, base_url, video_id, url_type): + webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage') + + data = self._get_page_data(webpage, video_id) + if data.get('componentName') != 'need-password': # not password protected + return webpage + + # Password-protected: + self._try_login(url, base_url, video_id, form=data) + # Return the new HTML document + new_url = f"{base_url}rec/share/{data['meeting_id']}" + return self._download_webpage(new_url, video_id, note=f'Re-downloading {url_type} webpage') + + def _get_share_redirect_url(self, url, base_url, video_id): + """Converts a `/rec/share` url to the corresponding `/rec/play` url, performs login if necessary""" + webpage = self._get_real_webpage(url, base_url, video_id, 'share') + meeting_id = self._get_page_data(webpage, video_id)['meetingId'] + redirect_dict = self._download_json( + f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}', + video_id, note='Downloading share info JSON')['result'] + redirect_path = redirect_dict.pop('redirectUrl') + url = update_url(urljoin(base_url, redirect_path), query_update=redirect_dict) + + if redirect_dict.get('componentName') == 'need-password': + # First login, then return redirection URL + return self._get_share_redirect_url(url, base_url, video_id) + + return url def _real_extract(self, url): base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id') if url_type == 'share': - webpage = self._get_real_webpage(url, base_url, video_id, 'share') - meeting_id = self._get_page_data(webpage, video_id)['meetingId'] - redirect_path = self._download_json( - f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}', - video_id, note='Downloading share info JSON')['result']['redirectUrl'] - url = urljoin(base_url, redirect_path) + url = self._get_share_redirect_url(url, base_url, video_id) webpage = self._get_real_webpage(url, base_url, video_id, 'play') file_id = self._get_page_data(webpage, video_id)['fileId'] @@ -107,7 +128,6 @@ def _real_extract(self, url): 'continueMode': 'true', # Makes this return value include interpreter audio information }, note='Downloading play info JSON')['result'] - subtitles = {} # XXX: Would be more appropriate to parse chapters separate from subtitles for _type in ('transcript', 'cc', 'chapter'):