[ie/vimeo] Fix password-protected video extraction (#10341)

Closes #6603
Authored by: bashonly
This commit is contained in:
bashonly 2024-07-05 13:32:53 -05:00 committed by GitHub
parent 6075a029db
commit c1c9bb4adb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1,6 +1,7 @@
import base64 import base64
import functools import functools
import itertools import itertools
import json
import re import re
import urllib.parse import urllib.parse
@@ -14,6 +15,7 @@
determine_ext, determine_ext,
get_element_by_class, get_element_by_class,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
merge_dicts, merge_dicts,
parse_filesize, parse_filesize,
@@ -84,29 +86,23 @@ def _get_video_password(self):
expected=True) expected=True)
return password return password
def _verify_video_password(self, url, video_id, password, token, vuid): def _verify_video_password(self, video_id, password, token):
if url.startswith('http://'): url = f'https://vimeo.com/{video_id}'
# vimeo only supports https now, but the user can give an http url try:
url = url.replace('http://', 'https://') return self._download_webpage(
self._set_vimeo_cookie('vuid', vuid) f'{url}/password', video_id,
return self._download_webpage( 'Submitting video password', data=json.dumps({
url + '/password', video_id, 'Verifying the password', 'password': password,
'Wrong password', data=urlencode_postdata({ 'token': token,
'password': password, }, separators=(',', ':')).encode(), headers={
'token': token, 'Accept': '*/*',
}), headers={ 'Content-Type': 'application/json',
'Content-Type': 'application/x-www-form-urlencoded', 'Referer': url,
'Referer': url, }, impersonate=True)
}) except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 418:
def _extract_xsrft_and_vuid(self, webpage): raise ExtractorError('Wrong password', expected=True)
xsrft = self._search_regex( raise
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
webpage, 'login token', group='xsrft')
vuid = self._search_regex(
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
webpage, 'vuid', group='vuid')
return xsrft, vuid
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
vimeo_config = self._search_regex( vimeo_config = self._search_regex(
@@ -745,21 +741,34 @@ def _verify_player_video_password(self, url, video_id, headers):
raise ExtractorError('Wrong video password', expected=True) raise ExtractorError('Wrong video password', expected=True)
return checked return checked
def _extract_from_api(self, video_id, unlisted_hash=None): def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
token = self._download_json( return self._download_json(
'https://vimeo.com/_rv/jwt', video_id, headers={ join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
'X-Requested-With': 'XMLHttpRequest', video_id, 'Downloading API JSON', headers={
})['token'] 'Authorization': f'jwt {jwt_token}',
api_url = 'https://api.vimeo.com/videos/' + video_id
if unlisted_hash:
api_url += ':' + unlisted_hash
video = self._download_json(
api_url, video_id, headers={
'Authorization': 'jwt ' + token,
'Accept': 'application/json', 'Accept': 'application/json',
}, query={ }, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
}) })
def _extract_from_api(self, video_id, unlisted_hash=None):
viewer = self._download_json(
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
for retry in (False, True):
try:
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
except ExtractorError as e:
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
and 'password' in traverse_obj(
e.cause.response.read(),
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
)):
self._verify_video_password(
video_id, self._get_video_password(), viewer['xsrft'])
continue
raise
info = self._parse_config(self._download_json( info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id) video['config_url'], video_id), video_id)
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
@@ -865,12 +874,6 @@ def _real_extract(self, url):
redirect_url, video_id, headers) redirect_url, video_id, headers)
return self._parse_config(config, video_id) return self._parse_config(config, video_id)
if re.search(r'<form[^>]+?id="pw_form"', webpage):
video_password = self._get_video_password()
token, vuid = self._extract_xsrft_and_vuid(webpage)
webpage = self._verify_video_password(
redirect_url, video_id, video_password, token, vuid)
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config: if vimeo_config:
seed_status = vimeo_config.get('seed_status') or {} seed_status = vimeo_config.get('seed_status') or {}
@@ -1290,9 +1293,7 @@ def _real_extract(self, url):
video_password = self._get_video_password() video_password = self._get_video_password()
viewer = self._download_json( viewer = self._download_json(
'https://vimeo.com/_rv/viewer', video_id) 'https://vimeo.com/_rv/viewer', video_id)
webpage = self._verify_video_password( webpage = self._verify_video_password(video_id, video_password, viewer['xsrft'])
'https://vimeo.com/' + video_id, video_id,
video_password, viewer['xsrft'], viewer['vuid'])
clip_page_config = self._parse_json(self._search_regex( clip_page_config = self._parse_json(self._search_regex(
r'window\.vimeo\.clip_page_config\s*=\s*({.+?});', r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
webpage, 'clip page config'), video_id) webpage, 'clip page config'), video_id)