[vimeo] Detect source extension

Also includes miscellaneous cleanup.

Cherry-picked from #1477
Closes #1402

Authored by: flashdagger
This commit is contained in:
pukkandan 2021-11-06 02:15:08 +05:30
parent 2a6f8475ac
commit 9cb070f9c0
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698

View file

@ -18,6 +18,7 @@
determine_ext, determine_ext,
ExtractorError, ExtractorError,
get_element_by_class, get_element_by_class,
HEADRequest,
js_to_json, js_to_json,
int_or_none, int_or_none,
merge_dicts, merge_dicts,
@ -35,6 +36,7 @@
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
unescapeHTML, unescapeHTML,
urlhandle_detect_ext,
) )
@ -229,27 +231,26 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None):
query['unlisted_hash'] = unlisted_hash query['unlisted_hash'] = unlisted_hash
download_data = self._download_json( download_data = self._download_json(
url, video_id, fatal=False, query=query, url, video_id, fatal=False, query=query,
headers={'X-Requested-With': 'XMLHttpRequest'}) headers={'X-Requested-With': 'XMLHttpRequest'},
if download_data: expected_status=(403, 404)) or {}
source_file = download_data.get('source_file') source_file = download_data.get('source_file')
if isinstance(source_file, dict): download_url = try_get(source_file, lambda x: x['download_url'])
download_url = source_file.get('download_url') if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): source_name = source_file.get('public_name', 'Original')
source_name = source_file.get('public_name', 'Original') if self._is_valid_url(download_url, video_id, '%s video' % source_name):
if self._is_valid_url(download_url, video_id, '%s video' % source_name): ext = (try_get(
ext = (try_get( source_file, lambda x: x['extension'],
source_file, lambda x: x['extension'], compat_str) or determine_ext(
compat_str) or determine_ext( download_url, None) or 'mp4').lower()
download_url, None) or 'mp4').lower() return {
return { 'url': download_url,
'url': download_url, 'ext': ext,
'ext': ext, 'width': int_or_none(source_file.get('width')),
'width': int_or_none(source_file.get('width')), 'height': int_or_none(source_file.get('height')),
'height': int_or_none(source_file.get('height')), 'filesize': parse_filesize(source_file.get('size')),
'filesize': parse_filesize(source_file.get('size')), 'format_id': source_name,
'format_id': source_name, 'quality': 1,
'quality': 1, }
}
jwt_response = self._download_json( jwt_response = self._download_json(
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {} 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
@ -258,15 +259,19 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None):
headers = {'Authorization': 'jwt %s' % jwt_response['jwt']} headers = {'Authorization': 'jwt %s' % jwt_response['jwt']}
original_response = self._download_json( original_response = self._download_json(
f'https://api.vimeo.com/videos/{video_id}', video_id, f'https://api.vimeo.com/videos/{video_id}', video_id,
headers=headers, fatal=False) or {} headers=headers, fatal=False, expected_status=(403, 404)) or {}
for download_data in original_response.get('download') or {}: for download_data in original_response.get('download') or []:
download_url = download_data.get('link') download_url = download_data.get('link')
if not download_url or download_data.get('quality') != 'source': if not download_url or download_data.get('quality') != 'source':
continue continue
query = parse_qs(download_url) ext = determine_ext(parse_qs(download_url).get('filename', [''])[0].lower(), default_ext=None)
if not ext:
urlh = self._request_webpage(
HEADRequest(download_url), video_id, fatal=False, note='Determining source extension')
ext = urlh and urlhandle_detect_ext(urlh)
return { return {
'url': download_url, 'url': download_url,
'ext': determine_ext(query.get('filename', [''])[0].lower()), 'ext': ext or 'unknown_video',
'format_id': download_data.get('public_name', 'Original'), 'format_id': download_data.get('public_name', 'Original'),
'width': int_or_none(download_data.get('width')), 'width': int_or_none(download_data.get('width')),
'height': int_or_none(download_data.get('height')), 'height': int_or_none(download_data.get('height')),
@ -362,7 +367,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': { 'params': {
'format': 'best[protocol=https]', 'format': 'best[protocol=https]',
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, },
{ {
'url': 'http://vimeo.com/68375962', 'url': 'http://vimeo.com/68375962',
@ -402,7 +406,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'upload_date': '20130928', 'upload_date': '20130928',
'duration': 187, 'duration': 187,
}, },
'expected_warnings': ['Unable to download JSON metadata'], 'params': {'format': 'http-1080p'},
}, },
{ {
'url': 'http://vimeo.com/76979871', 'url': 'http://vimeo.com/76979871',
@ -424,7 +428,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'es': [{'ext': 'vtt'}], 'es': [{'ext': 'vtt'}],
'fr': [{'ext': 'vtt'}], 'fr': [{'ext': 'vtt'}],
}, },
} },
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
}, },
{ {
# from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
@ -469,7 +474,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
'description': 'md5:f2edc61af3ea7a5592681ddbb683db73', 'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
'upload_date': '20200225', 'upload_date': '20200225',
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, },
{ {
# only available via https://vimeo.com/channels/tributes/6213729 and # only available via https://vimeo.com/channels/tributes/6213729 and
@ -491,7 +495,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, },
{ {
# redirects to ondemand extractor and should be passed through it # redirects to ondemand extractor and should be passed through it
@ -511,7 +514,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download JSON metadata'],
'skip': 'this page is no longer available.', 'skip': 'this page is no longer available.',
}, },
{ {
@ -583,12 +585,30 @@ class VimeoIE(VimeoBaseInfoExtractor):
'timestamp': 1459259666, 'timestamp': 1459259666,
'upload_date': '20160329', 'upload_date': '20160329',
}, },
'params': {'skip_download': True},
},
{
'url': 'https://vimeo.com/138909882',
'info_dict': {
'id': '138909882',
'ext': 'mp4',
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
'uploader_id': 'fireworkchampions',
'uploader': 'Firework Champions',
'upload_date': '20150910',
'timestamp': 1441901895,
},
'params': {
'skip_download': True,
'format': 'Original',
},
}, },
{ {
# requires passing unlisted_hash(a52724358e) to load_download_config request # requires passing unlisted_hash(a52724358e) to load_download_config request
'url': 'https://vimeo.com/392479337/a52724358e', 'url': 'https://vimeo.com/392479337/a52724358e',
'only_matching': True, 'only_matching': True,
} },
# https://gettingthingsdone.com/workflowmap/ # https://gettingthingsdone.com/workflowmap/
# vimeo embed with check-password page protected by Referer header # vimeo embed with check-password page protected by Referer header
] ]
@ -1110,10 +1130,10 @@ class VimeoGroupsIE(VimeoChannelIE):
IE_NAME = 'vimeo:group' IE_NAME = 'vimeo:group'
_VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)' _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://vimeo.com/groups/kattykay', 'url': 'https://vimeo.com/groups/meetup',
'info_dict': { 'info_dict': {
'id': 'kattykay', 'id': 'meetup',
'title': 'Katty Kay', 'title': 'Vimeo Meetup!',
}, },
'playlist_mincount': 27, 'playlist_mincount': 27,
}] }]
@ -1135,7 +1155,6 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'uploader_id': 'user21297594', 'uploader_id': 'user21297594',
'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks", 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, { }, {
'note': 'video player needs Referer', 'note': 'video player needs Referer',
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053', 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',