mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 10:31:29 +00:00
[utils] `sanitize_path`: Reimplement function (#11198)
Authored by: Grub4K
This commit is contained in:
parent
16eb28026a
commit
85b87c991a
|
@ -221,9 +221,10 @@ def test_sanitize_ids(self):
|
||||||
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
||||||
|
|
||||||
def test_sanitize_path(self):
|
def test_sanitize_path(self):
|
||||||
if sys.platform != 'win32':
|
# Patch `sys.platform` so the Windows branch of `sanitize_path` is exercised on every host OS
with unittest.mock.patch('sys.platform', 'win32'):
|
||||||
return
|
self._test_sanitize_path()
|
||||||
|
|
||||||
|
def _test_sanitize_path(self):
|
||||||
self.assertEqual(sanitize_path('abc'), 'abc')
|
self.assertEqual(sanitize_path('abc'), 'abc')
|
||||||
self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
|
self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
|
||||||
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
|
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
|
||||||
|
@ -256,6 +257,11 @@ def test_sanitize_path(self):
|
||||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_path('\\abc'), '\\abc')
|
||||||
|
self.assertEqual(sanitize_path('C:abc'), 'C:abc')
|
||||||
|
self.assertEqual(sanitize_path('C:abc\\..\\'), 'C:..')
|
||||||
|
self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s')
|
||||||
|
|
||||||
def test_sanitize_url(self):
|
def test_sanitize_url(self):
|
||||||
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
|
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
|
||||||
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
|
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
|
||||||
|
|
|
@ -664,31 +664,51 @@ def replace_insane(char):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_path_parts(parts):
|
||||||
|
sanitized_parts = []
|
||||||
|
for part in parts:
|
||||||
|
if not part or part == '.':
|
||||||
|
continue
|
||||||
|
elif part == '..':
|
||||||
|
if sanitized_parts and sanitized_parts[-1] != '..':
|
||||||
|
sanitized_parts.pop()
|
||||||
|
sanitized_parts.append('..')
|
||||||
|
continue
|
||||||
|
# Replace invalid segments with `#`
|
||||||
|
# - trailing dots and spaces (`asdf...` => `asdf..#`)
|
||||||
|
# - invalid chars (`<>` => `##`)
|
||||||
|
sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
|
||||||
|
sanitized_parts.append(sanitized_part)
|
||||||
|
|
||||||
|
return sanitized_parts
|
||||||
|
|
||||||
|
|
||||||
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    The path is split into a root, which is kept verbatim, and its
    components, which are cleaned by `_sanitize_path_parts`.

    When not running on Windows, `s` is returned unchanged unless `force`
    is set; with `force` it is treated as a `/`-separated path.
    Never returns an empty string: if neither a root nor any component
    is left, `.` is returned.
    """
    if sys.platform != 'win32':
        if not force:
            return s
        root = '/' if s.startswith('/') else ''
        path = '/'.join(_sanitize_path_parts(s.split('/')))
        # An empty string is not a usable path; fall back to the current
        # directory, like `os.path.normpath('')` does
        return root + path if root or path else '.'

    normed = s.replace('/', '\\')

    if normed.startswith('\\\\'):
        # UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
        # The first four split items are `''`, `''` and the two names
        # following the leading `\\`; they form the root
        parts = normed.split('\\')
        root = '\\'.join(parts[:4]) + '\\'
        parts = parts[4:]
    elif normed[1:2] == ':':
        # absolute path or drive relative path
        # `C:\` (3 chars) is an absolute root, a bare `C:` (2 chars) is drive relative
        offset = 3 if normed[2:3] == '\\' else 2
        root = normed[:offset]
        parts = normed[offset:].split('\\')
    else:
        # relative/drive root relative path
        root = '\\' if normed[:1] == '\\' else ''
        parts = normed.split('\\')

    path = '\\'.join(_sanitize_path_parts(parts))
    # Same fallback as above: never hand back an empty path
    return root + path if root or path else '.'
|
||||||
|
|
||||||
|
|
||||||
def sanitize_url(url, *, scheme='http'):
|
def sanitize_url(url, *, scheme='http'):
|
||||||
|
|
Loading…
Reference in a new issue