[utils] sanitize_path: Reimplement function (#11198)

Authored by: Grub4K
This commit is contained in:
Simon Sawicki 2024-10-13 04:10:12 +02:00 committed by GitHub
parent 16eb28026a
commit 85b87c991a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 50 additions and 24 deletions

View file

@ -221,9 +221,10 @@ def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
def test_sanitize_path(self): def test_sanitize_path(self):
if sys.platform != 'win32': with unittest.mock.patch('sys.platform', 'win32'):
return self._test_sanitize_path()
def _test_sanitize_path(self):
self.assertEqual(sanitize_path('abc'), 'abc') self.assertEqual(sanitize_path('abc'), 'abc')
self.assertEqual(sanitize_path('abc/def'), 'abc\\def') self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def') self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
@ -256,6 +257,11 @@ def test_sanitize_path(self):
self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc')
self.assertEqual(sanitize_path('\\abc'), '\\abc')
self.assertEqual(sanitize_path('C:abc'), 'C:abc')
self.assertEqual(sanitize_path('C:abc\\..\\'), 'C:..')
self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s')
def test_sanitize_url(self): def test_sanitize_url(self):
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar') self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')

View file

@ -664,31 +664,51 @@ def replace_insane(char):
return result return result
def _sanitize_path_parts(parts):
sanitized_parts = []
for part in parts:
if not part or part == '.':
continue
elif part == '..':
if sanitized_parts and sanitized_parts[-1] != '..':
sanitized_parts.pop()
sanitized_parts.append('..')
continue
# Replace invalid segments with `#`
# - trailing dots and spaces (`asdf...` => `asdf..#`)
# - invalid chars (`<>` => `##`)
sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
sanitized_parts.append(sanitized_part)
return sanitized_parts
def sanitize_path(s, force=False): def sanitize_path(s, force=False):
"""Sanitizes and normalizes path on Windows""" """Sanitizes and normalizes path on Windows"""
# XXX: this handles drive relative paths (c:sth) incorrectly if sys.platform != 'win32':
if sys.platform == 'win32': if not force:
force = False
drive_or_unc, _ = os.path.splitdrive(s)
elif force:
drive_or_unc = ''
else:
return s return s
root = '/' if s.startswith('/') else ''
return root + '/'.join(_sanitize_path_parts(s.split('/')))
norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep) normed = s.replace('/', '\\')
if drive_or_unc:
norm_path.pop(0) if normed.startswith('\\\\'):
sanitized_path = [ # UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part) parts = normed.split('\\')
for path_part in norm_path] root = '\\'.join(parts[:4]) + '\\'
if drive_or_unc: parts = parts[4:]
sanitized_path.insert(0, drive_or_unc + os.path.sep) elif normed[1:2] == ':':
elif force and s and s[0] == os.path.sep: # absolute path or drive relative path
sanitized_path.insert(0, os.path.sep) offset = 3 if normed[2:3] == '\\' else 2
# TODO: Fix behavioral differences <3.12 root = normed[:offset]
# The workaround using `normpath` only superficially passes tests parts = normed[offset:].split('\\')
# Ref: https://github.com/python/cpython/pull/100351 else:
return os.path.normpath(os.path.join(*sanitized_path)) # relative/drive root relative path
root = '\\' if normed[:1] == '\\' else ''
parts = normed.split('\\')
return root + '\\'.join(_sanitize_path_parts(parts))
def sanitize_url(url, *, scheme='http'): def sanitize_url(url, *, scheme='http'):