[utils] Fix urljoin for paths with non-http(s) schemes

This commit is contained in:
Sergey M․ 2019-01-20 20:21:24 +07:00
parent 6945b9e78f
commit fad4ceb534
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 3 additions and 1 deletion

View file

@@ -507,6 +507,8 @@ def test_urljoin(self):
self.assertEqual(urljoin('http://foo.de/', ''), None) self.assertEqual(urljoin('http://foo.de/', ''), None)
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
def test_url_or_none(self): def test_url_or_none(self):
self.assertEqual(url_or_none(None), None) self.assertEqual(url_or_none(None), None)

View file

@@ -1868,7 +1868,7 @@ def urljoin(base, path):
path = path.decode('utf-8') path = path.decode('utf-8')
if not isinstance(path, compat_str) or not path: if not isinstance(path, compat_str) or not path:
return None return None
if re.match(r'^(?:https?:)?//', path): if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path return path
if isinstance(base, bytes): if isinstance(base, bytes):
base = base.decode('utf-8') base = base.decode('utf-8')