From e34c33614d8e4f0208d96d71e9c0ac6571587555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Dec 2016 02:23:49 +0700 Subject: [PATCH] [utils] Add convenience urljoin --- test/test_utils.py | 14 ++++++++++++++ youtube_dl/utils.py | 10 ++++++++++ 2 files changed, 24 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 2e3cd0179d..3f45b0bd1f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -70,6 +70,7 @@ lowercase_escape, url_basename, base_url, + urljoin, urlencode_postdata, urshift, update_url_query, @@ -445,6 +446,19 @@ def test_base_url(self): self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') + def test_urljoin(self): + self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', None), None) + self.assertEqual(urljoin('http://foo.de/', ''), None) + self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) + def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(False), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3d4951ad94..694e9a6003 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1700,6 +1700,16 @@ def base_url(url): 
def base_url(url):
    # The signature line is restored from the hunk-header context of the
    # patch; the return statement is unchanged.
    # NOTE(review): confirm nothing sits between the two in the full file.
    return re.match(r'https?://[^?#&]+/', url).group()


def _urljoin_text(value):
    """Return value as a non-empty text string, or None if it is not one."""
    if isinstance(value, bytes):
        # Byte strings (for instance a plain ``str`` on Python 2) used to be
        # rejected outright; accept them when they are valid UTF-8.
        try:
            value = value.decode('utf-8')
        except UnicodeDecodeError:
            return None
    if not isinstance(value, compat_str) or not value:
        return None
    return value


def urljoin(base, path):
    """Join ``path`` onto ``base``; return None when no http(s) URL results.

    A ``path`` that already is an absolute http(s) URL is returned as is,
    whatever ``base`` holds.  Otherwise ``base`` must itself be an http(s)
    URL and the result is ``compat_urlparse.urljoin(base, path)``.

    None is returned when ``path`` is empty or not a string, or when a
    relative ``path`` is paired with a ``base`` that is not an http(s) URL
    string.  Nothing is raised for unusable input.

    Byte strings are decoded as UTF-8 before use (undecodable bytes count as
    unusable input), and the scheme is matched case-insensitively.
    """
    path = _urljoin_text(path)
    if path is None:
        return None
    # An absolute path wins before ``base`` is even inspected, so a missing
    # or malformed base does not matter in that case.
    if re.match(r'https?://', path, re.IGNORECASE):
        return path
    base = _urljoin_text(base)
    if base is None or not re.match(r'https?://', base, re.IGNORECASE):
        return None
    return compat_urlparse.urljoin(base, path)


class HEADRequest(compat_urllib_request.Request):
    """Request whose ``get_method`` reports ``'HEAD'``."""

    def get_method(self):
        return 'HEAD'