From 7aefc49c4013efb5056b2c1237e22c52cb5d3c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Nov 2015 20:20:16 +0600 Subject: [PATCH] [utils] Skip invalid/non HTML entities (Closes #7518) --- test/test_utils.py | 4 ++-- youtube_dl/utils.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 01829f71e..ea1ff0547 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -210,8 +210,8 @@ def test_unescape_html(self): self.assertEqual(unescapeHTML('%20;'), '%20;') self.assertEqual(unescapeHTML('/'), '/') self.assertEqual(unescapeHTML('/'), '/') - self.assertEqual( - unescapeHTML('é'), 'é') + self.assertEqual(unescapeHTML('é'), 'é') + self.assertEqual(unescapeHTML('�'), '�') def test_daterange(self): _20century = DateRange("19000101", "20000101") diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d39f313a4..b7013a6aa 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -396,7 +396,11 @@ def _htmlentity_transform(entity): numstr = '0%s' % numstr else: base = 10 - return compat_chr(int(numstr, base)) + # See https://github.com/rg3/youtube-dl/issues/7518 + try: + return compat_chr(int(numstr, base)) + except ValueError: + pass # Unknown entity in name, return its literal representation return ('&%s;' % entity)