From ed777b3255182cf61e332ff8e06a91be27910c2e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 10 Jun 2019 11:58:21 +0200 Subject: [PATCH 1/3] bpo-36742: Fix urlparse.urlsplit() error message for Unicode URL If urlparse.urlsplit() detects an invalid netloc according to NFKC normalization, the error message type is now str rather than unicode, and repr() is used to format the URL, to prevent an error when displaying the error message. --- Lib/test/test_urlparse.py | 6 +++++- Lib/urlparse.py | 5 +++-- .../next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst | 3 +++ 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 857ed96d92fe2d..272362a82465e8 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -644,8 +644,12 @@ def test_urlsplit_normalization(self): # bpo-36742: Verify port separators are ignored when they # existed prior to decomposition urlparse.urlsplit(u'http://\u30d5\u309a:80') - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as cm: urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380') + self.assertEqual(str(cm.exception), + "netloc u'\u30d7:80' contains invalid characters " + "under NFKC normalization") + self.assertIsInstance(cm.exception.message, str) for scheme in [u"http", u"https", u"ftp"]: for netloc in [u"netloc{}false.netloc", u"n{}user@netloc"]: diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 6834f3c1798b09..46cb29ef672903 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -180,8 +180,9 @@ def _checknetloc(netloc): return for c in '/?#@:': if c in netloc2: - raise ValueError(u"netloc '" + netloc + u"' contains invalid " + - u"characters under NFKC normalization") + raise ValueError("netloc %r contains invalid characters " + "under NFKC normalization" + % netloc2) def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: 
diff --git a/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst new file mode 100644 index 00000000000000..3ba774056f15f7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst @@ -0,0 +1,3 @@ +:func:`urlparse.urlsplit` error message for invalid ``netloc`` according to +NFKC normalization is now a :class:`str` string, rather than a +:class:`unicode` string, to prevent error when displaying the error. From db8cbd27e9a66ec57099156c2c620e145d058d7e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 10 Jun 2019 13:17:45 +0200 Subject: [PATCH 2/3] Fix unit test: use exc.args[0] rather than exc.message --- Lib/test/test_urlparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 272362a82465e8..f6a6b44d9e1bc1 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -649,7 +649,7 @@ def test_urlsplit_normalization(self): self.assertEqual(str(cm.exception), "netloc u'\u30d7:80' contains invalid characters " "under NFKC normalization") - self.assertIsInstance(cm.exception.message, str) + self.assertIsInstance(cm.exception.args[0], str) for scheme in [u"http", u"https", u"ftp"]: for netloc in [u"netloc{}false.netloc", u"n{}user@netloc"]: From cb86a7107495e86ab68989cbf175425bac9716d9 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 10 Jun 2019 17:08:04 +0200 Subject: [PATCH 3/3] Use netloc (not netloc2) in error message --- Lib/test/test_urlparse.py | 15 ++++++++++----- Lib/urlparse.py | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index f6a6b44d9e1bc1..86c4a0595c4f6b 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -644,12 +644,8 @@ def test_urlsplit_normalization(self): # bpo-36742: Verify port separators are ignored when they # existed prior to 
decomposition urlparse.urlsplit(u'http://\u30d5\u309a:80') - with self.assertRaises(ValueError) as cm: + with self.assertRaises(ValueError): urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380') - self.assertEqual(str(cm.exception), - "netloc u'\u30d7:80' contains invalid characters " - "under NFKC normalization") - self.assertIsInstance(cm.exception.args[0], str) for scheme in [u"http", u"https", u"ftp"]: for netloc in [u"netloc{}false.netloc", u"n{}user@netloc"]: @@ -660,6 +656,15 @@ def test_urlsplit_normalization(self): with self.assertRaises(ValueError): urlparse.urlsplit(url) + # check error message: invalid netloc must be formated with repr() + # to get an ASCII error message + with self.assertRaises(ValueError) as cm: + urlparse.urlsplit(u'http://example.com\uFF03@bing.com') + self.assertEqual(str(cm.exception), + "netloc u'example.com\\uff03@bing.com' contains invalid characters " + "under NFKC normalization") + self.assertIsInstance(cm.exception.args[0], str) + def test_main(): test_support.run_unittest(UrlParseTestCase) diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 46cb29ef672903..798b467b605f73 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -182,7 +182,7 @@ def _checknetloc(netloc): if c in netloc2: raise ValueError("netloc %r contains invalid characters " "under NFKC normalization" - % netloc2) + % netloc) def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: