From 99ee836f1c7fcc0bfa38e75bdbd035147fbd4905 Mon Sep 17 00:00:00 2001
From: icanhasmath
Date: Tue, 21 Apr 2026 11:16:30 -0500
Subject: [PATCH] Add parseaddr Unicode regression test (CVE-2023-27043 follow-up)

The 2.7-branch Unicode refactor for CVE-2023-27043 (c98f6b9d0f8) does not
port to Python 3: its code change uses the Python-2-only `unicode` name and
UTF-8-encodes to bytes, both of which are no-ops / errors on 3.x where `str`
is already Unicode. Port only the test intent.

Verified against the current 3.7.17.x parseaddr: strict mode accepts ASCII
and non-ASCII input and agrees with non-strict mode, so no source change to
Lib/email/utils.py is required.
---
 Lib/test/test_email/test_email.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 56a80631c97f18..0e3f6d1dfec4eb 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3319,6 +3319,23 @@ def test_parsing_errors(self):
         # Test email.utils.supports_strict_parsing attribute
         self.assertEqual(email.utils.supports_strict_parsing, True)
 
+    def test_parseaddr_unicode(self):
+        # CVE-2023-27043 Unicode regression guard. The 2.7 refactor had to
+        # accept both str and unicode at the parseaddr() boundary; in Python 3
+        # str is already Unicode, so this test just pins that strict-mode
+        # parsing still accepts valid non-ASCII input and agrees with the
+        # non-strict path.
+        for addr in (
+            'user@example.com',
+            'Test User ',
+            '"Test User" ',
+            '"Sürname, Firstname" ',
+        ):
+            with self.subTest(addr=addr):
+                strict = utils.parseaddr(addr, strict=True)
+                self.assertNotEqual(strict, ('', ''))
+                self.assertEqual(strict, utils.parseaddr(addr, strict=False))
+
     def test_getaddresses_nasty(self):
         for addresses, expected in (
             (['"Sürname, Firstname" '],