diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index c2ca8e25abe34d..0c76cef5f600be 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -266,7 +266,9 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None): else: charmap[av] = 1 elif op is RANGE: - r = range(av[0], av[1]+1) + start, end = av + end += 1 + r = range(start, end) if fixup: # IGNORECASE and not LOCALE if fixes: for i in map(fixup, r): @@ -280,8 +282,10 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None): if not hascased: hascased = any(map(iscased, r)) else: - for i in r: - charmap[i] = 1 + if end > len(charmap): + # Trigger the IndexError growth path below. + raise IndexError + charmap[start:end] = b'\x01' * (end - start) elif op is NEGATE: out.append((op, av)) elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail: diff --git a/Misc/NEWS.d/next/Library/2026-05-05-18-39-52.gh-issue-149427.0ggedQ.rst b/Misc/NEWS.d/next/Library/2026-05-05-18-39-52.gh-issue-149427.0ggedQ.rst new file mode 100644 index 00000000000000..38d6c21f1ef988 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-05-18-39-52.gh-issue-149427.0ggedQ.rst @@ -0,0 +1,3 @@ +Speed up :func:`re.compile` of patterns with character ranges by replacing +the per-character loop in :mod:`!re._compiler` with a single bytearray slice +fill.