From 2c0410626fdfee9b599bbf68e54213514f7f3d89 Mon Sep 17 00:00:00 2001
From: Owen Carey <37121709+owenthcarey@users.noreply.github.com>
Date: Sun, 31 May 2026 15:11:29 -0700
Subject: [PATCH 1/2] feat: replace regex shim with a faithful CPython re/_sre
 engine

---
 crates/weavepy-compiler/src/lib.rs            |   21 +
 crates/weavepy-parser/src/parser.rs           |   14 +
 crates/weavepy-vm/src/builtin_types.rs        |   16 +
 crates/weavepy-vm/src/builtins.rs             |  161 +-
 crates/weavepy-vm/src/lib.rs                  |  560 ++++--
 crates/weavepy-vm/src/object.rs               |  101 +-
 crates/weavepy-vm/src/stdlib/mod.rs           |   56 +-
 crates/weavepy-vm/src/stdlib/python/enum.py   |   29 +-
 .../src/stdlib/python/re_casefix.py           |  106 ++
 .../src/stdlib/python/re_compiler.py          |  775 ++++++++
 .../src/stdlib/python/re_constants.py         |  222 +++
 .../weavepy-vm/src/stdlib/python/re_engine.py |  426 +++++
 .../weavepy-vm/src/stdlib/python/re_init.py   |  350 ++++
 .../weavepy-vm/src/stdlib/python/re_parser.py | 1081 ++++++++++++
 .../src/stdlib/python/sre_compile.py          |   11 +
 .../src/stdlib/python/sre_constants.py        |   11 +
 .../weavepy-vm/src/stdlib/python/sre_parse.py |   11 +
 crates/weavepy-vm/src/stdlib/re.rs            | 1089 ------------
 crates/weavepy-vm/src/stdlib/sre_mod.rs       | 1557 +++++++++++++++++
 crates/weavepy-vm/src/stdlib/thread_real.rs   |    2 +
 crates/weavepy-vm/src/stdlib/weakref_real.rs  |    6 +-
 crates/weavepy-vm/src/types.rs                |   20 +
 docs/rfcs/0035-faithful-re-sre-unicode.md     |  435 +++++
 tests/regrtest/test_re.py                     |  116 ++
 24 files changed, 5948 insertions(+), 1228 deletions(-)
 create mode 100644 crates/weavepy-vm/src/stdlib/python/re_casefix.py
 create mode 100644 crates/weavepy-vm/src/stdlib/python/re_compiler.py
 create mode 100644 crates/weavepy-vm/src/stdlib/python/re_constants.py
 create mode 100644 crates/weavepy-vm/src/stdlib/python/re_engine.py
 create mode 100644 crates/weavepy-vm/src/stdlib/python/re_init.py
 create mode 100644 crates/weavepy-vm/src/stdlib/python/re_parser.py
 create mode 100644 crates/weavepy-vm/src/stdlib/python/sre_compile.py
 create mode 100644 crates/weavepy-vm/src/stdlib/python/sre_constants.py
 create mode 100644 crates/weavepy-vm/src/stdlib/python/sre_parse.py
 delete mode 100644 crates/weavepy-vm/src/stdlib/re.rs
 create mode 100644 crates/weavepy-vm/src/stdlib/sre_mod.rs
 create mode 100644 docs/rfcs/0035-faithful-re-sre-unicode.md
 create mode 100644 tests/regrtest/test_re.py

diff --git a/crates/weavepy-compiler/src/lib.rs b/crates/weavepy-compiler/src/lib.rs
index ee7d423..6922b80 100644
--- a/crates/weavepy-compiler/src/lib.rs
+++ b/crates/weavepy-compiler/src/lib.rs
@@ -4337,6 +4337,27 @@ fn collect_decls(
                 collect_decls(s, globals, nonlocals, assigned);
             }
         }
+        // `import a.b.c` binds the top-level package `a` (or the
+        // asname); `from m import x as y` binds `y`. These are real
+        // local bindings and must be tracked so a name captured by a
+        // nested scope is promoted to a cellvar (CPython parity).
+        StmtKind::Import(aliases) => {
+            for a in aliases {
+                let bind = a
+                    .asname
+                    .clone()
+                    .unwrap_or_else(|| a.name.split('.').next().unwrap_or(&a.name).to_owned());
+                assigned.insert(bind);
+            }
+        }
+        StmtKind::ImportFrom { names, .. } => {
+            for a in names {
+                let bind = a.asname.clone().unwrap_or_else(|| a.name.clone());
+                if bind != "*" {
+                    assigned.insert(bind);
+                }
+            }
+        }
         _ => {}
     }
 }
diff --git a/crates/weavepy-parser/src/parser.rs b/crates/weavepy-parser/src/parser.rs
index ab0bec6..2805699 100644
--- a/crates/weavepy-parser/src/parser.rs
+++ b/crates/weavepy-parser/src/parser.rs
@@ -3359,6 +3359,20 @@ fn decode_str_body(s: &str, raw: bool) -> Result<String, String> {
                 let n = u32::from_str_radix(&hex, 16).map_err(|e| e.to_string())?;
                 out.push(char::from_u32(n).unwrap_or('\u{FFFD}'));
             }
+            'U' => {
+                // 8-hex code-point escape, e.g. `\U0001F600`. Required
+                // for non-BMP literals; CPython rejects out-of-range or
+                // surrogate values, so we surface a clear error.
+                let mut hex = String::new();
+                for _ in 0..8 {
+                    hex.push(chars.next().ok_or("incomplete \\U escape")?);
+                }
+                let n = u32::from_str_radix(&hex, 16).map_err(|e| e.to_string())?;
+                let ch = char::from_u32(n).ok_or_else(|| {
+                    format!("invalid \\U escape: {n:#x} is not a valid character")
+                })?;
+                out.push(ch);
+            }
             other => {
                 // CPython issues a DeprecationWarning for unknown
                 // escapes but emits both characters literally.
diff --git a/crates/weavepy-vm/src/builtin_types.rs b/crates/weavepy-vm/src/builtin_types.rs
index b174c47..b12dc5f 100644
--- a/crates/weavepy-vm/src/builtin_types.rs
+++ b/crates/weavepy-vm/src/builtin_types.rs
@@ -626,6 +626,22 @@ fn install_object_dunders(object_: &Rc<TypeObject>) {
                 ))
             }
         };
+        // When `cls` derives from a primitive immutable built-in (so far
+        // `int` — covering `_NamedIntConstant`, `enum.IntEnum`/`IntFlag`
+        // and hand-written `class C(int)`), capture the value the
+        // instance wraps. `super().__new__(cls, value)` passes it as the
+        // second positional argument; absent that it defaults to 0.
+        if cls.is_subclass_of(&builtin_types().int_) {
+            let native = match args.get(1) {
+                None => Object::Int(0),
+                Some(o @ (Object::Int(_) | Object::Long(_))) => o.clone(),
+                Some(Object::Bool(b)) => Object::Int(i64::from(*b)),
+                Some(o) => Object::Int(o.as_i64().unwrap_or(0)),
+            };
+            return Ok(Object::Instance(Rc::new(PyInstance::with_native(
+                cls, native,
+            ))));
+        }
         Ok(Object::Instance(Rc::new(PyInstance::new(cls))))
     }
     fn object_init(_args: &[Object]) -> Result<Object, RuntimeError> {
diff --git a/crates/weavepy-vm/src/builtins.rs b/crates/weavepy-vm/src/builtins.rs
index 3a01ebe..3752435 100644
--- a/crates/weavepy-vm/src/builtins.rs
+++ b/crates/weavepy-vm/src/builtins.rs
@@ -439,6 +439,8 @@ pub fn lookup_method(obj: &Object, name: &str) -> Option<Object> {
             "splitlines" => Some(method("splitlines", bytes_splitlines)),
             "join" => Some(method("join", bytes_join)),
             "replace" => Some(method("replace", bytes_replace)),
+            "translate" => Some(method("translate", bytes_translate)),
+            "maketrans" => Some(method("maketrans", bytes_maketrans)),
             "isalnum" => Some(method("isalnum", bytes_isalnum)),
             "isalpha" => Some(method("isalpha", bytes_isalpha)),
             "isdigit" => Some(method("isdigit", bytes_isdigit)),
@@ -618,6 +620,35 @@ fn b_str(args: &[Object]) -> Result<Object, RuntimeError> {
     if args.is_empty() {
         return Ok(Object::from_static(""));
     }
+    // `str(object, encoding[, errors])` decodes a bytes-like object,
+    // equivalent to `object.decode(encoding, errors)`. CPython's
+    // `re._parser.Tokenizer` relies on `str(pattern, 'latin1')` to
+    // tokenize bytes patterns, so this path must decode rather than
+    // fall back to `repr`-style stringification.
+    if args.len() >= 2 {
+        match &args[0] {
+            Object::Bytes(_) | Object::ByteArray(_) => {}
+            other => {
+                return Err(type_error(format!(
+                    "decoding to str: need a bytes-like object, {} found",
+                    other.type_name()
+                )));
+            }
+        }
+        let data = bytes_data(args)?;
+        let encoding = match &args[1] {
+            Object::Str(e) => e.to_string(),
+            Object::None => "utf-8".to_owned(),
+            _ => return Err(type_error("str() argument 'encoding' must be str")),
+        };
+        let errors = match args.get(2) {
+            Some(Object::Str(e)) => e.to_string(),
+            Some(Object::None) | None => "strict".to_owned(),
+            _ => return Err(type_error("str() argument 'errors' must be str")),
+        };
+        let s = crate::stdlib::codecs_mod::decode_bytes(&data, &encoding, &errors)?;
+        return Ok(Object::from_str(s));
+    }
     Ok(Object::from_str(args[0].to_str()))
 }
 
@@ -2628,6 +2659,7 @@ pub fn make_super(class: Rc<crate::types::TypeObject>, receiver: Object) -> Obje
             d.insert(DictKey(Object::from_static("__self__")), receiver);
             d
         })),
+        native: None,
     };
     Object::Instance(Rc::new(inst))
 }
@@ -3804,7 +3836,9 @@ fn str_isidentifier(args: &[Object]) -> Result<Object, RuntimeError> {
 
 fn str_isprintable(args: &[Object]) -> Result<Object, RuntimeError> {
     let s = str_self(args)?;
-    Ok(Object::Bool(s.chars().all(|c| !c.is_control())))
+    Ok(Object::Bool(
+        s.chars().all(crate::object::char_is_printable),
+    ))
 }
 
 fn str_zfill(args: &[Object]) -> Result<Object, RuntimeError> {
@@ -4827,17 +4861,56 @@ fn bytes_match_prefix_suffix(
     }
 }
 
+/// Compute the byte window searched by `bytes.find`, `rfind` and
+/// `count` from the optional `start`/`end` arguments (positions 2 and
+/// 3). Negative values count from the end; the result is clamped.
+fn bytes_search_range(args: &[Object], len: usize) -> (usize, usize) {
+    let total = len as i64;
+    // Missing, `None`, or non-integer bounds fall back to `fallback`;
+    // integers get slice-style normalisation into `0..=total`.
+    let bound = |arg: Option<&Object>, fallback: i64| -> usize {
+        let raw = arg
+            .filter(|o| !matches!(o, Object::None))
+            .and_then(|o| o.as_i64());
+        let pos = match raw {
+            Some(x) if x < 0 => x + total,
+            Some(x) => x,
+            None => fallback,
+        };
+        pos.clamp(0, total) as usize
+    };
+    let lo = bound(args.get(2), 0);
+    let hi = bound(args.get(3), total);
+    // An inverted window collapses to the empty window at `lo`.
+    (lo, hi.max(lo))
+}
+
+/// Locate the first occurrence of `sub` inside `data[start..end]`.
+/// Yields its offset from the start of `data`, or -1 when absent or
+/// when the window is invalid; an empty `sub` matches at `start`.
+fn bytes_find_in(data: &[u8], sub: &[u8], start: usize, end: usize) -> i64 {
+    let window = match data.get(start..end) {
+        Some(w) => w,
+        None => return -1, // inverted or out-of-bounds window
+    };
+    match sub.len() {
+        0 => start as i64,
+        n if n > window.len() => -1,
+        n => window
+            .windows(n)
+            .position(|chunk| chunk == sub)
+            .map_or(-1, |offset| (start + offset) as i64),
+    }
+}
+
 fn bytes_find(args: &[Object]) -> Result<Object, RuntimeError> {
     let data = bytes_data(args)?;
     let sub = bytes_argview(
         args.get(1)
             .ok_or_else(|| type_error("find() expected 1 arg"))?,
     )?;
-    Ok(Object::Int(
-        data.windows(sub.len())
-            .position(|w| w == sub)
-            .map_or(-1, |i| i as i64),
-    ))
+    let (start, end) = bytes_search_range(args, data.len());
+    Ok(Object::Int(bytes_find_in(&data, &sub, start, end)))
 }
 
 fn bytes_rfind(args: &[Object]) -> Result<Object, RuntimeError> {
@@ -4846,9 +4919,16 @@ fn bytes_rfind(args: &[Object]) -> Result<Object, RuntimeError> {
         args.get(1)
             .ok_or_else(|| type_error("rfind() expected 1 arg"))?,
     )?;
+    let (start, end) = bytes_search_range(args, data.len());
+    if start > end || end > data.len() {
+        return Ok(Object::Int(-1));
+    }
+    if sub.is_empty() {
+        return Ok(Object::Int(end as i64));
+    }
     let mut last = -1i64;
-    if sub.len() <= data.len() {
-        for i in 0..=data.len() - sub.len() {
+    if sub.len() <= end - start {
+        for i in start..=end - sub.len() {
             if data[i..i + sub.len()] == sub[..] {
                 last = i as i64;
             }
@@ -4870,12 +4950,13 @@ fn bytes_count(args: &[Object]) -> Result<Object, RuntimeError> {
         args.get(1)
             .ok_or_else(|| type_error("count() expected 1 arg"))?,
     )?;
+    let (start, end) = bytes_search_range(args, data.len());
     if sub.is_empty() {
-        return Ok(Object::Int(data.len() as i64 + 1));
+        return Ok(Object::Int((end - start) as i64 + 1));
     }
     let mut n = 0i64;
-    let mut i = 0;
-    while i + sub.len() <= data.len() {
+    let mut i = start;
+    while i + sub.len() <= end {
         if data[i..i + sub.len()] == sub[..] {
             n += 1;
             i += sub.len();
@@ -5072,6 +5153,64 @@ fn bytes_replace(args: &[Object]) -> Result<Object, RuntimeError> {
     Ok(Object::new_bytes(out))
 }
 
+/// `bytes.translate(table, /, delete=b'')` and the `bytearray`
+/// equivalent. `table` is `None` (identity) or a bytes-like of length
+/// 256; bytes present in `delete` are dropped first. The receiver's
+/// type (bytes vs bytearray) is preserved.
+fn bytes_translate(args: &[Object]) -> Result<Object, RuntimeError> {
+    let data = bytes_data(args)?;
+    let table = match args.get(1) {
+        None | Some(Object::None) => None,
+        Some(o) => {
+            let t = bytes_argview(o)?;
+            if t.len() != 256 {
+                return Err(value_error("translation table must be 256 characters long"));
+            }
+            Some(t)
+        }
+    };
+    // Flag every byte listed in `delete` once, so the main loop does an
+    // O(1) probe per input byte instead of rescanning `delete`.
+    let mut dropped = [false; 256];
+    if let Some(o) = args.get(2).filter(|o| !matches!(o, Object::None)) {
+        for b in bytes_argview(o)? {
+            dropped[usize::from(b)] = true;
+        }
+    }
+    let mut out = Vec::with_capacity(data.len());
+    // Deletion is decided on the original byte, before translation.
+    for &b in data.iter().filter(|&&b| !dropped[usize::from(b)]) {
+        out.push(table.as_ref().map_or(b, |t| t[usize::from(b)]));
+    }
+    // Hand back the same flavour (bytes / bytearray) as the receiver.
+    if matches!(args.first(), Some(Object::ByteArray(_))) {
+        Ok(Object::new_bytearray(out))
+    } else {
+        Ok(Object::new_bytes(out))
+    }
+}
+
+/// `bytes.maketrans(from, to)` — returns a 256-byte translation table:
+/// the identity mapping, except that every byte of `from` is redirected
+/// to the byte at the same position in `to`.
+fn bytes_maketrans(args: &[Object]) -> Result<Object, RuntimeError> {
+    // Both operands are required; each is checked for presence and
+    // then converted, `from` before `to`.
+    let arity_error = || type_error("maketrans() takes exactly two arguments");
+    let from = bytes_argview(args.first().ok_or_else(arity_error)?)?;
+    let to = bytes_argview(args.get(1).ok_or_else(arity_error)?)?;
+    if from.len() != to.len() {
+        return Err(value_error("maketrans arguments must have same length"));
+    }
+    // Identity mapping first; when `from` repeats a byte, the last
+    // pairing wins, just as with sequential assignment.
+    let mut table: Vec<u8> = (0..=u8::MAX).collect();
+    for (&src, &dst) in from.iter().zip(&to) {
+        table[usize::from(src)] = dst;
+    }
+    Ok(Object::new_bytes(table))
+}
+
 fn bytes_isalnum(args: &[Object]) -> Result<Object, RuntimeError> {
     let data = bytes_data(args)?;
     Ok(Object::Bool(
diff --git a/crates/weavepy-vm/src/lib.rs b/crates/weavepy-vm/src/lib.rs
index 5a8285f..f23efa4 100644
--- a/crates/weavepy-vm/src/lib.rs
+++ b/crates/weavepy-vm/src/lib.rs
@@ -1427,14 +1427,15 @@ impl Interpreter {
                 let i = frame.pop()?;
                 let target = frame.pop()?;
                 let value = frame.pop()?;
+                let g = frame.globals.clone();
                 if let Object::Instance(_) = &target {
                     if let Some(method) = instance_method(&target, "__setitem__") {
-                        self.call(&method, &[i.clone(), value], &[], &frame.globals.clone())?;
+                        self.call(&method, &[i.clone(), value], &[], &g)?;
                     } else {
-                        self.store_subscr(&target, &i, value)?;
+                        self.store_subscr(&target, &i, value, &g)?;
                     }
                 } else {
-                    self.store_subscr(&target, &i, value)?;
+                    self.store_subscr(&target, &i, value, &g)?;
                 }
             }
             OpCode::DeleteSubscr => {
@@ -1467,7 +1468,13 @@ impl Interpreter {
             OpCode::UnaryOp => {
                 let v = frame.pop()?;
                 let kind: UnaryKind = unsafe { std::mem::transmute(ins.arg as u8) };
-                let r = unary_op(&v, kind)?;
+                let r = if matches!(kind, UnaryKind::Not) && matches!(v, Object::Instance(_)) {
+                    // `not obj` must honour __bool__/__len__.
+                    let g = frame.globals.clone();
+                    Object::Bool(!self.obj_truthy(&v, &g)?)
+                } else {
+                    unary_op(&v, kind)?
+                };
                 frame.push(r);
             }
             OpCode::CompareOp => {
@@ -1582,13 +1589,27 @@ impl Interpreter {
             }
             OpCode::PopJumpIfFalse => {
                 let v = frame.pop()?;
-                if !v.is_truthy() {
+                let truthy = match &v {
+                    Object::Instance(_) => {
+                        let g = frame.globals.clone();
+                        self.obj_truthy(&v, &g)?
+                    }
+                    _ => v.is_truthy(),
+                };
+                if !truthy {
                     frame.pc += ins.arg;
                 }
             }
             OpCode::PopJumpIfTrue => {
                 let v = frame.pop()?;
-                if v.is_truthy() {
+                let truthy = match &v {
+                    Object::Instance(_) => {
+                        let g = frame.globals.clone();
+                        self.obj_truthy(&v, &g)?
+                    }
+                    _ => v.is_truthy(),
+                };
+                if truthy {
                     frame.pc += ins.arg;
                 }
             }
@@ -3628,6 +3649,51 @@ impl Interpreter {
         Ok(Object::Int(v.len()? as i64))
     }
 
+    /// Truthiness as the VM sees it. Instances are asked via
+    /// `__bool__` first and `__len__` second, so classes defining
+    /// either dunder behave correctly in boolean contexts; all other
+    /// objects (and instances with neither) use the pure
+    /// [`Object::is_truthy`]. Mirrors CPython's `PyObject_IsTrue`.
+    fn obj_truthy(
+        &mut self,
+        v: &Object,
+        globals: &Rc<RefCell<DictData>>,
+    ) -> Result<bool, RuntimeError> {
+        if !matches!(v, Object::Instance(_)) {
+            return Ok(v.is_truthy());
+        }
+        if let Some(dunder_bool) = instance_method(v, "__bool__") {
+            let verdict = self.call(&dunder_bool, &[], &[], globals)?;
+            if let Object::Bool(flag) = verdict {
+                return Ok(flag);
+            }
+            // Non-bool results are tolerated when `as_i64` can read them.
+            return verdict.as_i64().map(|n| n != 0).ok_or_else(|| {
+                type_error(format!(
+                    "__bool__ should return bool, returned {}",
+                    verdict.type_name()
+                ))
+            });
+        }
+        match instance_method(v, "__len__") {
+            Some(dunder_len) => Ok(self.call(&dunder_len, &[], &[], globals)?.is_truthy()),
+            None => Ok(v.is_truthy()),
+        }
+    }
+
+    /// `bool(x)` constructor: with no argument the result is `False`;
+    /// otherwise [`Self::obj_truthy`] decides, honouring `__bool__`/`__len__`.
+    fn do_bool_call(
+        &mut self,
+        args: &[Object],
+        globals: &Rc<RefCell<DictData>>,
+    ) -> Result<Object, RuntimeError> {
+        if let Some(v) = args.first() {
+            return Ok(Object::Bool(self.obj_truthy(v, globals)?));
+        }
+        Ok(Object::Bool(false))
+    }
+
     /// `int(x)` with a fallback to the user-defined `__int__`. Matches
     /// CPython's coercion rules well enough for the common cases —
     /// user classes that store an integer payload (enums, ipaddress,
@@ -3660,6 +3726,11 @@ impl Interpreter {
                         ))),
                     };
                 }
+                // `int` subclass instance with no `__int__` override:
+                // `int(x)` yields a plain int of the wrapped value.
+                if let Some(native) = other.native_value() {
+                    return self.do_int_call(&[native], globals);
+                }
                 Err(type_error(format!(
                     "int() argument must be a string or a real number, not '{}'",
                     other.type_name()
@@ -3696,6 +3767,9 @@ impl Interpreter {
                         ))),
                     };
                 }
+                if let Some(native) = other.native_value() {
+                    return self.do_float_call(&[native], globals);
+                }
                 Err(type_error(format!(
                     "float() argument must be a string or a real number, not '{}'",
                     other.type_name()
@@ -4181,57 +4255,6 @@ impl Interpreter {
         Ok(Object::new_list(items))
     }
 
-    /// VM-routed dispatch for ``re.sub(pattern, repl_callable, text,
-    /// count=0, flags=0)`` where ``repl`` is a callable. We
-    /// collect the spans up-front (no VM reentrancy mid-iteration)
-    /// and then call ``repl(match)`` once per match.
-    fn do_re_sub_callable(
-        &mut self,
-        args: &[Object],
-        globals: &Rc<RefCell<DictData>>,
-    ) -> Result<Object, RuntimeError> {
-        use crate::stdlib::re as remod;
-        let pat_obj = args
-            .first()
-            .ok_or_else(|| type_error("re.sub: missing pattern"))?;
-        let repl = args
-            .get(1)
-            .ok_or_else(|| type_error("re.sub: missing repl"))?
-            .clone();
-        let text = match args.get(2) {
-            Some(Object::Str(s)) => s.to_string(),
-            _ => return Err(type_error("re.sub: expected str text")),
-        };
-        let count = match args.get(3) {
-            Some(Object::Int(i)) => *i,
-            _ => 0,
-        };
-        let (pat, default_flags) = remod::extract_pattern_pub(pat_obj)?;
-        let flags = match args.get(4) {
-            Some(Object::Int(i)) => *i,
-            _ => default_flags,
-        };
-        let matches = remod::collect_all_matches(&pat, flags, &text)?;
-        let mut out = String::new();
-        let mut last_end = 0usize;
-        for (idx, (s, e, groups)) in matches.iter().enumerate() {
-            if count > 0 && (idx as i64) >= count {
-                break;
-            }
-            out.push_str(&text[last_end..*s]);
-            let m_obj = remod::build_match_object(&pat, &text, groups, *s, *e);
-            let ret = self.call_object(repl.clone(), &[m_obj], &[])?;
-            match ret {
-                Object::Str(rs) => out.push_str(&rs),
-                _ => return Err(type_error("re.sub callable must return str")),
-            }
-            last_end = *e;
-        }
-        out.push_str(&text[last_end..]);
-        let _ = globals;
-        Ok(Object::from_str(out))
-    }
-
     fn do_list_sort_call(
         &mut self,
         args: &[Object],
@@ -4330,6 +4353,27 @@ impl Interpreter {
                 if let Some(method) = instance_method(v, "__iter__") {
                     return self.call(&method, &[], &[], globals);
                 }
+                // Legacy sequence protocol: an object that defines
+                // `__getitem__` but no `__iter__` is still iterable —
+                // CPython calls `obj[0]`, `obj[1]`, … until `IndexError`.
+                // We materialise eagerly into a list (consistent with the
+                // `iter(callable, sentinel)` path above); the wrapped
+                // sequences this serves — `re`'s `SubPattern`, simple
+                // user containers — are finite and side-effect-free.
+                if let Some(getitem) = instance_method(v, "__getitem__") {
+                    let mut out: Vec<Object> = Vec::new();
+                    let mut i: i64 = 0;
+                    loop {
+                        match self.call(&getitem, &[Object::Int(i)], &[], globals) {
+                            Ok(val) => out.push(val),
+                            Err(e) if is_index_error(&e) => break,
+                            Err(e) => return Err(e),
+                        }
+                        i += 1;
+                    }
+                    let list = Object::new_list(out);
+                    return self.make_iter(&list, globals);
+                }
                 Err(type_error(format!(
                     "'{}' object is not iterable",
                     v.type_name_owned()
@@ -4997,6 +5041,33 @@ impl Interpreter {
         if let Some(method) = instance_method(b, rdunder) {
             return self.call(&method, std::slice::from_ref(a), &[], globals);
         }
+        // `str % args`: route through a VM-aware formatter so `%s` / `%r`
+        // of user instances dispatch `__str__` / `__repr__` (e.g.
+        // `"err: %s" % some_exception`). Other `%` operand types fall
+        // through to the pure `binary_op` path.
+        if matches!(op, BinOpKind::Mod) {
+            if let Object::Str(template) = a {
+                let template = template.clone();
+                let mut resolve =
+                    |obj: &Object, kind: char| -> Result<Option<String>, RuntimeError> {
+                        if let Object::Instance(_) = obj {
+                            let s = match kind {
+                                's' => self.stringify(obj, globals)?,
+                                'r' => self.repr_of(obj, globals)?,
+                                _ => return Ok(None),
+                            };
+                            Ok(Some(s))
+                        } else {
+                            Ok(None)
+                        }
+                    };
+                return Ok(Object::from_str(percent_format_with(
+                    &template,
+                    b,
+                    &mut resolve,
+                )?));
+            }
+        }
         binary_op(a, b, op)
     }
 
@@ -6108,6 +6179,15 @@ impl Interpreter {
         container: &Object,
         index: &Object,
     ) -> Result<Object, RuntimeError> {
+        // An `int` subclass instance used as an index (`xs[op]` where
+        // `op` is e.g. a `_NamedIntConstant`) acts as its int value.
+        let unwrapped = match index {
+            Object::Instance(_) => index
+                .native_value()
+                .filter(|n| matches!(n, Object::Int(_) | Object::Long(_) | Object::Bool(_))),
+            _ => None,
+        };
+        let index = unwrapped.as_ref().unwrap_or(index);
         match (container, index) {
             (Object::List(items), Object::Int(i)) => {
                 let items = items.borrow();
@@ -6149,6 +6229,18 @@ impl Interpreter {
                 let s: String = sliced.iter().map(|o| o.to_str()).collect();
                 Ok(Object::from_str(s))
             }
+            (Object::Range(r), Object::Int(i)) => {
+                let len = container.len()? as i64;
+                let idx = if *i < 0 { *i + len } else { *i };
+                if idx < 0 || idx >= len {
+                    return Err(index_error("range object index out of range"));
+                }
+                Ok(Object::Int(r.start + idx * r.step))
+            }
+            (Object::Range(r), Object::Slice(slc)) => {
+                let len = container.len()? as i64;
+                range_slice(r, len, slc)
+            }
             (Object::Bytes(buf), Object::Int(i)) => {
                 let idx = normalize_index(*i, buf.len())?;
                 Ok(Object::Int(i64::from(buf[idx])))
@@ -6223,10 +6315,11 @@ impl Interpreter {
     }
 
     fn store_subscr(
-        &self,
+        &mut self,
         container: &Object,
         index: &Object,
         value: Object,
+        globals: &Rc<RefCell<DictData>>,
     ) -> Result<(), RuntimeError> {
         match (container, index) {
             (Object::List(items), Object::Int(i)) => {
@@ -6239,22 +6332,17 @@ impl Interpreter {
                 // CPython: `xs[start:stop:step] = iterable`. We
                 // collect the RHS, then splice in place. Supporting
                 // strided slice assignment requires that `len(rhs)`
-                // matches the slice width.
-                let replacement = match value {
+                // matches the slice width. The RHS is collected via the
+                // full VM iteration protocol so objects that are only
+                // legacy-iterable (`__getitem__`, no `__iter__`) work.
+                let replacement = match &value {
                     Object::List(l) => l.borrow().clone(),
                     Object::Tuple(t) => t.iter().cloned().collect::<Vec<_>>(),
-                    Object::Str(ref txt) => txt
+                    Object::Str(txt) => txt
                         .chars()
                         .map(|c| Object::from_str(c.to_string()))
                         .collect(),
-                    other => {
-                        let mut buf = Vec::new();
-                        let mut it = other.make_iter()?;
-                        while let Some(v) = it.next_value() {
-                            buf.push(v);
-                        }
-                        buf
-                    }
+                    _ => self.collect_iterable(&value, globals)?,
                 };
                 let mut data = items.borrow_mut();
                 apply_slice_assignment(&mut data, s, replacement)?;
@@ -6289,12 +6377,31 @@ impl Interpreter {
                 items.remove(idx);
                 Ok(())
             }
+            (Object::List(items), Object::Slice(s)) => {
+                apply_slice_deletion(&mut items.borrow_mut(), s)
+            }
             (Object::Dict(d), key) => {
                 if d.borrow_mut().shift_remove(&DictKey(key.clone())).is_none() {
                     return Err(key_error(key.repr()));
                 }
                 Ok(())
             }
+            (Object::ByteArray(b), Object::Int(i)) => {
+                let mut b = b.borrow_mut();
+                let idx = normalize_index(*i, b.len())?;
+                b.remove(idx);
+                Ok(())
+            }
+            (Object::ByteArray(b), Object::Slice(s)) => {
+                let mut b = b.borrow_mut();
+                let mut indices = slice_indices(b.len(), s)?;
+                indices.sort_unstable();
+                indices.dedup();
+                for idx in indices.into_iter().rev() {
+                    b.remove(idx);
+                }
+                Ok(())
+            }
             _ => Err(type_error(format!(
                 "'{}' object does not support item deletion",
                 container.type_name()
@@ -6344,6 +6451,9 @@ impl Interpreter {
                 if b.name == "len" && args.len() == 1 {
                     return self.do_len_call(&args[0], outer_globals);
                 }
+                if b.name == "bool" && args.len() <= 1 {
+                    return self.do_bool_call(args, outer_globals);
+                }
                 if b.name == "int" && args.len() <= 2 {
                     return self.do_int_call(args, outer_globals);
                 }
@@ -6488,20 +6598,6 @@ impl Interpreter {
                 if (b.name == "min" || b.name == "max") && !args.is_empty() {
                     return self.do_min_max_call(b.name, args, kwargs, outer_globals);
                 }
-                // ``re.sub(pat, repl, text, count=0, flags=0)``
-                // accepts a callable ``repl``; routing it through the
-                // VM lets the callback invoke arbitrary user code.
-                if b.name == "sub" && args.len() >= 3 {
-                    let callable_repl = matches!(
-                        args.get(1),
-                        Some(Object::Function(_))
-                            | Some(Object::Builtin(_))
-                            | Some(Object::BoundMethod(_))
-                    );
-                    if callable_repl {
-                        return self.do_re_sub_callable(args, outer_globals);
-                    }
-                }
                 // `format`'s dispatching: when args[0] is a string we
                 // assume this is `"...".format(...)` (str_format
                 // builtin) and pass kwargs through. Otherwise fall
@@ -7173,6 +7269,11 @@ impl Interpreter {
                 let global_dummy = Rc::new(RefCell::new(DictData::new()));
                 return self.do_float_call(args, &global_dummy);
             }
+            // `bool(x)` must consult __bool__/__len__ for instances.
+            if cls.name == "bool" && args.len() <= 1 && kwargs.is_empty() {
+                let global_dummy = Rc::new(RefCell::new(DictData::new()));
+                return self.do_bool_call(args, &global_dummy);
+            }
             if let Some(builtin) = self.builtin_constructor_for(&cls) {
                 if !kwargs.is_empty() {
                     return Err(type_error(format!(
@@ -7548,7 +7649,24 @@ impl Interpreter {
             IC::CallPyExactNoFree { func_id, argc: ca } => {
                 if ca as usize == argc {
                     if let Object::Function(f) = &callable {
-                        if specialize::rc_id(f) == func_id && args.len() == argc {
+                        // `func_id` is a raw pointer fingerprint and can
+                        // alias a *different* function after the original
+                        // was freed and its allocation reused (ABA). Re-
+                        // verify the shape this fast path assumes — exact
+                        // arity, no cells/closure — so a recycled address
+                        // can never run an incompatible function through
+                        // the no-free path (which skips defaults & cells).
+                        let code = &f.code;
+                        if specialize::rc_id(f) == func_id
+                            && args.len() == argc
+                            && code.arg_count as usize == argc
+                            && !code.has_varargs
+                            && !code.has_varkeywords
+                            && code.kwonly_count == 0
+                            && code.cellvars.is_empty()
+                            && code.freevars.is_empty()
+                            && f.closure.is_empty()
+                        {
                             specialize::record_hit(op_idx);
                             let f = f.clone();
                             let r = self.run_py_exact_nofree(&f, args)?;
@@ -7562,7 +7680,17 @@ impl Interpreter {
             IC::CallPyExact { func_id, argc: ca } => {
                 if ca as usize == argc {
                     if let Object::Function(f) = &callable {
-                        if specialize::rc_id(f) == func_id && args.len() == argc {
+                        // Same ABA guard as above: confirm exact arity
+                        // before taking the binding-free path (cells are
+                        // rebuilt from `f.code`, so they stay correct).
+                        let code = &f.code;
+                        if specialize::rc_id(f) == func_id
+                            && args.len() == argc
+                            && code.arg_count as usize == argc
+                            && !code.has_varargs
+                            && !code.has_varkeywords
+                            && code.kwonly_count == 0
+                        {
                             specialize::record_hit(op_idx);
                             let f = f.clone();
                             let r = self.run_py_exact_with_cells(&f, args)?;
@@ -8416,6 +8544,126 @@ fn apply_slice_assignment(
     Ok(())
 }
 
+/// Compute the concrete indices covered by `s` over a sequence of
+/// length `len` (CPython's `PySlice_Unpack` + `PySlice_AdjustIndices`),
+/// in iteration order; errors on a zero step or a non-int, non-None field.
+fn slice_indices(len: usize, s: &PySlice) -> Result<Vec<usize>, RuntimeError> {
+    let len = len as i64;
+    let step = match &s.step {
+        Object::None => 1i64,
+        Object::Int(i) => *i,
+        _ => return Err(type_error("slice indices must be integers or None")),
+    };
+    if step == 0 {
+        return Err(value_error("slice step cannot be zero"));
+    }
+    let (lower, upper) = if step < 0 {
+        (-1i64, len - 1)
+    } else {
+        (0i64, len)
+    };
+    // Resolve a bound: `None` falls back to its default sentinel
+    // directly (never re-mapped through the negative-index rule), while
+    // explicit values are wrapped (`+= len`) then clamped to [lower, upper].
+    let resolve = |o: &Object, default: i64| -> Result<i64, RuntimeError> {
+        match o {
+            Object::None => Ok(default),
+            Object::Int(i) => {
+                let v = if *i < 0 { *i + len } else { *i };
+                Ok(v.clamp(lower, upper))
+            }
+            _ => Err(type_error("slice indices must be integers or None")),
+        }
+    };
+    let mut i = resolve(&s.start, if step > 0 { 0 } else { len - 1 })?;
+    let stop = resolve(&s.stop, if step > 0 { len } else { -1 })?;
+    let mut out = Vec::new();
+    while (step > 0 && i < stop) || (step < 0 && i > stop) {
+        if i >= 0 && (i as usize) < len as usize {
+            out.push(i as usize);
+        }
+        i = i.saturating_add(step); // a huge step ends the walk instead of overflowing
+    }
+    Ok(out)
+}
+
+/// CPython's `PySlice_Unpack` + `PySlice_AdjustIndices`: resolve a slice
+/// against a sequence of length `len`, returning
+/// `(start, stop, step, slicelength)` with the same clamping rules.
+fn adjust_slice(len: i64, s: &PySlice) -> Result<(i64, i64, i64, i64), RuntimeError> {
+    let step = match &s.step {
+        Object::None => 1i64,
+        Object::Int(i) => (*i).max(-i64::MAX), // as CPython: `-step` below must not overflow
+        _ => return Err(type_error("slice indices must be integers or None")),
+    };
+    if step == 0 {
+        return Err(value_error("slice step cannot be zero"));
+    }
+    let (lower, upper) = if step < 0 {
+        (-1i64, len - 1)
+    } else {
+        (0i64, len)
+    };
+    let clamp = |o: &Object, dflt: i64| -> Result<i64, RuntimeError> {
+        match o {
+            Object::None => Ok(dflt),
+            Object::Int(i) => {
+                let mut x = *i;
+                if x < 0 {
+                    x += len;
+                    if x < lower {
+                        x = lower;
+                    }
+                } else if x > upper {
+                    x = upper;
+                }
+                Ok(x)
+            }
+            _ => Err(type_error("slice indices must be integers or None")),
+        }
+    };
+    let start = clamp(&s.start, if step < 0 { upper } else { lower })?;
+    let stop = clamp(&s.stop, if step < 0 { lower } else { upper })?;
+    let slicelength = if step < 0 {
+        if stop < start {
+            (start - stop - 1) / (-step) + 1
+        } else {
+            0
+        }
+    } else if start < stop {
+        (stop - start - 1) / step + 1
+    } else {
+        0
+    };
+    Ok((start, stop, step, slicelength.max(0)))
+}
+
+/// `range(...)[slice]` → a new range, mirroring CPython `compute_slice`.
+fn range_slice(r: &crate::object::Range, len: i64, s: &PySlice) -> Result<Object, RuntimeError> {
+    let (start, _stop, step, slicelen) = adjust_slice(len, s)?;
+    let new_start = r.start.saturating_add(start.saturating_mul(r.step));
+    let new_step = r.step.saturating_mul(step); // saturate: bounds near i64::MAX are legal
+    let new_stop = new_start.saturating_add(slicelen.saturating_mul(new_step));
+    Ok(Object::Range(Rc::new(crate::object::Range {
+        start: new_start,
+        stop: new_stop,
+        step: new_step,
+    })))
+}
+
+/// Delete, in place, every element of `data` selected by slice `s`
+/// (the `del data[start:stop:step]` statement).
+fn apply_slice_deletion(data: &mut Vec<Object>, s: &PySlice) -> Result<(), RuntimeError> {
+    let mut doomed = slice_indices(data.len(), s)?;
+    doomed.sort_unstable();
+    doomed.dedup();
+    // Walk the targets from the back: removing a higher index never moves a lower one.
+    doomed.iter().rev().for_each(|&at| {
+        data.remove(at);
+    });
+    Ok(())
+}
+
 fn slice_seq(seq: &[Object], s: &PySlice) -> Result<Vec<Object>, RuntimeError> {
     let len = seq.len() as i64;
     let step = match &s.step {
@@ -8430,43 +8678,76 @@ fn slice_seq(seq: &[Object], s: &PySlice) -> Result<Vec<Object>, RuntimeError> {
     if step == 0 {
         return Err(value_error("slice step cannot be zero"));
     }
-    let extract = |o: &Object, default: i64| -> Result<i64, RuntimeError> {
-        match o {
-            Object::None => Ok(default),
-            Object::Int(i) => Ok(*i),
-            _ => Err(type_error(
-                "slice indices must be integers or None or have an __index__ method",
-            )),
-        }
-    };
-    let start = extract(&s.start, if step > 0 { 0 } else { len - 1 })?;
-    let stop = extract(&s.stop, if step > 0 { len } else { -1 })?;
-    let norm = |x: i64| -> i64 {
+    // Map an *explicit* index to a concrete one, mirroring CPython's
+    // `PySlice_AdjustIndices`. The clamp floor differs by step sign: a
+    // negative step can legitimately walk down to index -1 (one below
+    // the start of the sequence), whereas a positive step floors at 0.
+    let adjust = |x: i64| -> i64 {
         if x < 0 {
-            let n = x + len;
-            if n < 0 && step > 0 {
-                0
+            let v = x + len;
+            if v < 0 {
+                if step < 0 {
+                    -1
+                } else {
+                    0
+                }
+            } else {
+                v
+            }
+        } else if x >= len {
+            if step < 0 {
+                len - 1
             } else {
-                n
+                len
             }
-        } else if x > len {
-            len
         } else {
             x
         }
     };
-    let mut i = norm(start);
-    let stop_norm = norm(stop);
+    // Defaults for an omitted bound use sentinels that must *not* pass
+    // through `adjust` (e.g. an omitted `stop` with a negative step is
+    // -1, meaning "below index 0", not "the last element").
+    let start = match &s.start {
+        Object::None => {
+            if step < 0 {
+                len - 1
+            } else {
+                0
+            }
+        }
+        Object::Int(i) => adjust(*i),
+        _ => {
+            return Err(type_error(
+                "slice indices must be integers or None or have an __index__ method",
+            ))
+        }
+    };
+    let stop = match &s.stop {
+        Object::None => {
+            if step < 0 {
+                -1
+            } else {
+                len
+            }
+        }
+        Object::Int(i) => adjust(*i),
+        _ => {
+            return Err(type_error(
+                "slice indices must be integers or None or have an __index__ method",
+            ))
+        }
+    };
+    let mut i = start;
     let mut out = Vec::new();
     if step > 0 {
-        while i < stop_norm {
+        while i < stop {
             if (0..len).contains(&i) {
                 out.push(seq[i as usize].clone());
             }
             i += step;
         }
     } else {
-        while i > stop_norm {
+        while i > stop {
             if (0..len).contains(&i) {
                 out.push(seq[i as usize].clone());
             }
@@ -9098,6 +9379,20 @@ fn bytes_percent_args(value: &Object) -> Object {
 }
 
 pub(crate) fn percent_format(template: &str, value: &Object) -> Result<String, RuntimeError> {
+    let mut noop = |_: &Object, _: char| Ok(None);
+    percent_format_with(template, value, &mut noop)
+}
+
+/// Printf-style `%` formatting with a VM-supplied `resolve` callback.
+///
+/// `resolve(item, kind)` lets the caller render `%s` / `%r` of user
+/// instances through `__str__` / `__repr__` (returning `Some(rendered)`),
+/// falling back to the built-in conversion when it returns `None`.
+pub(crate) fn percent_format_with(
+    template: &str,
+    value: &Object,
+    resolve: &mut dyn FnMut(&Object, char) -> Result<Option<String>, RuntimeError>,
+) -> Result<String, RuntimeError> {
     let mut out = String::new();
     let bytes = template.as_bytes();
     let mut i = 0;
@@ -9219,13 +9514,36 @@ pub(crate) fn percent_format(template: &str, value: &Object) -> Result<String, R
             }
             spec.push(kind);
             let rendered = match kind {
-                's' => format_via_spec(&Object::from_str(item.to_str()), &spec)?,
-                'r' => format_via_spec(&Object::from_str(item.repr()), &spec.replace('r', "s"))?,
+                's' => {
+                    let s = match resolve(&item, 's')? {
+                        Some(s) => s,
+                        None => item.to_str(),
+                    };
+                    format_via_spec(&Object::from_str(s), &spec)?
+                }
+                'r' => {
+                    let s = match resolve(&item, 'r')? {
+                        Some(s) => s,
+                        None => item.repr(),
+                    };
+                    format_via_spec(&Object::from_str(s), &spec.replace('r', "s"))?
+                }
                 'a' => format_via_spec(
                     &Object::from_str(ascii_repr(&item)),
                     &spec.replace('a', "s"),
                 )?,
-                'd' | 'i' | 'u' => format_via_spec(&item, &spec.replace(['i', 'u'], "d"))?,
+                'd' | 'i' | 'u' => {
+                    // Unwrap `int` subclasses (enum members, _NamedIntConstant)
+                    // so `%d` sees a real integer rather than the instance.
+                    let numeric = match &item {
+                        Object::Instance(_) => match item.as_i64() {
+                            Some(n) => Object::Int(n),
+                            None => item.clone(),
+                        },
+                        _ => item.clone(),
+                    };
+                    format_via_spec(&numeric, &spec.replace(['i', 'u'], "d"))?
+                }
                 'b' | 'o' | 'x' | 'X' => format_via_spec(&item, &spec)?,
                 'f' | 'F' | 'e' | 'E' | 'g' | 'G' => format_via_spec(&item, &spec)?,
                 'c' => match &item {
@@ -9699,6 +10017,19 @@ fn group_decimal(mag: u64, sep: char) -> String {
     out
 }
 
+/// True when `e` is a Python `IndexError` (or subclass); ends legacy `__getitem__` iteration.
+fn is_index_error(e: &RuntimeError) -> bool {
+    match e {
+        RuntimeError::PyException(pe) => match &pe.instance {
+            Object::Instance(inst) => inst
+                .class
+                .is_subclass_of(&crate::builtin_types::builtin_types().index_error),
+            _ => false,
+        },
+        _ => false,
+    }
+}
+
 fn binop_dunders(op: BinOpKind) -> (&'static str, &'static str) {
     use BinOpKind as B;
     match op {
@@ -9898,8 +10229,15 @@ fn object_to_constant(o: &Object) -> Constant {
 fn binary_op(a: &Object, b: &Object, op: BinOpKind) -> Result<Object, RuntimeError> {
     use BinOpKind as B;
     use Object as O;
+    // Subclasses of immutable built-ins (`class C(int)`, `enum.IntEnum`,
+    // `_NamedIntConstant`, …) behave like the value they wrap. By the
+    // time we reach this primitive path the caller has already ruled
+    // out any user `__op__` / `__rop__` override, so unwrapping to the
+    // native value is the correct (and CPython-matching) fallback.
+    let a = a.native_value().unwrap_or_else(|| a.clone());
+    let b = b.native_value().unwrap_or_else(|| b.clone());
     // Promote bool → int where appropriate.
-    let (a, b) = (promote_bool(a), promote_bool(b));
+    let (a, b) = (promote_bool(&a), promote_bool(&b));
 
     // Numeric tower: any (int-like, int-like) arithmetic routes
     // through the bignum-aware path with i64 fast-track and overflow
diff --git a/crates/weavepy-vm/src/object.rs b/crates/weavepy-vm/src/object.rs
index 2f5720c..c6cfe23 100644
--- a/crates/weavepy-vm/src/object.rs
+++ b/crates/weavepy-vm/src/object.rs
@@ -529,6 +529,12 @@ impl Eq for DictKey {}
 
 impl Hash for DictKey {
     fn hash<H: Hasher>(&self, state: &mut H) {
+        // An `int`/`str`/… subclass instance hashes identically to the
+        // value it wraps, so it can be used interchangeably with that
+        // value as a dict/set key (CPython invariant).
+        if let Some(native) = self.0.native_value() {
+            return DictKey(native).hash(state);
+        }
         match &self.0 {
             Object::None => 0u8.hash(state),
             Object::Bool(b) => {
@@ -1207,6 +1213,14 @@ impl Object {
             Object::FrozenSet(s) => !s.is_empty(),
             Object::Cell(inner) => inner.borrow().is_truthy(),
             Object::Instance(inst) => {
+                // int/str/… subclass instances are truthy per their
+                // wrapped value unless the class overrides __bool__/__len__.
+                if inst.class.lookup("__bool__").is_none() && inst.class.lookup("__len__").is_none()
+                {
+                    if let Some(native) = &inst.native {
+                        return native.is_truthy();
+                    }
+                }
                 // Honour __bool__ then __len__ before defaulting to True.
                 if let Some(m) = inst.class.lookup("__bool__") {
                     // Caller dispatches; we cannot run Python here.
@@ -1282,6 +1296,17 @@ impl Object {
 
     /// `==` operator semantics — recursive value equality.
     pub fn eq_value(&self, other: &Self) -> bool {
+        // Subclasses of immutable built-ins (`class C(int)`,
+        // `enum.IntEnum`, `_NamedIntConstant`, …) compare by the value
+        // they wrap, so `C(5) == 5` and two distinct instances with the
+        // same value are equal — exactly like CPython.
+        let lhs_native = self.native_value();
+        let rhs_native = other.native_value();
+        if lhs_native.is_some() || rhs_native.is_some() {
+            let l = lhs_native.as_ref().unwrap_or(self);
+            let r = rhs_native.as_ref().unwrap_or(other);
+            return l.eq_value(r);
+        }
         match (self, other) {
             (Object::None, Object::None) => true,
             (Object::Bool(a), Object::Bool(b)) => a == b,
@@ -1366,6 +1391,14 @@ impl Object {
     /// combinations return [`Err`] mapping to Python's `TypeError`.
     pub fn cmp(&self, other: &Self) -> Result<Ordering, RuntimeError> {
         use Object as O;
+        // Order `int`/`str`/… subclass instances by the value they wrap.
+        let lhs_native = self.native_value();
+        let rhs_native = other.native_value();
+        if lhs_native.is_some() || rhs_native.is_some() {
+            let l = lhs_native.as_ref().unwrap_or(self);
+            let r = rhs_native.as_ref().unwrap_or(other);
+            return l.cmp(r);
+        }
         match (self, other) {
             (O::Int(a), O::Int(b)) => Ok(a.cmp(b)),
             (O::Long(a), O::Long(b)) => Ok((**a).cmp(b)),
@@ -1677,19 +1710,42 @@ impl Object {
                 }
             }
             Object::Str(s) => {
+                // CPython quote selection (Objects/unicodeobject.c
+                // `unicode_repr`): use '\'' unless the string contains a
+                // single quote and no double quote, in which case use '"'
+                // so the single quotes need not be escaped.
+                let has_single = s.contains('\'');
+                let has_double = s.contains('"');
+                let quote = if has_single && !has_double { '"' } else { '\'' };
                 let mut out = String::with_capacity(s.len() + 2);
-                out.push('\'');
+                out.push(quote);
                 for c in s.chars() {
                     match c {
                         '\\' => out.push_str("\\\\"),
-                        '\'' => out.push_str("\\'"),
                         '\n' => out.push_str("\\n"),
                         '\r' => out.push_str("\\r"),
                         '\t' => out.push_str("\\t"),
-                        c => out.push(c),
+                        c if c == quote => {
+                            out.push('\\');
+                            out.push(quote);
+                        }
+                        c if char_is_printable(c) => out.push(c),
+                        // Non-printable code points are escaped the way
+                        // CPython's `unicode_repr` does: \xNN, \uNNNN or
+                        // \UNNNNNNNN depending on the code-point width.
+                        c => {
+                            let n = c as u32;
+                            if n <= 0xff {
+                                out.push_str(&format!("\\x{n:02x}"));
+                            } else if n <= 0xffff {
+                                out.push_str(&format!("\\u{n:04x}"));
+                            } else {
+                                out.push_str(&format!("\\U{n:08x}"));
+                            }
+                        }
                     }
                 }
-                out.push('\'');
+                out.push(quote);
                 out
             }
             Object::Tuple(items) => {
@@ -2021,6 +2077,28 @@ fn bytes_contains(haystack: &[u8], needle: &[u8]) -> bool {
     haystack.windows(needle.len()).any(|w| w == needle)
 }
 
+/// Rust counterpart of CPython's `Py_UNICODE_ISPRINTABLE`. A character
+/// is printable unless its general category is one of the "Other"
+/// (Cc, Cf, Cs, Co, Cn) or "Separator" (Zl, Zp, Zs) classes; U+0020 is
+/// the one exception. `repr(str)` uses this to decide what to escape.
+pub(crate) fn char_is_printable(c: char) -> bool {
+    use unicode_properties::{GeneralCategory as GC, UnicodeGeneralCategory};
+    let category = c.general_category();
+    // Space sits in Zs yet CPython special-cases it as printable.
+    c == ' '
+        || !matches!(
+            category,
+            GC::Control
+                | GC::Format
+                | GC::Surrogate
+                | GC::PrivateUse
+                | GC::Unassigned
+                | GC::LineSeparator
+                | GC::ParagraphSeparator
+                | GC::SpaceSeparator
+        )
+}
+
 fn bytes_repr(b: &[u8]) -> String {
     let mut out = String::with_capacity(b.len() + 3);
     out.push('b');
@@ -2174,11 +2252,25 @@ impl Object {
+    /// For an instance of a subclass of a primitive built-in
+    /// (`int`, `str`, …) return a clone of the underlying value the
+    /// instance wraps; `None` for everything else. The wrapped value
+    /// is always itself a primitive (never another `Instance`), so
+    /// callers can recurse exactly once.
+    #[inline]
+    pub fn native_value(&self) -> Option<Object> {
+        match self {
+            Object::Instance(inst) => inst.native.clone(),
+            _ => None,
+        }
+    }
+
     /// View this object as `i64`, succeeding only when the value
     /// genuinely fits in 64 bits. Returns `None` for `Long`s that
     /// don't fit, and for non-integer types.
     pub fn as_i64(&self) -> Option<i64> {
         match self {
             Object::Bool(b) => Some(i64::from(*b)),
             Object::Int(i) => Some(*i),
             Object::Long(b) => b.to_i64(),
+            Object::Instance(inst) => inst.native.as_ref().and_then(Object::as_i64),
             _ => None,
         }
     }
@@ -2190,6 +2282,7 @@ impl Object {
             Object::Bool(b) => Some(usize::from(*b)),
             Object::Int(i) if *i >= 0 => usize::try_from(*i).ok(),
             Object::Long(b) if !b.is_negative() => b.to_usize(),
+            Object::Instance(inst) => inst.native.as_ref().and_then(Object::as_usize),
             _ => None,
         }
     }
diff --git a/crates/weavepy-vm/src/stdlib/mod.rs b/crates/weavepy-vm/src/stdlib/mod.rs
index 3ba6b93..dec2f2f 100644
--- a/crates/weavepy-vm/src/stdlib/mod.rs
+++ b/crates/weavepy-vm/src/stdlib/mod.rs
@@ -39,7 +39,6 @@ pub mod marshal_mod;
 pub mod math;
 pub mod os;
 pub mod random;
-pub mod re;
 pub mod resource_mod;
 pub mod secrets_mod;
 pub mod select_mod;
@@ -47,6 +46,7 @@ pub mod shutil_mod;
 pub mod signal_mod;
 pub mod socket_mod;
 pub mod sqlite3_mod;
+pub mod sre_mod;
 pub mod ssl_mod;
 pub mod struct_mod;
 pub mod subprocess_mod;
@@ -89,7 +89,6 @@ pub fn register_all(cache: &ModuleCache) {
     cache.register_builtin("os", os::build);
     cache.register_builtin("os.path", os::build_path);
     cache.register_builtin("io", io::build);
-    cache.register_builtin("re", re::build);
     cache.register_builtin("json", json::build);
     cache.register_builtin("random", random::build);
     cache.register_builtin("time", time::build);
@@ -114,6 +113,8 @@ pub fn register_all(cache: &ModuleCache) {
     cache.register_builtin("_struct", struct_mod::build);
     cache.register_builtin("_codecs", codecs_mod::build);
     cache.register_builtin("marshal", marshal_mod::build);
+    // RFC 0035 — native SRE regex core behind the frozen `re` package.
+    cache.register_builtin("_sre", sre_mod::build);
     // RFC 0033 — native AST parsing core behind the frozen `ast` module.
     cache.register_builtin("_ast", ast_mod::build);
     // RFC 0033 — native symbol-table core behind the frozen `symtable` module.
@@ -992,5 +993,56 @@ fn frozen_sources() -> &'static [FrozenSource] {
             source: include_str!("python/symtable.py"),
             is_package: false,
         },
+        // RFC 0035 — the `re` package: a faithful port of CPython's
+        // secret-labs engine. `_constants` / `_parser` / `_compiler` /
+        // `_casefix` are verbatim from CPython 3.13; `_engine` builds the
+        // Pattern / Match objects on top of the native `_sre` core.
+        FrozenSource {
+            name: "re",
+            source: include_str!("python/re_init.py"),
+            is_package: true,
+        },
+        FrozenSource {
+            name: "re._constants",
+            source: include_str!("python/re_constants.py"),
+            is_package: false,
+        },
+        FrozenSource {
+            name: "re._casefix",
+            source: include_str!("python/re_casefix.py"),
+            is_package: false,
+        },
+        FrozenSource {
+            name: "re._parser",
+            source: include_str!("python/re_parser.py"),
+            is_package: false,
+        },
+        FrozenSource {
+            name: "re._compiler",
+            source: include_str!("python/re_compiler.py"),
+            is_package: false,
+        },
+        FrozenSource {
+            name: "re._engine",
+            source: include_str!("python/re_engine.py"),
+            is_package: false,
+        },
+        // Deprecated 3.x aliases kept for compatibility with code that
+        // still imports the pre-3.11 module names.
+        FrozenSource {
+            name: "sre_constants",
+            source: include_str!("python/sre_constants.py"),
+            is_package: false,
+        },
+        FrozenSource {
+            name: "sre_parse",
+            source: include_str!("python/sre_parse.py"),
+            is_package: false,
+        },
+        FrozenSource {
+            name: "sre_compile",
+            source: include_str!("python/sre_compile.py"),
+            is_package: false,
+        },
     ]
 }
diff --git a/crates/weavepy-vm/src/stdlib/python/enum.py b/crates/weavepy-vm/src/stdlib/python/enum.py
index 032a84d..f0d8520 100644
--- a/crates/weavepy-vm/src/stdlib/python/enum.py
+++ b/crates/weavepy-vm/src/stdlib/python/enum.py
@@ -166,7 +166,14 @@ def __members__(cls):
         return dict(cls._member_map_) if cls._member_map_ is not None else {}
 
     def _create_member_(cls, name, value):
-        member = object.__new__(cls)
+        # For int-backed enums (IntEnum/IntFlag) build a real int
+        # instance so members *are* ints — `IntEnum.X + 1`, `flags &
+        # member`, `int(member)` and dict/set interchange with the
+        # bare value all work exactly as in CPython.
+        if isinstance(value, int) and issubclass(cls, int):
+            member = int.__new__(cls, value)
+        else:
+            member = object.__new__(cls)
         member._name_ = name
         member._value_ = value
         return member
@@ -217,12 +224,10 @@ def __hash__(self):
         return hash(self._name_)
 
 
-class IntEnum(Enum):
-    """Mirror of :class:`Enum` whose members compare equal to their
-    integer value. (CPython inherits from ``int`` directly; WeavePy
-    keeps a separate base and overloads ``__eq__`` / ``__int__`` to
-    cover the common patterns.)
-    """
+class IntEnum(int, Enum):
+    """Mirror of :class:`Enum` whose members are also genuine ints, so
+    they compare and operate exactly like their integer value
+    (CPython's ``class IntEnum(int, Enum)``)."""
 
     def __int__(self):
         return self._value_
@@ -308,7 +313,10 @@ def _decompose_flag(cls, value):
                 combined_value |= member._value_
         if combined_value != value:
             raise ValueError(f"{value!r} is not a valid {cls.__name__}")
-        new_member = object.__new__(cls)
+        if issubclass(cls, int):
+            new_member = int.__new__(cls, value)
+        else:
+            new_member = object.__new__(cls)
         new_member._name_ = "|".join(combined_name)
         new_member._value_ = value
         return new_member
@@ -345,8 +353,9 @@ def __bool__(self):
         return bool(self._value_)
 
 
-class IntFlag(Flag):
-    """Like :class:`IntEnum` but for bitfield-style values."""
+class IntFlag(int, Flag):
+    """Like :class:`IntEnum` but for bitfield-style values; members are
+    genuine ints (CPython's ``class IntFlag(int, Flag)``)."""
 
     def __int__(self):
         return self._value_
diff --git a/crates/weavepy-vm/src/stdlib/python/re_casefix.py b/crates/weavepy-vm/src/stdlib/python/re_casefix.py
new file mode 100644
index 0000000..fed2d84
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/re_casefix.py
@@ -0,0 +1,106 @@
+# Auto-generated by Tools/build/generate_re_casefix.py.
+
+# Maps the code of lowercased character to codes of different lowercased
+# characters which have the same uppercase.
+_EXTRA_CASES = {
+    # LATIN SMALL LETTER I: LATIN SMALL LETTER DOTLESS I
+    0x0069: (0x0131,), # 'i': 'ı'
+    # LATIN SMALL LETTER S: LATIN SMALL LETTER LONG S
+    0x0073: (0x017f,), # 's': 'ſ'
+    # MICRO SIGN: GREEK SMALL LETTER MU
+    0x00b5: (0x03bc,), # 'µ': 'μ'
+    # LATIN SMALL LETTER DOTLESS I: LATIN SMALL LETTER I
+    0x0131: (0x0069,), # 'ı': 'i'
+    # LATIN SMALL LETTER LONG S: LATIN SMALL LETTER S
+    0x017f: (0x0073,), # 'ſ': 's'
+    # COMBINING GREEK YPOGEGRAMMENI: GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI
+    0x0345: (0x03b9, 0x1fbe), # '\u0345': 'ιι'
+    # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS: GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+    0x0390: (0x1fd3,), # 'ΐ': 'ΐ'
+    # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS: GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+    0x03b0: (0x1fe3,), # 'ΰ': 'ΰ'
+    # GREEK SMALL LETTER BETA: GREEK BETA SYMBOL
+    0x03b2: (0x03d0,), # 'β': 'ϐ'
+    # GREEK SMALL LETTER EPSILON: GREEK LUNATE EPSILON SYMBOL
+    0x03b5: (0x03f5,), # 'ε': 'ϵ'
+    # GREEK SMALL LETTER THETA: GREEK THETA SYMBOL
+    0x03b8: (0x03d1,), # 'θ': 'ϑ'
+    # GREEK SMALL LETTER IOTA: COMBINING GREEK YPOGEGRAMMENI, GREEK PROSGEGRAMMENI
+    0x03b9: (0x0345, 0x1fbe), # 'ι': '\u0345ι'
+    # GREEK SMALL LETTER KAPPA: GREEK KAPPA SYMBOL
+    0x03ba: (0x03f0,), # 'κ': 'ϰ'
+    # GREEK SMALL LETTER MU: MICRO SIGN
+    0x03bc: (0x00b5,), # 'μ': 'µ'
+    # GREEK SMALL LETTER PI: GREEK PI SYMBOL
+    0x03c0: (0x03d6,), # 'π': 'ϖ'
+    # GREEK SMALL LETTER RHO: GREEK RHO SYMBOL
+    0x03c1: (0x03f1,), # 'ρ': 'ϱ'
+    # GREEK SMALL LETTER FINAL SIGMA: GREEK SMALL LETTER SIGMA
+    0x03c2: (0x03c3,), # 'ς': 'σ'
+    # GREEK SMALL LETTER SIGMA: GREEK SMALL LETTER FINAL SIGMA
+    0x03c3: (0x03c2,), # 'σ': 'ς'
+    # GREEK SMALL LETTER PHI: GREEK PHI SYMBOL
+    0x03c6: (0x03d5,), # 'φ': 'ϕ'
+    # GREEK BETA SYMBOL: GREEK SMALL LETTER BETA
+    0x03d0: (0x03b2,), # 'ϐ': 'β'
+    # GREEK THETA SYMBOL: GREEK SMALL LETTER THETA
+    0x03d1: (0x03b8,), # 'ϑ': 'θ'
+    # GREEK PHI SYMBOL: GREEK SMALL LETTER PHI
+    0x03d5: (0x03c6,), # 'ϕ': 'φ'
+    # GREEK PI SYMBOL: GREEK SMALL LETTER PI
+    0x03d6: (0x03c0,), # 'ϖ': 'π'
+    # GREEK KAPPA SYMBOL: GREEK SMALL LETTER KAPPA
+    0x03f0: (0x03ba,), # 'ϰ': 'κ'
+    # GREEK RHO SYMBOL: GREEK SMALL LETTER RHO
+    0x03f1: (0x03c1,), # 'ϱ': 'ρ'
+    # GREEK LUNATE EPSILON SYMBOL: GREEK SMALL LETTER EPSILON
+    0x03f5: (0x03b5,), # 'ϵ': 'ε'
+    # CYRILLIC SMALL LETTER VE: CYRILLIC SMALL LETTER ROUNDED VE
+    0x0432: (0x1c80,), # 'в': 'ᲀ'
+    # CYRILLIC SMALL LETTER DE: CYRILLIC SMALL LETTER LONG-LEGGED DE
+    0x0434: (0x1c81,), # 'д': 'ᲁ'
+    # CYRILLIC SMALL LETTER O: CYRILLIC SMALL LETTER NARROW O
+    0x043e: (0x1c82,), # 'о': 'ᲂ'
+    # CYRILLIC SMALL LETTER ES: CYRILLIC SMALL LETTER WIDE ES
+    0x0441: (0x1c83,), # 'с': 'ᲃ'
+    # CYRILLIC SMALL LETTER TE: CYRILLIC SMALL LETTER TALL TE, CYRILLIC SMALL LETTER THREE-LEGGED TE
+    0x0442: (0x1c84, 0x1c85), # 'т': 'ᲄᲅ'
+    # CYRILLIC SMALL LETTER HARD SIGN: CYRILLIC SMALL LETTER TALL HARD SIGN
+    0x044a: (0x1c86,), # 'ъ': 'ᲆ'
+    # CYRILLIC SMALL LETTER YAT: CYRILLIC SMALL LETTER TALL YAT
+    0x0463: (0x1c87,), # 'ѣ': 'ᲇ'
+    # CYRILLIC SMALL LETTER ROUNDED VE: CYRILLIC SMALL LETTER VE
+    0x1c80: (0x0432,), # 'ᲀ': 'в'
+    # CYRILLIC SMALL LETTER LONG-LEGGED DE: CYRILLIC SMALL LETTER DE
+    0x1c81: (0x0434,), # 'ᲁ': 'д'
+    # CYRILLIC SMALL LETTER NARROW O: CYRILLIC SMALL LETTER O
+    0x1c82: (0x043e,), # 'ᲂ': 'о'
+    # CYRILLIC SMALL LETTER WIDE ES: CYRILLIC SMALL LETTER ES
+    0x1c83: (0x0441,), # 'ᲃ': 'с'
+    # CYRILLIC SMALL LETTER TALL TE: CYRILLIC SMALL LETTER TE, CYRILLIC SMALL LETTER THREE-LEGGED TE
+    0x1c84: (0x0442, 0x1c85), # 'ᲄ': 'тᲅ'
+    # CYRILLIC SMALL LETTER THREE-LEGGED TE: CYRILLIC SMALL LETTER TE, CYRILLIC SMALL LETTER TALL TE
+    0x1c85: (0x0442, 0x1c84), # 'ᲅ': 'тᲄ'
+    # CYRILLIC SMALL LETTER TALL HARD SIGN: CYRILLIC SMALL LETTER HARD SIGN
+    0x1c86: (0x044a,), # 'ᲆ': 'ъ'
+    # CYRILLIC SMALL LETTER TALL YAT: CYRILLIC SMALL LETTER YAT
+    0x1c87: (0x0463,), # 'ᲇ': 'ѣ'
+    # CYRILLIC SMALL LETTER UNBLENDED UK: CYRILLIC SMALL LETTER MONOGRAPH UK
+    0x1c88: (0xa64b,), # 'ᲈ': 'ꙋ'
+    # LATIN SMALL LETTER S WITH DOT ABOVE: LATIN SMALL LETTER LONG S WITH DOT ABOVE
+    0x1e61: (0x1e9b,), # 'ṡ': 'ẛ'
+    # LATIN SMALL LETTER LONG S WITH DOT ABOVE: LATIN SMALL LETTER S WITH DOT ABOVE
+    0x1e9b: (0x1e61,), # 'ẛ': 'ṡ'
+    # GREEK PROSGEGRAMMENI: COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA
+    0x1fbe: (0x0345, 0x03b9), # 'ι': '\u0345ι'
+    # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA: GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+    0x1fd3: (0x0390,), # 'ΐ': 'ΐ'
+    # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA: GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+    0x1fe3: (0x03b0,), # 'ΰ': 'ΰ'
+    # CYRILLIC SMALL LETTER MONOGRAPH UK: CYRILLIC SMALL LETTER UNBLENDED UK
+    0xa64b: (0x1c88,), # 'ꙋ': 'ᲈ'
+    # LATIN SMALL LIGATURE LONG S T: LATIN SMALL LIGATURE ST
+    0xfb05: (0xfb06,), # 'ﬅ': 'ﬆ'
+    # LATIN SMALL LIGATURE ST: LATIN SMALL LIGATURE LONG S T
+    0xfb06: (0xfb05,), # 'ﬆ': 'ﬅ'
+}
diff --git a/crates/weavepy-vm/src/stdlib/python/re_compiler.py b/crates/weavepy-vm/src/stdlib/python/re_compiler.py
new file mode 100644
index 0000000..c26e999
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/re_compiler.py
@@ -0,0 +1,775 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# convert template to internal format
+#
+# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
+#
+# See the __init__.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre"""
+
+import _sre
+from . import _parser
+from ._constants import *
+from ._casefix import _EXTRA_CASES
+
+assert _sre.MAGIC == MAGIC, "SRE module mismatch"
+
+_LITERAL_CODES = {LITERAL, NOT_LITERAL}
+_SUCCESS_CODES = {SUCCESS, FAILURE}
+_ASSERT_CODES = {ASSERT, ASSERT_NOT}
+_UNIT_CODES = _LITERAL_CODES | {ANY, IN}  # ops that _simple() accepts as a repeat body
+
+_REPEATING_CODES = {  # repeat op -> (opening opcode, closing opcode, opcode used for a _simple() body)
+    MIN_REPEAT: (REPEAT, MIN_UNTIL, MIN_REPEAT_ONE),
+    MAX_REPEAT: (REPEAT, MAX_UNTIL, REPEAT_ONE),
+    POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
+}
+
+def _combine_flags(flags, add_flags, del_flags,
+                   TYPE_FLAGS=_parser.TYPE_FLAGS):
+    if add_flags & TYPE_FLAGS:  # an added type flag replaces any type flag already set
+        flags &= ~TYPE_FLAGS
+    return (flags | add_flags) & ~del_flags  # removals are applied last, so they win over additions
+
+def _compile(code, pattern, flags):
+    # internal: compile a (sub)pattern
+    emit = code.append
+    _len = len
+    LITERAL_CODES = _LITERAL_CODES
+    REPEATING_CODES = _REPEATING_CODES
+    SUCCESS_CODES = _SUCCESS_CODES
+    ASSERT_CODES = _ASSERT_CODES
+    iscased = None
+    tolower = None
+    fixes = None
+    if flags & SRE_FLAG_IGNORECASE and not flags & SRE_FLAG_LOCALE:
+        if flags & SRE_FLAG_UNICODE:
+            iscased = _sre.unicode_iscased
+            tolower = _sre.unicode_tolower
+            fixes = _EXTRA_CASES
+        else:
+            iscased = _sre.ascii_iscased
+            tolower = _sre.ascii_tolower
+    for op, av in pattern:
+        if op in LITERAL_CODES:
+            if not flags & SRE_FLAG_IGNORECASE:
+                emit(op)
+                emit(av)
+            elif flags & SRE_FLAG_LOCALE:
+                emit(OP_LOCALE_IGNORE[op])
+                emit(av)
+            elif not iscased(av):  # uncased character: the plain opcode is emitted
+                emit(op)
+                emit(av)
+            else:
+                lo = tolower(av)
+                if not fixes:  # ascii
+                    emit(OP_IGNORE[op])
+                    emit(lo)
+                elif lo not in fixes:
+                    emit(OP_UNICODE_IGNORE[op])
+                    emit(lo)
+                else:
+                    emit(IN_UNI_IGNORE)
+                    skip = _len(code); emit(0)  # placeholder, patched with the set length below
+                    if op is NOT_LITERAL:
+                        emit(NEGATE)
+                    for k in (lo,) + fixes[lo]:
+                        emit(LITERAL)
+                        emit(k)
+                    emit(FAILURE)
+                    code[skip] = _len(code) - skip
+        elif op is IN:
+            charset, hascased = _optimize_charset(av, iscased, tolower, fixes)
+            if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
+                emit(IN_LOC_IGNORE)
+            elif not hascased:
+                emit(IN)
+            elif not fixes:  # ascii
+                emit(IN_IGNORE)
+            else:
+                emit(IN_UNI_IGNORE)
+            skip = _len(code); emit(0)
+            _compile_charset(charset, flags, code)
+            code[skip] = _len(code) - skip
+        elif op is ANY:
+            if flags & SRE_FLAG_DOTALL:
+                emit(ANY_ALL)
+            else:
+                emit(ANY)
+        elif op in REPEATING_CODES:
+            if _simple(av[2]):  # single-item body: use the third (*_ONE) opcode of the triple
+                emit(REPEATING_CODES[op][2])
+                skip = _len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                emit(SUCCESS)
+                code[skip] = _len(code) - skip
+            else:
+                emit(REPEATING_CODES[op][0])
+                skip = _len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                code[skip] = _len(code) - skip
+                emit(REPEATING_CODES[op][1])  # closing opcode goes after the patched skip
+        elif op is SUBPATTERN:
+            group, add_flags, del_flags, p = av
+            if group:
+                emit(MARK)
+                emit((group-1)*2)  # even mark index: emitted before the group body
+            # _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
+            _compile(code, p, _combine_flags(flags, add_flags, del_flags))
+            if group:
+                emit(MARK)
+                emit((group-1)*2+1)  # odd mark index: emitted after the group body
+        elif op is ATOMIC_GROUP:
+            # Atomic Groups are handled by starting with an Atomic
+            # Group op code, then putting in the atomic group pattern
+            # and finally a success op code to tell any repeat
+            # operations within the Atomic Group to stop eating and
+            # pop their stack if they reach it
+            emit(ATOMIC_GROUP)
+            skip = _len(code); emit(0)
+            _compile(code, av, flags)
+            emit(SUCCESS)
+            code[skip] = _len(code) - skip
+        elif op in SUCCESS_CODES:
+            emit(op)
+        elif op in ASSERT_CODES:
+            emit(op)
+            skip = _len(code); emit(0)
+            if av[0] >= 0:
+                emit(0) # look ahead
+            else:
+                lo, hi = av[1].getwidth()
+                if lo > MAXCODE:
+                    raise error("looks too much behind")
+                if lo != hi:
+                    raise PatternError("look-behind requires fixed-width pattern")
+                emit(lo) # look behind
+            _compile(code, av[1], flags)
+            emit(SUCCESS)
+            code[skip] = _len(code) - skip
+        elif op is AT:
+            emit(op)
+            if flags & SRE_FLAG_MULTILINE:
+                av = AT_MULTILINE.get(av, av)
+            if flags & SRE_FLAG_LOCALE:
+                av = AT_LOCALE.get(av, av)
+            elif flags & SRE_FLAG_UNICODE:
+                av = AT_UNICODE.get(av, av)
+            emit(av)
+        elif op is BRANCH:
+            emit(op)
+            tail = []
+            tailappend = tail.append
+            for av in av[1]:
+                skip = _len(code); emit(0)
+                # _compile_info(code, av, flags)
+                _compile(code, av, flags)
+                emit(JUMP)
+                tailappend(_len(code)); emit(0)  # remember each JUMP operand; patched after the loop
+                code[skip] = _len(code) - skip
+            emit(FAILURE) # end of branch
+            for tail in tail:
+                code[tail] = _len(code) - tail
+        elif op is CATEGORY:
+            emit(op)
+            if flags & SRE_FLAG_LOCALE:
+                av = CH_LOCALE[av]
+            elif flags & SRE_FLAG_UNICODE:
+                av = CH_UNICODE[av]
+            emit(av)
+        elif op is GROUPREF:
+            if not flags & SRE_FLAG_IGNORECASE:
+                emit(op)
+            elif flags & SRE_FLAG_LOCALE:
+                emit(GROUPREF_LOC_IGNORE)
+            elif not fixes:  # ascii
+                emit(GROUPREF_IGNORE)
+            else:
+                emit(GROUPREF_UNI_IGNORE)
+            emit(av-1)  # group number is emitted zero-based
+        elif op is GROUPREF_EXISTS:
+            emit(op)
+            emit(av[0]-1)
+            skipyes = _len(code); emit(0)
+            _compile(code, av[1], flags)
+            if av[2]:
+                emit(JUMP)
+                skipno = _len(code); emit(0)
+                code[skipyes] = _len(code) - skipyes + 1
+                _compile(code, av[2], flags)
+                code[skipno] = _len(code) - skipno
+            else:
+                code[skipyes] = _len(code) - skipyes + 1
+        else:
+            raise PatternError(f"internal: unsupported operand type {op!r}")
+
+def _compile_charset(charset, flags, code):
+    # compile charset subprogram
+    emit = code.append
+    for op, av in charset:
+        emit(op)
+        if op is NEGATE:
+            pass  # NEGATE carries no operand
+        elif op is LITERAL:
+            emit(av)
+        elif op is RANGE or op is RANGE_UNI_IGNORE:
+            emit(av[0])
+            emit(av[1])
+        elif op is CHARSET:
+            code.extend(av)  # av is the word list built by _mk_bitmap()
+        elif op is BIGCHARSET:
+            code.extend(av)
+        elif op is CATEGORY:
+            if flags & SRE_FLAG_LOCALE:
+                emit(CH_LOCALE[av])
+            elif flags & SRE_FLAG_UNICODE:
+                emit(CH_UNICODE[av])
+            else:
+                emit(av)
+        else:
+            raise PatternError(f"internal: unsupported set operator {op!r}")
+    emit(FAILURE)  # every compiled set ends with a FAILURE code
+
+def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
+    # internal: optimize character set
+    out = []
+    tail = []
+    charmap = bytearray(256)  # one flag byte per code point; grown to 65536 entries on demand
+    hascased = False
+    for op, av in charset:
+        while True:  # retried once (via continue) after charmap has been enlarged
+            try:
+                if op is LITERAL:
+                    if fixup: # IGNORECASE and not LOCALE
+                        av = fixup(av)
+                        charmap[av] = 1
+                        if fixes and av in fixes:
+                            for k in fixes[av]:
+                                charmap[k] = 1
+                        if not hascased and iscased(av):
+                            hascased = True
+                    else:
+                        charmap[av] = 1
+                elif op is RANGE:
+                    r = range(av[0], av[1]+1)
+                    if fixup: # IGNORECASE and not LOCALE
+                        if fixes:
+                            for i in map(fixup, r):
+                                charmap[i] = 1
+                                if i in fixes:
+                                    for k in fixes[i]:
+                                        charmap[k] = 1
+                        else:
+                            for i in map(fixup, r):
+                                charmap[i] = 1
+                        if not hascased:
+                            hascased = any(map(iscased, r))
+                    else:
+                        for i in r:
+                            charmap[i] = 1
+                elif op is NEGATE:
+                    out.append((op, av))
+                else:
+                    tail.append((op, av))
+            except IndexError:
+                if len(charmap) == 256:
+                    # character set contains non-UCS1 character codes
+                    charmap += b'\0' * 0xff00
+                    continue
+                # Character set contains non-BMP character codes.
+                # For range, all BMP characters in the range are already
+                # processed.
+                if fixup: # IGNORECASE and not LOCALE
+                    # For now, IN_UNI_IGNORE+LITERAL and
+                    # IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
+                    # characters, because two characters (at least one of
+                    # which is not in the BMP) match case-insensitively
+                    # if and only if:
+                    # 1) c1.lower() == c2.lower()
+                    # 2) c1.lower() == c2 or c1.lower().upper() == c2
+                    # Also, both c.lower() and c.lower().upper() are single
+                    # characters for every non-BMP character.
+                    if op is RANGE:
+                        if fixes: # not ASCII
+                            op = RANGE_UNI_IGNORE
+                        hascased = True
+                    else:
+                        assert op is LITERAL
+                        if not hascased and iscased(av):
+                            hascased = True
+                tail.append((op, av))
+            break
+
+    # compress character map
+    runs = []
+    q = 0
+    while True:
+        p = charmap.find(1, q)
+        if p < 0:
+            break
+        if len(runs) >= 2:  # a third run was found: give up on literal/range form
+            runs = None
+            break
+        q = charmap.find(0, p)
+        if q < 0:
+            runs.append((p, len(charmap)))
+            break
+        runs.append((p, q))  # half-open run [p, q) of set entries
+    if runs is not None:
+        # use literal/range
+        for p, q in runs:
+            if q - p == 1:
+                out.append((LITERAL, p))
+            else:
+                out.append((RANGE, (p, q - 1)))
+        out += tail
+        # if the case was changed or new representation is more compact
+        if hascased or len(out) < len(charset):
+            return out, hascased
+        # else original character set is good enough
+        return charset, hascased
+
+    # use bitmap
+    if len(charmap) == 256:
+        data = _mk_bitmap(charmap)
+        out.append((CHARSET, data))
+        out += tail
+        return out, hascased
+
+    # To represent a big charset, first a bitmap of all characters in the
+    # set is constructed. Then, this bitmap is sliced into chunks of 256
+    # characters, duplicate chunks are eliminated, and each chunk is
+    # given a number. In the compiled expression, the charset is
+    # represented by a 32-bit word sequence, consisting of one word for
+    # the number of different chunks, a sequence of 256 bytes (64 words)
+    # of chunk numbers indexed by their original chunk position, and a
+    # sequence of 256-bit chunks (8 words each).
+
+    # Compression is normally good: in a typical charset, large ranges of
+    # Unicode will be either completely excluded (e.g. if only cyrillic
+    # letters are to be matched), or completely included (e.g. if large
+    # subranges of Kanji match). These ranges will be represented by
+    # chunks of all one-bits or all zero-bits.
+
+    # Matching can be also done efficiently: the more significant byte of
+    # the Unicode character is an index into the chunk number, and the
+    # less significant byte is a bit index in the chunk (just like the
+    # CHARSET matching).
+
+    charmap = bytes(charmap) # should be hashable
+    comps = {}
+    mapping = bytearray(256)
+    block = 0
+    data = bytearray()
+    for i in range(0, 65536, 256):
+        chunk = charmap[i: i + 256]
+        if chunk in comps:
+            mapping[i // 256] = comps[chunk]
+        else:
+            mapping[i // 256] = comps[chunk] = block
+            block += 1
+            data += chunk
+    data = _mk_bitmap(data)
+    data[0:0] = [block] + _bytes_to_codes(mapping)  # prepend chunk count and chunk-number table
+    out.append((BIGCHARSET, data))
+    out += tail
+    return out, hascased
+
+_CODEBITS = _sre.CODESIZE * 8  # bits per code word
+MAXCODE = (1 << _CODEBITS) - 1  # largest value that fits in one code word
+_BITS_TRANS = b'0' + b'1' * 255  # translate table: byte 0 -> b'0', any other byte -> b'1'
+def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
+    s = bits.translate(_BITS_TRANS)[::-1]  # reversed so each slice reads most-significant bit first
+    return [_int(s[i - _CODEBITS: i], 2)
+            for i in range(len(s), 0, -_CODEBITS)]
+
+def _bytes_to_codes(b):
+    # Convert block indices to word array. CPython uses
+    # ``memoryview(b).cast('I')``; we decode CODESIZE-byte words in the
+    # native byte order directly so we don't depend on memoryview.cast().
+    import sys
+    cs = _sre.CODESIZE
+    assert len(b) % cs == 0  # the input must split into whole words
+    return [int.from_bytes(bytes(b[i:i + cs]), sys.byteorder)
+            for i in range(0, len(b), cs)]
+
+def _simple(p):
+    # check if this subpattern is a "simple" operator
+    if len(p) != 1:
+        return False
+    op, av = p[0]
+    if op is SUBPATTERN:
+        return av[0] is None and _simple(av[-1])  # group is None and the wrapped pattern is itself simple
+    return op in _UNIT_CODES
+
+def _generate_overlap_table(prefix):
+    """
+    Generate an overlap table for the following prefix.
+    An overlap table is a table of the same size as the prefix which
+    informs about the potential self-overlap for each index in the prefix:
+    - if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...]
+    - if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with
+      prefix[0:k]
+    """
+    table = [0] * len(prefix)
+    for i in range(1, len(prefix)):
+        idx = table[i - 1]  # start from the overlap length recorded for the previous index
+        while prefix[i] != prefix[idx]:
+            if idx == 0:
+                table[i] = 0
+                break
+            idx = table[idx - 1]  # fall back to the next shorter candidate overlap
+        else:  # loop ended without break: prefix[i] == prefix[idx]
+            table[i] = idx + 1
+    return table
+
+def _get_iscased(flags):
+    if not flags & SRE_FLAG_IGNORECASE:
+        return None  # callers test `iscased and iscased(av)`, so None disables the cased check
+    elif flags & SRE_FLAG_UNICODE:
+        return _sre.unicode_iscased
+    else:
+        return _sre.ascii_iscased
+
+def _get_literal_prefix(pattern, flags):
+    # look for literal prefix
+    prefix = []
+    prefixappend = prefix.append
+    prefix_skip = None
+    iscased = _get_iscased(flags)
+    for op, av in pattern.data:
+        if op is LITERAL:
+            if iscased and iscased(av):
+                break  # a cased literal ends the prefix when IGNORECASE is set
+            prefixappend(av)
+        elif op is SUBPATTERN:
+            group, add_flags, del_flags, p = av
+            flags1 = _combine_flags(flags, add_flags, del_flags)
+            if flags1 & SRE_FLAG_IGNORECASE and flags1 & SRE_FLAG_LOCALE:
+                break
+            prefix1, prefix_skip1, got_all = _get_literal_prefix(p, flags1)
+            if prefix_skip is None:  # only the first skip position found is kept
+                if group is not None:
+                    prefix_skip = len(prefix)  # prefix length collected before this capturing group
+                elif prefix_skip1 is not None:
+                    prefix_skip = len(prefix) + prefix_skip1
+            prefix.extend(prefix1)
+            if not got_all:
+                break
+        else:
+            break
+    else:  # loop finished without break: every item was consumed
+        return prefix, prefix_skip, True
+    return prefix, prefix_skip, False
+
+def _get_charset_prefix(pattern, flags):
+    while True:  # unwrap leading subpatterns, combining their flags on the way down
+        if not pattern.data:
+            return None
+        op, av = pattern.data[0]
+        if op is not SUBPATTERN:
+            break
+        group, add_flags, del_flags, pattern = av
+        flags = _combine_flags(flags, add_flags, del_flags)
+        if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
+            return None
+
+    iscased = _get_iscased(flags)
+    if op is LITERAL:
+        if iscased and iscased(av):
+            return None
+        return [(op, av)]
+    elif op is BRANCH:
+        charset = []
+        charsetappend = charset.append
+        for p in av[1]:
+            if not p:  # an empty alternative has no first item to inspect
+                return None
+            op, av = p[0]
+            if op is LITERAL and not (iscased and iscased(av)):
+                charsetappend((op, av))
+            else:
+                return None
+        return charset
+    elif op is IN:
+        charset = av
+        if iscased:
+            for op, av in charset:
+                if op is LITERAL:
+                    if iscased(av):
+                        return None
+                elif op is RANGE:
+                    if av[1] > 0xffff:
+                        return None
+                    if any(map(iscased, range(av[0], av[1]+1))):
+                        return None
+        return charset
+    return None
+
+def _compile_info(code, pattern, flags):
+    # internal: compile an info block.  in the current version,
+    # this contains min/max pattern width, and an optional literal
+    # prefix or a character map
+    lo, hi = pattern.getwidth()
+    if hi > MAXCODE:
+        hi = MAXCODE
+    if lo == 0:
+        code.extend([INFO, 4, 0, lo, hi])  # minimal block: skip=4, mask=0, then min and max width
+        return
+    # look for a literal prefix
+    prefix = []
+    prefix_skip = 0
+    charset = [] # not used
+    if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE):
+        # look for literal prefix
+        prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags)
+        # if no prefix, look for charset prefix
+        if not prefix:
+            charset = _get_charset_prefix(pattern, flags)
+##     if prefix:
+##         print("*** PREFIX", prefix, prefix_skip)
+##     if charset:
+##         print("*** CHARSET", charset)
+    # add an info block
+    emit = code.append
+    emit(INFO)
+    skip = len(code); emit(0)  # placeholder, patched with the block length on the last line
+    # literal flag
+    mask = 0
+    if prefix:
+        mask = SRE_INFO_PREFIX
+        if prefix_skip is None and got_all:  # all items were literal and no skip position was recorded
+            mask = mask | SRE_INFO_LITERAL
+    elif charset:
+        mask = mask | SRE_INFO_CHARSET
+    emit(mask)
+    # pattern length
+    if lo < MAXCODE:
+        emit(lo)
+    else:
+        emit(MAXCODE)
+        prefix = prefix[:MAXCODE]
+    emit(hi)
+    # add literal prefix
+    if prefix:
+        emit(len(prefix)) # length
+        if prefix_skip is None:
+            prefix_skip =  len(prefix)
+        emit(prefix_skip) # skip
+        code.extend(prefix)
+        # generate overlap table
+        code.extend(_generate_overlap_table(prefix))
+    elif charset:
+        charset, hascased = _optimize_charset(charset)
+        assert not hascased  # no fixup is passed above, so hascased can never be set
+        _compile_charset(charset, flags, code)
+    code[skip] = len(code) - skip
+
+def isstring(obj):
+    return isinstance(obj, (str, bytes))  # only str and bytes (and their subclasses) count as patterns
+
+def _code(p, flags):
+
+    flags = p.state.flags | flags  # merge the parser state's flags with the caller's
+    code = []
+
+    # compile info block
+    _compile_info(code, p, flags)
+
+    # compile the pattern
+    _compile(code, p.data, flags)
+
+    code.append(SUCCESS)  # the code list always ends with SUCCESS
+
+    return code
+
+def _hex_code(code):
+    return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)  # width = 2 hex digits per byte + '0x'
+
+def dis(code):
+    import sys
+
+    labels = set()  # offsets that some printed instruction jumps to
+    level = 0
+    offset_width = len(str(len(code) - 1))  # digits in the largest code offset
+
+    def dis_(start, end):
+        def print_(*args, to=None):
+            if to is not None:
+                labels.add(to)
+                args += ('(to %d)' % (to,),)
+            print('%*d%s ' % (offset_width, start, ':' if start in labels else '.'),
+                  end='  '*(level-1))
+            print(*args)
+
+        def print_2(*args):
+            print(end=' '*(offset_width + 2*level))
+            print(*args)
+
+        nonlocal level
+        level += 1
+        i = start
+        while i < end:
+            start = i  # print_ reads this as the offset of the op being printed
+            op = code[i]
+            i += 1
+            op = OPCODES[op]
+            if op in (SUCCESS, FAILURE, ANY, ANY_ALL,
+                      MAX_UNTIL, MIN_UNTIL, NEGATE):
+                print_(op)
+            elif op in (LITERAL, NOT_LITERAL,
+                        LITERAL_IGNORE, NOT_LITERAL_IGNORE,
+                        LITERAL_UNI_IGNORE, NOT_LITERAL_UNI_IGNORE,
+                        LITERAL_LOC_IGNORE, NOT_LITERAL_LOC_IGNORE):
+                arg = code[i]
+                i += 1
+                print_(op, '%#02x (%r)' % (arg, chr(arg)))
+            elif op is AT:
+                arg = code[i]
+                i += 1
+                arg = str(ATCODES[arg])
+                assert arg[:3] == 'AT_'
+                print_(op, arg[3:])
+            elif op is CATEGORY:
+                arg = code[i]
+                i += 1
+                arg = str(CHCODES[arg])
+                assert arg[:9] == 'CATEGORY_'
+                print_(op, arg[9:])
+            elif op in (IN, IN_IGNORE, IN_UNI_IGNORE, IN_LOC_IGNORE):
+                skip = code[i]
+                print_(op, skip, to=i+skip)
+                dis_(i+1, i+skip)
+                i += skip
+            elif op in (RANGE, RANGE_UNI_IGNORE):
+                lo, hi = code[i: i+2]
+                i += 2
+                print_(op, '%#02x %#02x (%r-%r)' % (lo, hi, chr(lo), chr(hi)))
+            elif op is CHARSET:
+                print_(op, _hex_code(code[i: i + 256//_CODEBITS]))
+                i += 256//_CODEBITS
+            elif op is BIGCHARSET:
+                arg = code[i]
+                i += 1
+                mapping = list(b''.join(x.to_bytes(_sre.CODESIZE, sys.byteorder)
+                                        for x in code[i: i + 256//_sre.CODESIZE]))
+                print_(op, arg, mapping)
+                i += 256//_sre.CODESIZE
+                level += 1
+                for j in range(arg):
+                    print_2(_hex_code(code[i: i + 256//_CODEBITS]))
+                    i += 256//_CODEBITS
+                level -= 1
+            elif op in (MARK, GROUPREF, GROUPREF_IGNORE, GROUPREF_UNI_IGNORE,
+                        GROUPREF_LOC_IGNORE):
+                arg = code[i]
+                i += 1
+                print_(op, arg)
+            elif op is JUMP:
+                skip = code[i]
+                print_(op, skip, to=i+skip)
+                i += 1
+            elif op is BRANCH:
+                skip = code[i]
+                print_(op, skip, to=i+skip)
+                while skip:  # a zero skip is printed as FAILURE and ends the alternatives
+                    dis_(i+1, i+skip)
+                    i += skip
+                    start = i
+                    skip = code[i]
+                    if skip:
+                        print_('branch', skip, to=i+skip)
+                    else:
+                        print_(FAILURE)
+                i += 1
+            elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
+                        POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
+                skip, min, max = code[i: i+3]
+                if max == MAXREPEAT:
+                    max = 'MAXREPEAT'
+                print_(op, skip, min, max, to=i+skip)
+                dis_(i+3, i+skip)
+                i += skip
+            elif op is GROUPREF_EXISTS:
+                arg, skip = code[i: i+2]
+                print_(op, arg, skip, to=i+skip)
+                i += 2
+            elif op in (ASSERT, ASSERT_NOT):
+                skip, arg = code[i: i+2]
+                print_(op, skip, arg, to=i+skip)
+                dis_(i+2, i+skip)
+                i += skip
+            elif op is ATOMIC_GROUP:
+                skip = code[i]
+                print_(op, skip, to=i+skip)
+                dis_(i+1, i+skip)
+                i += skip
+            elif op is INFO:
+                skip, flags, min, max = code[i: i+4]
+                if max == MAXREPEAT:
+                    max = 'MAXREPEAT'
+                print_(op, skip, bin(flags), min, max, to=i+skip)
+                start = i+4
+                if flags & SRE_INFO_PREFIX:
+                    prefix_len, prefix_skip = code[i+4: i+6]
+                    print_2('  prefix_skip', prefix_skip)
+                    start = i + 6
+                    prefix = code[start: start+prefix_len]
+                    print_2('  prefix',
+                            '[%s]' % ', '.join('%#02x' % x for x in prefix),
+                            '(%r)' % ''.join(map(chr, prefix)))
+                    start += prefix_len
+                    print_2('  overlap', code[start: start+prefix_len])
+                    start += prefix_len
+                if flags & SRE_INFO_CHARSET:
+                    level += 1
+                    print_2('in')
+                    dis_(start, i+skip)
+                    level -= 1
+                i += skip
+            else:
+                raise ValueError(op)
+
+        level -= 1
+
+    dis_(0, len(code))
+
+
+def compile(p, flags=0):
+    # internal: convert pattern list to internal format
+
+    if isstring(p):
+        pattern = p
+        p = _parser.parse(p, flags)
+    else:
+        pattern = None  # p is used as an already-parsed pattern; no source text is kept
+
+    code = _code(p, flags)
+
+    if flags & SRE_FLAG_DEBUG:
+        print()
+        dis(code)
+
+    # map in either direction
+    groupindex = p.state.groupdict
+    indexgroup = [None] * p.state.groups
+    for k, i in groupindex.items():
+        indexgroup[i] = k  # invert name -> index into index -> name
+
+    # WeavePy: the matching engine core (_sre) is a pure-data backtracker;
+    # the user-visible Pattern / Match objects live in the frozen
+    # re._engine module (so callable re.sub, scanner, etc. stay in Python).
+    from . import _engine
+    return _engine.compile_pattern(
+        pattern, flags | p.state.flags, code,
+        p.state.groups-1,
+        groupindex, tuple(indexgroup)
+        )
diff --git a/crates/weavepy-vm/src/stdlib/python/re_constants.py b/crates/weavepy-vm/src/stdlib/python/re_constants.py
new file mode 100644
index 0000000..9c3c294
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/re_constants.py
@@ -0,0 +1,222 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# various symbols used by the regular expression engine.
+# run this script to update the _sre include files!
+#
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
+#
+# See the __init__.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre"""
+
+# update when constants are added or removed
+
+MAGIC = 20230612
+
+from _sre import MAXREPEAT, MAXGROUPS
+
+# SRE standard exception (access as sre.error)
+# should this really be here?
+
+class PatternError(Exception):
+    """Raised when a regular expression fails to compile.
+
+    Attributes:
+
+        msg: the error message without any position suffix
+        pattern: the offending pattern (str or bytes), if known
+        pos: index in the pattern where the problem was found (may be None)
+        lineno: 1-based line of pos within the pattern (may be None)
+        colno: 1-based column of pos within its line (may be None)
+    """
+
+    __module__ = 're'
+
+    def __init__(self, msg, pattern=None, pos=None):
+        self.msg, self.pattern, self.pos = msg, pattern, pos
+        # Location details exist only when both pattern and pos are known.
+        self.lineno = self.colno = None
+        text = msg
+        if pattern is not None and pos is not None:
+            text = '%s at position %d' % (msg, pos)
+            # str and bytes patterns need a newline of the matching type.
+            nl = '\n' if isinstance(pattern, str) else b'\n'
+            # 1-based line; column counted from the preceding newline
+            # (rfind() == -1 on the first line makes it 1-based as well).
+            self.lineno = pattern.count(nl, 0, pos) + 1
+            self.colno = pos - pattern.rfind(nl, 0, pos)
+            if nl in pattern:
+                # Only multi-line patterns get the line/column suffix.
+                text = '%s (line %d, column %d)' % (text, self.lineno, self.colno)
+        super().__init__(text)
+
+
+# Backward compatibility after renaming in 3.13
+error = PatternError
+
+class _NamedIntConstant(int):
+    def __new__(cls, value, name):
+        const = int.__new__(cls, value)
+        const.name = name
+        return const
+
+    def __repr__(self):  # the symbolic name stands in for the number
+        return self.name
+    # NOTE(review): presumably set so constants cannot be pickled/copied.
+    __reduce__ = None
+
+MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')
+
+def _makecodes(*names):  # number `names` from 0 and export them as globals
+    codes = list(map(_NamedIntConstant, range(len(names)), names))
+    globals().update((code.name, code) for code in codes)
+    return codes
+
+# operators
+OPCODES = _makecodes(
+    # failure=0 success=1 (just because it looks better that way :-)
+    'FAILURE', 'SUCCESS',
+
+    'ANY', 'ANY_ALL',
+    'ASSERT', 'ASSERT_NOT',
+    'AT',
+    'BRANCH',
+    'CATEGORY',
+    'CHARSET', 'BIGCHARSET',
+    'GROUPREF', 'GROUPREF_EXISTS',
+    'IN',
+    'INFO',
+    'JUMP',
+    'LITERAL',
+    'MARK',
+    'MAX_UNTIL',
+    'MIN_UNTIL',
+    'NOT_LITERAL',
+    'NEGATE',
+    'RANGE',
+    'REPEAT',
+    'REPEAT_ONE',
+    'SUBPATTERN',
+    'MIN_REPEAT_ONE',
+    'ATOMIC_GROUP',
+    'POSSESSIVE_REPEAT',
+    'POSSESSIVE_REPEAT_ONE',
+
+    'GROUPREF_IGNORE',
+    'IN_IGNORE',
+    'LITERAL_IGNORE',
+    'NOT_LITERAL_IGNORE',
+
+    'GROUPREF_LOC_IGNORE',
+    'IN_LOC_IGNORE',
+    'LITERAL_LOC_IGNORE',
+    'NOT_LITERAL_LOC_IGNORE',
+
+    'GROUPREF_UNI_IGNORE',
+    'IN_UNI_IGNORE',
+    'LITERAL_UNI_IGNORE',
+    'NOT_LITERAL_UNI_IGNORE',
+    'RANGE_UNI_IGNORE',
+
+    # The following opcodes occur only in the parser output,
+    # never in the compiled code.
+    'MIN_REPEAT', 'MAX_REPEAT',
+)
+del OPCODES[-2:] # remove MIN_REPEAT and MAX_REPEAT
+
+# positions
+ATCODES = _makecodes(
+    'AT_BEGINNING', 'AT_BEGINNING_LINE', 'AT_BEGINNING_STRING',
+    'AT_BOUNDARY', 'AT_NON_BOUNDARY',
+    'AT_END', 'AT_END_LINE', 'AT_END_STRING',
+
+    'AT_LOC_BOUNDARY', 'AT_LOC_NON_BOUNDARY',
+
+    'AT_UNI_BOUNDARY', 'AT_UNI_NON_BOUNDARY',
+)
+
+# categories
+CHCODES = _makecodes(
+    'CATEGORY_DIGIT', 'CATEGORY_NOT_DIGIT',
+    'CATEGORY_SPACE', 'CATEGORY_NOT_SPACE',
+    'CATEGORY_WORD', 'CATEGORY_NOT_WORD',
+    'CATEGORY_LINEBREAK', 'CATEGORY_NOT_LINEBREAK',
+
+    'CATEGORY_LOC_WORD', 'CATEGORY_LOC_NOT_WORD',
+
+    'CATEGORY_UNI_DIGIT', 'CATEGORY_UNI_NOT_DIGIT',
+    'CATEGORY_UNI_SPACE', 'CATEGORY_UNI_NOT_SPACE',
+    'CATEGORY_UNI_WORD', 'CATEGORY_UNI_NOT_WORD',
+    'CATEGORY_UNI_LINEBREAK', 'CATEGORY_UNI_NOT_LINEBREAK',
+)
+
+
+# replacement operations for "ignore case" mode
+OP_IGNORE = {
+    LITERAL: LITERAL_IGNORE,
+    NOT_LITERAL: NOT_LITERAL_IGNORE,
+}
+
+OP_LOCALE_IGNORE = {
+    LITERAL: LITERAL_LOC_IGNORE,
+    NOT_LITERAL: NOT_LITERAL_LOC_IGNORE,
+}
+
+OP_UNICODE_IGNORE = {
+    LITERAL: LITERAL_UNI_IGNORE,
+    NOT_LITERAL: NOT_LITERAL_UNI_IGNORE,
+}
+
+AT_MULTILINE = {
+    AT_BEGINNING: AT_BEGINNING_LINE,
+    AT_END: AT_END_LINE
+}
+
+AT_LOCALE = {
+    AT_BOUNDARY: AT_LOC_BOUNDARY,
+    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
+}
+
+AT_UNICODE = {
+    AT_BOUNDARY: AT_UNI_BOUNDARY,
+    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
+}
+
+CH_LOCALE = {
+    CATEGORY_DIGIT: CATEGORY_DIGIT,
+    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
+    CATEGORY_SPACE: CATEGORY_SPACE,
+    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
+    CATEGORY_WORD: CATEGORY_LOC_WORD,
+    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
+    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
+    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
+}
+
+CH_UNICODE = {
+    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
+    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
+    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
+    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
+    CATEGORY_WORD: CATEGORY_UNI_WORD,
+    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
+    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
+    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
+}
+
+# flags
+SRE_FLAG_IGNORECASE = 2 # case insensitive
+SRE_FLAG_LOCALE = 4 # honour system locale
+SRE_FLAG_MULTILINE = 8 # treat target as multiline string
+SRE_FLAG_DOTALL = 16 # treat target as a single string
+SRE_FLAG_UNICODE = 32 # use unicode "locale"
+SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
+SRE_FLAG_DEBUG = 128 # debugging
+SRE_FLAG_ASCII = 256 # use ascii "locale"
+
+# flags for INFO primitive
+SRE_INFO_PREFIX = 1 # has prefix
+SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
+SRE_INFO_CHARSET = 4 # pattern starts with character from given set
diff --git a/crates/weavepy-vm/src/stdlib/python/re_engine.py b/crates/weavepy-vm/src/stdlib/python/re_engine.py
new file mode 100644
index 0000000..de9c8eb
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/re_engine.py
@@ -0,0 +1,426 @@
+#
+# WeavePy: user-visible Pattern / Match objects for the re package.
+#
+# CPython implements `re.Pattern` and `re.Match` as C types inside the
+# `_sre` extension. WeavePy instead keeps `_sre` as a pure-data
+# backtracking core (compile + exec returning group spans) and builds
+# the user-facing objects here, in Python. Doing so means callable
+# `re.sub` replacements, `finditer`, `Scanner`, etc. all run on the
+# normal interpreter without the engine ever re-entering the VM.
+#
+# Behaviour (group semantics, greedy/lazy scanning, empty-match
+# handling, split/sub/subn rules) follows CPython 3.13 exactly.
+
+import _sre
+from . import _parser
+from ._constants import error as PatternError
+
+__all__ = ["Pattern", "Match", "compile_pattern"]
+
+# exec() modes understood by the native core.
+_MODE_SEARCH = 0
+_MODE_MATCH = 1
+_MODE_FULLMATCH = 2
+
+# Cache of parsed replacement templates, keyed by (pattern handle, repl).
+# Cleared by re.purge().
+_template_cache = {}
+
+
+def compile_pattern(pattern, flags, code, groups, groupindex, indexgroup):
+    """Wrap compiled `code` in a Pattern; used by re._compiler.compile()."""
+    return Pattern(_sre.compile(code, groups), pattern, flags, groups,
+                   groupindex, indexgroup)
+
+
+def _clamp_span(string, pos, endpos):
+    """Normalise (pos, endpos) to offsets inside `string`.
+
+    None means "from the start" / "to the end"; any value outside
+    [0, len(string)] is pulled back to the nearest bound. The two
+    ends are clamped independently, so pos may exceed endpos.
+    """
+    limit = len(string)
+    # Substitute the defaults first ...
+    lo = 0 if pos is None else pos
+    hi = limit if endpos is None else endpos
+    # ... then squeeze both ends into [0, limit].
+    lo = min(max(lo, 0), limit)
+    hi = min(max(hi, 0), limit)
+    return lo, hi
+
+
+class Pattern:
+    """Compiled regular expression wrapping a native `_sre` handle."""
+
+    __module__ = 're'
+
+    def __init__(self, handle, pattern, flags, groups, groupindex, indexgroup):
+        self._handle = handle
+        self.pattern = pattern
+        self.flags = flags
+        self.groups = groups
+        self.groupindex = groupindex
+        # tuple indexed by group number -> name or None
+        self._indexgroup = indexgroup
+
+    # -- internal --------------------------------------------------------
+
+    def _exec(self, string, pos, endpos, mode, must_advance):
+        # One native engine run: None, or (start, end, lastindex, marks).
+        return _sre.exec(self._handle, string, pos, endpos, mode,
+                         1 if must_advance else 0)
+
+    def _iter(self, string, pos, endpos):
+        # Generator behind findall()/finditer(): yields successive matches.
+        pos, endpos = _clamp_span(string, pos, endpos)
+        must_advance = False
+        opos, oendpos = pos, endpos
+        while pos <= endpos:
+            r = self._exec(string, pos, endpos, _MODE_SEARCH, must_advance)
+            if r is None:
+                break
+            start, end = r[0], r[1]
+            yield Match(self, string, opos, oendpos, r)
+            # An empty match forces the next search to make progress.
+            must_advance = start == end
+            pos = end
+
+    # -- public matching API --------------------------------------------
+
+    def match(self, string, pos=0, endpos=None):
+        """Match anchored at `pos`; return a Match or None."""
+        p, e = _clamp_span(string, pos, endpos)
+        r = self._exec(string, p, e, _MODE_MATCH, False)
+        return None if r is None else Match(self, string, p, e, r)
+
+    def fullmatch(self, string, pos=0, endpos=None):
+        """Match the whole [pos, endpos) span; return a Match or None."""
+        p, e = _clamp_span(string, pos, endpos)
+        r = self._exec(string, p, e, _MODE_FULLMATCH, False)
+        return None if r is None else Match(self, string, p, e, r)
+
+    def search(self, string, pos=0, endpos=None):
+        """Find the first match at or after `pos`; return a Match or None."""
+        p, e = _clamp_span(string, pos, endpos)
+        r = self._exec(string, p, e, _MODE_SEARCH, False)
+        return None if r is None else Match(self, string, p, e, r)
+
+    def findall(self, string, pos=0, endpos=None):
+        """List all matches: strings, or group tuples if groups > 1."""
+        g = self.groups
+        empty = string[:0]
+        out = []
+        for m in self._iter(string, pos, endpos):
+            if g == 0:
+                out.append(m.group(0))
+            elif g == 1:
+                out.append(m.groups(empty)[0])
+            else:
+                # Non-participating groups are reported as empty strings.
+                out.append(m.groups(empty))
+        return out
+
+    def finditer(self, string, pos=0, endpos=None):
+        """Return an iterator of Match objects over all matches."""
+        return self._iter(string, pos, endpos)
+
+    def sub(self, repl, string, count=0):
+        """Return `string` with matches replaced by `repl`."""
+        return self._subx(repl, string, count)[0]
+
+    def subn(self, repl, string, count=0):
+        """Like sub(), but return (new_string, number_of_subs)."""
+        return self._subx(repl, string, count)
+
+    def _subx(self, repl, string, count):
+        """Shared sub()/subn() driver; returns (new_string, n_subs).
+
+        count == 0 replaces all; a negative count replaces nothing."""
+        empty = string[:0]
+        if callable(repl):
+            filt = repl
+        else:
+            template = _compile_template(self, repl)
+            if len(template) == 1 and not isinstance(template[0], int):
+                # pure literal replacement
+                literal = template[0]
+                filt = lambda m, _l=literal: _l
+            else:
+                filt = lambda m, _t=template: _expand_template(_t, m)
+        out = []
+        n = 0
+        last = 0
+        pos = 0
+        endpos = len(string)
+        must_advance = False
+        while pos <= endpos and (not count or n < count):
+            r = self._exec(string, pos, endpos, _MODE_SEARCH, must_advance)
+            if r is None:
+                break
+            start, end = r[0], r[1]
+            out.append(string[last:start])
+            piece = filt(Match(self, string, 0, endpos, r))
+            if piece is not None:  # CPython drops a None replacement
+                out.append(piece)
+            last = end
+            n += 1
+            must_advance = start == end
+            pos = end
+        out.append(string[last:])
+        return empty.join(out), n
+
+    def split(self, string, maxsplit=0):
+        """Split `string` around matches; captured groups are included."""
+        if maxsplit < 0:
+            return [string]
+        g = self.groups
+        out = []
+        n = 0
+        last = 0
+        pos = 0
+        endpos = len(string)
+        must_advance = False
+        while pos <= endpos and (not maxsplit or n < maxsplit):
+            r = self._exec(string, pos, endpos, _MODE_SEARCH, must_advance)
+            if r is None:
+                break
+            start, end = r[0], r[1]
+            m = Match(self, string, 0, endpos, r)
+            out.append(string[last:start])
+            for i in range(1, g + 1):
+                out.append(m.group(i))
+            last = end
+            n += 1
+            must_advance = start == end
+            pos = end
+        out.append(string[last:])
+        return out
+
+    def scanner(self, string, pos=0, endpos=None):
+        """Return a stateful scanner positioned at `pos`."""
+        return _Scanner(self, string, pos, endpos)
+
+    # -- misc ------------------------------------------------------------
+
+    def __repr__(self):
+        s = repr(self.pattern)[:200]  # cap the pattern repr at 200 chars
+        # Hide the implicit UNICODE flag (32) the way CPython does.
+        if self.flags & ~32:
+            return "re.compile(%s, %s)" % (s, _flags_repr(self.flags))
+        return "re.compile(%s)" % s
+
+    # Copying hands back the same object rather than a duplicate.
+    def __copy__(self):
+        return self
+
+    def __deepcopy__(self, memo):
+        return self
+
+    @property
+    def groupindex_proxy(self):
+        return self.groupindex
+
+
+# Bit -> name pairs, in display order, used when rendering Pattern flags.
+_FLAG_NAMES = [
+    (256, 're.ASCII'),
+    (2, 're.IGNORECASE'),
+    (4, 're.LOCALE'),
+    (8, 're.MULTILINE'),
+    (16, 're.DOTALL'),
+    (64, 're.VERBOSE'),
+    (128, 're.DEBUG'),
+]
+
+
+def _flags_repr(flags):
+    """Render a flag mask as 're.A|re.B'.
+
+    Bits without a symbolic name are appended in hex; an empty
+    result is rendered as '0'.
+    """
+    leftover = flags & ~32  # the implicit UNICODE flag is never shown
+    shown = [name for bit, name in _FLAG_NAMES if leftover & bit]
+    for bit, _name in _FLAG_NAMES:
+        leftover &= ~bit
+    if leftover:
+        shown.append(hex(leftover))
+    return '|'.join(shown) or '0'
+
+
+class Match:
+    __module__ = 're'
+
+    def __init__(self, pattern, string, pos, endpos, r):
+        self.re = pattern
+        self.string = string
+        self.pos = pos
+        self.endpos = endpos
+        self._start = r[0]  # r is the engine result sequence
+        self._end = r[1]
+        self._lastindex_raw = r[2]  # negative when there is no last group
+        self._marks = r[3]  # flat offsets: start1, end1, start2, end2, ...
+
+    # -- group span helpers ---------------------------------------------
+
+    def _span_of(self, idx):  # (start, end) of group idx; 0 = whole match
+        if idx == 0:
+            return (self._start, self._end)
+        i = (idx - 1) * 2
+        return (self._marks[i], self._marks[i + 1])
+
+    def _index(self, group):  # group number or name -> checked group number
+        if isinstance(group, int) or (not isinstance(group, str) and hasattr(group, '__index__')):
+            idx = int(group)
+        else:
+            try:
+                idx = self.re.groupindex[group]
+            except KeyError:
+                raise IndexError("no such group") from None
+        if not 0 <= idx <= self.re.groups:
+            raise IndexError("no such group")
+        return idx
+
+    def _getslice(self, idx, default):  # text of group idx, or `default`
+        s, e = self._span_of(idx)
+        if s < 0 or e < 0:  # negative offset: the group has no value
+            return default
+        return self.string[s:e]
+
+    # -- public API ------------------------------------------------------
+
+    def group(self, *args):  # no args: whole match; several args: a tuple
+        if not args:
+            return self._getslice(0, None)
+        if len(args) == 1:
+            return self._getslice(self._index(args[0]), None)
+        return tuple(self._getslice(self._index(g), None) for g in args)
+
+    def __getitem__(self, group):  # m[g] behaves like m.group(g)
+        return self._getslice(self._index(group), None)
+
+    def groups(self, default=None):  # groups 1..N; `default` fills the gaps
+        return tuple(self._getslice(i, default)
+                     for i in range(1, self.re.groups + 1))
+
+    def groupdict(self, default=None):  # named groups only: name -> text
+        result = {}
+        for name, idx in self.re.groupindex.items():
+            result[name] = self._getslice(idx, default)
+        return result
+
+    def start(self, group=0):
+        return self._span_of(self._index(group))[0]
+
+    def end(self, group=0):
+        return self._span_of(self._index(group))[1]
+
+    def span(self, group=0):
+        return self._span_of(self._index(group))
+
+    @property
+    def regs(self):  # spans of group 0 and every numbered group, as a tuple
+        spans = [(self._start, self._end)]
+        for i in range(1, self.re.groups + 1):
+            spans.append(self._span_of(i))
+        return tuple(spans)
+
+    @property
+    def lastindex(self):  # engine's last-group number, or None if negative
+        li = self._lastindex_raw
+        return None if li < 0 else li
+
+    @property
+    def lastgroup(self):  # name stored for lastindex, or None
+        li = self.lastindex
+        if li is None:
+            return None
+        try:
+            return self.re._indexgroup[li]
+        except (IndexError, TypeError):
+            return None
+
+    def expand(self, template):  # template is parsed on every call (no cache)
+        return _expand_template(_parse_template(self.re, template), self)
+
+    def __copy__(self):  # copying returns the same object
+        return self
+
+    def __deepcopy__(self, memo):
+        return self
+
+    def __repr__(self):
+        text = self.string[self._start:self._end]
+        return "<re.Match object; span=(%d, %d), match=%r>" % (
+            self._start, self._end, text)
+
+
+class _Scanner:
+    # Stateful cursor returned by Pattern.scanner(): every successful
+    # match()/search() call resumes where the previous match ended.
+    def __init__(self, pattern, string, pos, endpos):
+        self.pattern = pattern
+        self._string = string
+        self._pos, self._endpos = _clamp_span(string, pos, endpos)
+        self._opos = self._pos
+        self._oendpos = self._endpos
+        self._must_advance = False
+
+    def match(self):
+        return self._run(_MODE_MATCH)
+
+    def search(self):
+        return self._run(_MODE_SEARCH)
+
+    def _run(self, mode):
+        if self._pos > self._endpos:
+            return None
+        r = self.pattern._exec(self._string, self._pos, self._endpos, mode,
+                               self._must_advance)
+        if r is None:
+            return None
+        start, end = r[0], r[1]
+        m = Match(self.pattern, self._string, self._opos, self._oendpos, r)
+        self._must_advance = start == end
+        self._pos = end
+        return m
+
+
+# ---------------------------------------------------------------------------
+# Replacement-template handling
+# ---------------------------------------------------------------------------
+
+def _parse_template(pattern, repl):  # uncached parse; used by Match.expand()
+    return _parser.parse_template(repl, pattern)
+
+
+def _compile_template(pattern, repl):
+    """Parse `repl` for `pattern`, memoised in _template_cache."""
+    cache_key = (pattern._handle, repl)
+    if cache_key in _template_cache:
+        return _template_cache[cache_key]
+    parsed = _parser.parse_template(repl, pattern)
+    # Crude bound: once 512 entries are stored, start over from empty.
+    if len(_template_cache) >= 512:
+        _template_cache.clear()
+    _template_cache[cache_key] = parsed
+    return parsed
+
+
+def _expand_template(template, match):
+    """Expand a parsed template against `match`.
+
+    Items are literal chunks (str/bytes) or int group numbers; a group
+    without a value contributes an empty string."""
+    blank = match.string[:0]
+    def piece(item):
+        if not isinstance(item, int):
+            return item
+        text = match.group(item)
+        return blank if text is None else text
+    return blank.join(map(piece, template))
+
+
+def clear_template_cache():  # drops every memoised replacement template
+    _template_cache.clear()
diff --git a/crates/weavepy-vm/src/stdlib/python/re_init.py b/crates/weavepy-vm/src/stdlib/python/re_init.py
new file mode 100644
index 0000000..862400c
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/re_init.py
@@ -0,0 +1,350 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# re-compatible interface for the sre matching engine
+#
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
+#
+# This version of the SRE library can be redistributed under CNRI's
+# Python 1.6 license.  For any other use, please contact Secret Labs
+# AB (info@pythonware.com).
+#
+
+r"""Support for regular expressions (RE).
+
+This module provides regular expression matching operations similar to
+those found in Perl.  It supports both 8-bit and Unicode strings; both
+the pattern and the strings being processed can contain null bytes and
+characters outside the US ASCII range.
+
+Regular expressions can contain both special and ordinary characters.
+Most ordinary characters, like "A", "a", or "0", are the simplest
+regular expressions; they simply match themselves.  You can
+concatenate ordinary characters, so last matches the string 'last'.
+
+This module exports the following functions:
+    match     Match a regular expression pattern to the beginning of a string.
+    fullmatch Match a regular expression pattern to all of a string.
+    search    Search a string for the presence of a pattern.
+    sub       Substitute occurrences of a pattern found in a string.
+    subn      Same as sub, but also return the number of substitutions made.
+    split     Split a string by the occurrences of a pattern.
+    findall   Find all occurrences of a pattern in a string.
+    finditer  Return an iterator yielding a Match object for each match.
+    compile   Compile a pattern into a Pattern object.
+    purge     Clear the regular expression cache.
+    escape    Backslash all non-alphanumerics in a string.
+
+This module also defines an exception 'PatternError', aliased to 'error'
+for backward compatibility.
+
+"""
+
+import enum
+from . import _compiler, _parser
+from . import _engine
+import functools
+import _sre
+
+
+# public symbols
+__all__ = [
+    "match", "fullmatch", "search", "sub", "subn", "split",
+    "findall", "finditer", "compile", "purge", "escape",
+    "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
+    "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
+    "UNICODE", "NOFLAG", "RegexFlag", "PatternError"
+]
+
+__version__ = "2.2.1"
+
+
+class RegexFlag(enum.IntFlag):  # values come from the SRE_FLAG_* constants
+    NOFLAG = 0
+    ASCII = A = _compiler.SRE_FLAG_ASCII # assume ascii "locale"
+    IGNORECASE = I = _compiler.SRE_FLAG_IGNORECASE # ignore case
+    LOCALE = L = _compiler.SRE_FLAG_LOCALE # assume current 8-bit locale
+    UNICODE = U = _compiler.SRE_FLAG_UNICODE # assume unicode "locale"
+    MULTILINE = M = _compiler.SRE_FLAG_MULTILINE # make anchors look for newline
+    DOTALL = S = _compiler.SRE_FLAG_DOTALL # make dot match newline
+    VERBOSE = X = _compiler.SRE_FLAG_VERBOSE # ignore whitespace and comments
+    # sre extensions (experimental, don't rely on these)
+    DEBUG = _compiler.SRE_FLAG_DEBUG # dump pattern after compilation
+
+globals().update(RegexFlag.__members__)
+
+# sre exception
+PatternError = error = _compiler.PatternError
+
+# --------------------------------------------------------------------
+# public interface
+
+def match(pattern, string, flags=0):
+    """Match `pattern` at the start of `string`; return a Match or None."""
+    compiled = _compile(pattern, flags)
+    return compiled.match(string)
+
+def fullmatch(pattern, string, flags=0):
+    """Match `pattern` against all of `string`; return a Match or None."""
+    compiled = _compile(pattern, flags)
+    return compiled.fullmatch(string)
+
+def search(pattern, string, flags=0):
+    """Scan `string` for the first match; return a Match or None."""
+    compiled = _compile(pattern, flags)
+    return compiled.search(string)
+
+class _ZeroSentinel(int):  # int subclass whose default instance equals 0
+    pass
+_zero_sentinel = _ZeroSentinel()  # identity check detects an omitted argument
+
+def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
+    """Return the string obtained by replacing the leftmost
+    non-overlapping occurrences of the pattern in string by the
+    replacement repl.  repl can be either a string or a callable;
+    if a string, backslash escapes in it are processed.  If it is
+    a callable, it's passed the Match object and must return
+    a replacement string to be used."""
+    if args:  # legacy call style: count (and flags) passed positionally
+        if count is not _zero_sentinel:
+            raise TypeError("sub() got multiple values for argument 'count'")
+        count, *args = args
+        if args:
+            if flags is not _zero_sentinel:
+                raise TypeError("sub() got multiple values for argument 'flags'")
+            flags, *args = args
+            if args:
+                raise TypeError("sub() takes from 3 to 5 positional arguments "
+                                "but %d were given" % (5 + len(args)))
+        # Positional use is still honoured, but flagged as deprecated.
+        import warnings
+        warnings.warn(
+            "'count' is passed as positional argument",
+            DeprecationWarning, stacklevel=2
+        )
+    elif count is _zero_sentinel:
+        count = 0
+    if flags is _zero_sentinel:
+        flags = 0
+    # Delegate to the compiled pattern obtained from _compile().
+    return _compile(pattern, flags).sub(repl, string, count)
+sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
+
+def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
+    """Return a 2-tuple containing (new_string, number).
+    new_string is the string obtained by replacing the leftmost
+    non-overlapping occurrences of the pattern in the source
+    string by the replacement repl.  number is the number of
+    substitutions that were made. repl can be either a string or a
+    callable; if a string, backslash escapes in it are processed.
+    If it is a callable, it's passed the Match object and must
+    return a replacement string to be used."""
+    if args:  # legacy call style: count (and flags) passed positionally
+        if count is not _zero_sentinel:
+            raise TypeError("subn() got multiple values for argument 'count'")
+        count, *args = args
+        if args:
+            if flags is not _zero_sentinel:
+                raise TypeError("subn() got multiple values for argument 'flags'")
+            flags, *args = args
+            if args:
+                raise TypeError("subn() takes from 3 to 5 positional arguments "
+                                "but %d were given" % (5 + len(args)))
+        # Positional use is still honoured, but flagged as deprecated.
+        import warnings
+        warnings.warn(
+            "'count' is passed as positional argument",
+            DeprecationWarning, stacklevel=2
+        )
+    elif count is _zero_sentinel:
+        count = 0
+    if flags is _zero_sentinel:
+        flags = 0
+    # Delegate to the compiled pattern obtained from _compile().
+    return _compile(pattern, flags).subn(repl, string, count)
+subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
+
+def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
+    """Split the source string by the occurrences of the pattern,
+    returning a list containing the resulting substrings.  If
+    capturing parentheses are used in pattern, then the text of all
+    groups in the pattern are also returned as part of the resulting
+    list.  If maxsplit is nonzero, at most maxsplit splits occur,
+    and the remainder of the string is returned as the final element
+    of the list."""
+    if args:  # legacy call style: maxsplit (and flags) passed positionally
+        if maxsplit is not _zero_sentinel:
+            raise TypeError("split() got multiple values for argument 'maxsplit'")
+        maxsplit, *args = args
+        if args:
+            if flags is not _zero_sentinel:
+                raise TypeError("split() got multiple values for argument 'flags'")
+            flags, *args = args
+            if args:
+                raise TypeError("split() takes from 2 to 4 positional arguments "
+                                "but %d were given" % (4 + len(args)))
+        # Positional use is still honoured, but flagged as deprecated.
+        import warnings
+        warnings.warn(
+            "'maxsplit' is passed as positional argument",
+            DeprecationWarning, stacklevel=2
+        )
+    elif maxsplit is _zero_sentinel:
+        maxsplit = 0
+    if flags is _zero_sentinel:
+        flags = 0
+    # Delegate to the compiled pattern obtained from _compile().
+    return _compile(pattern, flags).split(string, maxsplit)
+split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'
+
+def findall(pattern, string, flags=0):
+    """Return every non-overlapping match in `string` as a list.
+
+    With one capturing group the list holds that group's text; with
+    several it holds one tuple of groups per match.
+
+    Empty matches are included in the result."""
+    compiled = _compile(pattern, flags)
+    return compiled.findall(string)
+
+def finditer(pattern, string, flags=0):
+    """Iterate over all non-overlapping matches, yielding Match objects.
+
+    Empty matches are included in the result."""
+    compiled = _compile(pattern, flags)
+    return compiled.finditer(string)
+
+def compile(pattern, flags=0):
+    "Compile a regular expression pattern, returning a Pattern object."
+    return _compile(pattern, flags)  # cache lookup; compiles on a miss
+
+def purge():
+    "Clear the regular expression caches"
+    _cache.clear()  # LRU cache of compiled patterns
+    _cache2.clear()  # FIFO cache of compiled patterns
+    _engine.clear_template_cache()  # parsed replacement templates
+
+
+# SPECIAL_CHARS
+# closing ')', '}' and ']'
+# '-' (a range in character set)
+# '&', '~', (extended character set operations)
+# '#' (comment) and WHITESPACE (ignored) in verbose mode
+_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'}
+
+def escape(pattern):
+    """Return `pattern` with every regex-special character backslashed.
+
+    A str is translated directly; anything else is decoded as latin-1,
+    translated, and encoded back to bytes.
+    """
+    if not isinstance(pattern, str):
+        as_text = str(pattern, 'latin1')
+        return as_text.translate(_special_chars_map).encode('latin1')
+    return pattern.translate(_special_chars_map)
+
+Pattern = type(_compiler.compile('', 0))
+Match = type(_compiler.compile('', 0).match(''))
+
+# --------------------------------------------------------------------
+# internals
+
+# Use the fact that dict keeps the insertion order.
+# _cache2 uses the simple FIFO policy which has better latency.
+# _cache uses the LRU policy which has better hit rate.
+_cache = {}  # LRU
+_cache2 = {}  # FIFO
+_MAXCACHE = 512
+_MAXCACHE2 = 256
+assert _MAXCACHE2 < _MAXCACHE
+
+def _compile(pattern, flags):
+    # internal: compile pattern
+    if isinstance(flags, RegexFlag):
+        flags = flags.value
+    try:  # fast path: hit in the FIFO cache
+        return _cache2[type(pattern), pattern, flags]
+    except KeyError:
+        pass
+    # Slow path: consult the LRU cache, compiling on a miss.
+    key = (type(pattern), pattern, flags)
+    # Item in _cache should be moved to the end if found.
+    p = _cache.pop(key, None)
+    if p is None:
+        if isinstance(pattern, Pattern):
+            if flags:
+                raise ValueError(
+                    "cannot process flags argument with a compiled pattern")
+            return pattern  # already compiled: passed through, never cached
+        if not _compiler.isstring(pattern):
+            raise TypeError("first argument must be string or compiled pattern")
+        p = _compiler.compile(pattern, flags)
+        if flags & DEBUG:
+            return p  # DEBUG compilations bypass both caches
+        if len(_cache) >= _MAXCACHE:
+            # Drop the least recently used item.
+            try:
+                del _cache[next(iter(_cache))]
+            except (StopIteration, RuntimeError, KeyError):
+                pass
+    # Append to the end.
+    _cache[key] = p
+    # Mirror the entry into the FIFO cache used by the fast path.
+    if len(_cache2) >= _MAXCACHE2:
+        # Drop the oldest item.
+        try:
+            del _cache2[next(iter(_cache2))]
+        except (StopIteration, RuntimeError, KeyError):
+            pass
+    _cache2[key] = p
+    return p
+
+# register myself for pickling
+
+import copyreg
+
+def _pickle(p):  # reduce a Pattern to (_compile, (source, flags))
+    return _compile, (p.pattern, p.flags)
+
+copyreg.pickle(Pattern, _pickle, _compile)
+
+# --------------------------------------------------------------------
+# experimental stuff (see python-dev discussions for details)
+
+class Scanner:  # lexicon-driven tokenizer built on one compound pattern
+    def __init__(self, lexicon, flags=0):
+        from ._constants import BRANCH, SUBPATTERN
+        if isinstance(flags, RegexFlag):
+            flags = flags.value
+        self.lexicon = lexicon  # sequence of (phrase, action) pairs
+        # combine phrases into a compound pattern
+        p = []
+        s = _parser.State()
+        s.flags = flags
+        for phrase, action in lexicon:
+            gid = s.opengroup()  # one capturing group per lexicon entry
+            p.append(_parser.SubPattern(s, [
+                (SUBPATTERN, (gid, 0, 0, _parser.parse(phrase, flags))),
+                ]))
+            s.closegroup(gid, p[-1])
+        p = _parser.SubPattern(s, [(BRANCH, (None, p))])
+        self.scanner = _compiler.compile(p)
+    def scan(self, string):  # returns (results, unscanned remainder)
+        result = []
+        append = result.append
+        match = self.scanner.scanner(string).match
+        i = 0
+        while True:
+            m = match()
+            if not m:
+                break
+            j = m.end()
+            if i == j:  # zero-width match: stop scanning
+                break
+            action = self.lexicon[m.lastindex-1][1]  # group number -> entry
+            if callable(action):
+                self.match = m
+                action = action(self, m.group())
+            if action is not None:  # a None action discards the token
+                append(action)
+            i = j
+        return result, string[i:]
diff --git a/crates/weavepy-vm/src/stdlib/python/re_parser.py b/crates/weavepy-vm/src/stdlib/python/re_parser.py
new file mode 100644
index 0000000..f3c7793
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/re_parser.py
@@ -0,0 +1,1081 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# convert re-style regular expression to sre pattern
+#
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
+#
+# See the __init__.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre"""
+
+# XXX: show string offset and offending character for all errors
+
+from ._constants import *
+
+SPECIAL_CHARS = ".\\[{()*+?^$|"  # _parse emits a LITERAL for any non-escape token not found here
+REPEAT_CHARS = "*+?{"  # tokens that _parse handles as the start of a quantifier
+
+DIGITS = frozenset("0123456789")
+
+OCTDIGITS = frozenset("01234567")
+HEXDIGITS = frozenset("0123456789abcdefABCDEF")
+ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+
+WHITESPACE = frozenset(" \t\n\r\v\f")  # tokens skipped by _parse when verbose is set
+
+_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT})
+_UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY})  # SubPattern.getwidth counts each as width 1
+
+ESCAPES = {
+    r"\a": (LITERAL, ord("\a")),
+    r"\b": (LITERAL, ord("\b")),  # backspace; used inside [...] because _class_escape checks ESCAPES first
+    r"\f": (LITERAL, ord("\f")),
+    r"\n": (LITERAL, ord("\n")),
+    r"\r": (LITERAL, ord("\r")),
+    r"\t": (LITERAL, ord("\t")),
+    r"\v": (LITERAL, ord("\v")),
+    r"\\": (LITERAL, ord("\\"))
+}
+
+CATEGORIES = {
+    r"\A": (AT, AT_BEGINNING_STRING), # start of string
+    r"\b": (AT, AT_BOUNDARY),  # used outside [...] because _escape checks CATEGORIES first
+    r"\B": (AT, AT_NON_BOUNDARY),
+    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
+    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
+    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
+    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
+    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
+    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
+    r"\Z": (AT, AT_END_STRING), # end of string
+}
+
+FLAGS = {
+    # standard flags (inline flag letter -> SRE flag bit)
+    "i": SRE_FLAG_IGNORECASE,
+    "L": SRE_FLAG_LOCALE,
+    "m": SRE_FLAG_MULTILINE,
+    "s": SRE_FLAG_DOTALL,
+    "x": SRE_FLAG_VERBOSE,
+    # extensions
+    "a": SRE_FLAG_ASCII,
+    "u": SRE_FLAG_UNICODE,
+}
+
+TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE
+GLOBAL_FLAGS = SRE_FLAG_DEBUG
+
+# Maximal value returned by SubPattern.getwidth().
+# Must be larger than MAXREPEAT, MAXCODE and sys.maxsize.
+MAXWIDTH = 1 << 64
+
+class State:
+    # keeps track of state for parsing: flags, group names and per-group widths
+    def __init__(self):
+        self.flags = 0
+        self.groupdict = {}  # group name -> group id, filled by opengroup()
+        self.groupwidths = [None]  # group 0
+        self.lookbehindgroups = None  # _parse sets this to the group count while inside a lookbehind
+        self.grouprefpos = {}
+    @property
+    def groups(self):  # number of group slots allocated so far, counting group 0
+        return len(self.groupwidths)
+    def opengroup(self, name=None):  # allocate the next group id; its width stays None until closegroup()
+        gid = self.groups
+        self.groupwidths.append(None)
+        if self.groups > MAXGROUPS:
+            raise error("too many groups")
+        if name is not None:
+            ogid = self.groupdict.get(name, None)
+            if ogid is not None:
+                raise error("redefinition of group name %r as group %d; "
+                            "was group %d" % (name, gid,  ogid))
+            self.groupdict[name] = gid
+        return gid
+    def closegroup(self, gid, p):  # store the (min, max) width of p for group gid
+        self.groupwidths[gid] = p.getwidth()
+    def checkgroup(self, gid):  # True only if gid was allocated and its width has been recorded
+        return gid < self.groups and self.groupwidths[gid] is not None
+
+    def checklookbehindgroup(self, gid, source):  # no-op unless lookbehindgroups is set
+        if self.lookbehindgroups is not None:
+            if not self.checkgroup(gid):
+                raise source.error('cannot refer to an open group')
+            if gid >= self.lookbehindgroups:  # gid was allocated after lookbehindgroups was recorded
+                raise source.error('cannot refer to group defined in the same '
+                                   'lookbehind subpattern')
+
+class SubPattern:
+    # a subpattern, in intermediate form: a list of (op, av) items plus a cached width
+    def __init__(self, state, data=None):
+        self.state = state
+        if data is None:
+            data = []
+        self.data = data
+        self.width = None  # (min, max) cache, filled by the first getwidth() call
+
+    def dump(self, level=0):  # print an indented listing of the items, recursing into nested subpatterns
+        seqtypes = (tuple, list)
+        for op, av in self.data:
+            print(level*"  " + str(op), end='')
+            if op is IN:
+                # member sublanguage
+                print()
+                for op, a in av:
+                    print((level+1)*"  " + str(op), a)
+            elif op is BRANCH:
+                print()
+                for i, a in enumerate(av[1]):
+                    if i:
+                        print(level*"  " + "OR")
+                    a.dump(level+1)
+            elif op is GROUPREF_EXISTS:
+                condgroup, item_yes, item_no = av
+                print('', condgroup)
+                item_yes.dump(level+1)
+                if item_no:
+                    print(level*"  " + "ELSE")
+                    item_no.dump(level+1)
+            elif isinstance(av, SubPattern):
+                print()
+                av.dump(level+1)
+            elif isinstance(av, seqtypes):
+                nl = False  # True right after a nested SubPattern was dumped
+                for a in av:
+                    if isinstance(a, SubPattern):
+                        if not nl:
+                            print()
+                        a.dump(level+1)
+                        nl = True
+                    else:
+                        if not nl:
+                            print(' ', end='')
+                        print(a, end='')
+                        nl = False
+                if not nl:
+                    print()
+            else:
+                print('', av)
+    def __repr__(self):
+        return repr(self.data)
+    def __len__(self):
+        return len(self.data)
+    def __delitem__(self, index):
+        del self.data[index]
+    def __getitem__(self, index):
+        if isinstance(index, slice):  # a slice yields a new SubPattern that shares this state
+            return SubPattern(self.state, self.data[index])
+        return self.data[index]
+    def __setitem__(self, index, code):
+        self.data[index] = code
+    def insert(self, index, code):
+        self.data.insert(index, code)
+    def append(self, code):
+        self.data.append(code)
+    def getwidth(self):
+        # determine the width (min, max) for this subpattern; the result is cached in self.width
+        if self.width is not None:
+            return self.width
+        lo = hi = 0
+        for op, av in self.data:
+            if op is BRANCH:  # smallest min and largest max over all alternatives
+                i = MAXWIDTH
+                j = 0
+                for av in av[1]:
+                    l, h = av.getwidth()
+                    i = min(i, l)
+                    j = max(j, h)
+                lo = lo + i
+                hi = hi + j
+            elif op is ATOMIC_GROUP:
+                i, j = av.getwidth()
+                lo = lo + i
+                hi = hi + j
+            elif op is SUBPATTERN:
+                i, j = av[-1].getwidth()  # the last element of av is the group's SubPattern
+                lo = lo + i
+                hi = hi + j
+            elif op in _REPEATCODES:  # av is (min, max, item)
+                i, j = av[2].getwidth()
+                lo = lo + i * av[0]
+                if av[1] == MAXREPEAT and j:  # MAXREPEAT upper bound on an item with non-zero max width
+                    hi = MAXWIDTH
+                else:
+                    hi = hi + j * av[1]
+            elif op in _UNITCODES:
+                lo = lo + 1
+                hi = hi + 1
+            elif op is GROUPREF:
+                i, j = self.state.groupwidths[av]  # width recorded by State.closegroup()
+                lo = lo + i
+                hi = hi + j
+            elif op is GROUPREF_EXISTS:  # av is (condgroup, item_yes, item_no)
+                i, j = av[1].getwidth()
+                if av[2] is not None:
+                    l, h = av[2].getwidth()
+                    i = min(i, l)
+                    j = max(j, h)
+                else:
+                    i = 0  # no item_no branch: minimum contribution is 0
+                lo = lo + i
+                hi = hi + j
+            elif op is SUCCESS:
+                break
+        self.width = min(lo, MAXWIDTH), min(hi, MAXWIDTH)  # clamp both bounds to MAXWIDTH
+        return self.width
+
+class Tokenizer:  # yields pattern tokens: one character, or a backslash plus the character after it
+    def __init__(self, string):
+        self.istext = isinstance(string, str)
+        self.string = string
+        if not self.istext:
+            string = str(string, 'latin1')  # non-str patterns are decoded so tokens are always str
+        self.decoded_string = string
+        self.index = 0  # offset in decoded_string just past the lookahead token
+        self.next = None  # lookahead token, or None at end of pattern
+        self.__next()
+    def __next(self):  # load the token at self.index into self.next and advance self.index past it
+        index = self.index
+        try:
+            char = self.decoded_string[index]
+        except IndexError:
+            self.next = None
+            return
+        if char == "\\":  # a backslash and the following character form one two-character token
+            index += 1
+            try:
+                char += self.decoded_string[index]
+            except IndexError:
+                raise error("bad escape (end of pattern)",
+                            self.string, len(self.string) - 1) from None
+        self.index = index + 1
+        self.next = char
+    def match(self, char):  # consume the lookahead token only if it equals char
+        if char == self.next:
+            self.__next()
+            return True
+        return False
+    def get(self):  # return the lookahead token (None at end) and advance
+        this = self.next
+        self.__next()
+        return this
+    def getwhile(self, n, charset):  # consume at most n consecutive tokens that are in charset
+        result = ''
+        for _ in range(n):
+            c = self.next
+            if c not in charset:
+                break
+            result += c
+            self.__next()
+        return result
+    def getuntil(self, terminator, name):  # collect tokens before terminator and consume it; name appears in errors
+        result = ''
+        while True:
+            c = self.next
+            self.__next()
+            if c is None:  # pattern ended before the terminator was seen
+                if not result:
+                    raise self.error("missing " + name)
+                raise self.error("missing %s, unterminated name" % terminator,
+                                 len(result))
+            if c == terminator:
+                if not result:  # terminator found immediately: nothing was collected
+                    raise self.error("missing " + name, 1)
+                break
+            result += c
+        return result
+    @property
+    def pos(self):  # same computation as tell()
+        return self.index - len(self.next or '')
+    def tell(self):  # start offset of the lookahead token; equals self.index at end of pattern
+        return self.index - len(self.next or '')
+    def seek(self, index):  # reposition to index and reload the lookahead token
+        self.index = index
+        self.__next()
+
+    def error(self, msg, offset=0):  # builds and returns (does not raise) an error at tell() - offset
+        if not self.istext:
+            msg = msg.encode('ascii', 'backslashreplace').decode('ascii')
+        return error(msg, self.string, self.tell() - offset)
+
+    def checkgroupname(self, name, offset):  # raises self.error() unless name is a valid group name
+        if not (self.istext or name.isascii()):  # non-str patterns accept only ASCII names
+            msg = "bad character in group name %a" % name
+            raise self.error(msg, len(name) + offset)
+        if not name.isidentifier():
+            msg = "bad character in group name %r" % name
+            raise self.error(msg, len(name) + offset)
+
+def _class_escape(source, escape):
+    # handle escape code inside character class; returns an (op, av) pair or raises source.error()
+    code = ESCAPES.get(escape)
+    if code:
+        return code
+    code = CATEGORIES.get(escape)
+    if code and code[0] is IN:  # only IN categories are returned here; AT entries are not
+        return code
+    try:
+        c = escape[1:2]  # the character after the backslash
+        if c == "x":
+            # hexadecimal escape (exactly two digits)
+            escape += source.getwhile(2, HEXDIGITS)
+            if len(escape) != 4:
+                raise source.error("incomplete escape %s" % escape, len(escape))
+            return LITERAL, int(escape[2:], 16)
+        elif c == "u" and source.istext:
+            # unicode escape (exactly four digits)
+            escape += source.getwhile(4, HEXDIGITS)
+            if len(escape) != 6:
+                raise source.error("incomplete escape %s" % escape, len(escape))
+            return LITERAL, int(escape[2:], 16)
+        elif c == "U" and source.istext:
+            # unicode escape (exactly eight digits)
+            escape += source.getwhile(8, HEXDIGITS)
+            if len(escape) != 10:
+                raise source.error("incomplete escape %s" % escape, len(escape))
+            c = int(escape[2:], 16)
+            chr(c) # raise ValueError for invalid code
+            return LITERAL, c
+        elif c == "N" and source.istext:
+            import unicodedata
+            # named unicode escape e.g. \N{EM DASH}
+            if not source.match('{'):
+                raise source.error("missing {")
+            charname = source.getuntil('}', 'character name')
+            try:
+                c = ord(unicodedata.lookup(charname))
+            except (KeyError, TypeError):
+                raise source.error("undefined character name %r" % charname,
+                                   len(charname) + len(r'\N{}')) from None
+            return LITERAL, c
+        elif c in OCTDIGITS:
+            # octal escape (up to three digits)
+            escape += source.getwhile(2, OCTDIGITS)
+            c = int(escape[1:], 8)
+            if c > 0o377:
+                raise source.error('octal escape value %s outside of '
+                                   'range 0-0o377' % escape, len(escape))
+            return LITERAL, c
+        elif c in DIGITS:  # a digit that is not octal: rejected via the ValueError path below
+            raise ValueError
+        if len(escape) == 2:
+            if c in ASCIILETTERS:  # any other backslash + ASCII letter is an error
+                raise source.error('bad escape %s' % escape, len(escape))
+            return LITERAL, ord(escape[1])  # any other escaped character stands for itself
+    except ValueError:  # e.g. chr() on an invalid code point, or the DIGITS branch above
+        pass
+    raise source.error("bad escape %s" % escape, len(escape))
+
+def _escape(source, escape, state):
+    # handle escape code in expression; returns an (op, av) pair or raises source.error()
+    code = CATEGORIES.get(escape)  # CATEGORIES is consulted before ESCAPES here
+    if code:
+        return code
+    code = ESCAPES.get(escape)
+    if code:
+        return code
+    try:
+        c = escape[1:2]  # the character after the backslash
+        if c == "x":
+            # hexadecimal escape
+            escape += source.getwhile(2, HEXDIGITS)
+            if len(escape) != 4:
+                raise source.error("incomplete escape %s" % escape, len(escape))
+            return LITERAL, int(escape[2:], 16)
+        elif c == "u" and source.istext:
+            # unicode escape (exactly four digits)
+            escape += source.getwhile(4, HEXDIGITS)
+            if len(escape) != 6:
+                raise source.error("incomplete escape %s" % escape, len(escape))
+            return LITERAL, int(escape[2:], 16)
+        elif c == "U" and source.istext:
+            # unicode escape (exactly eight digits)
+            escape += source.getwhile(8, HEXDIGITS)
+            if len(escape) != 10:
+                raise source.error("incomplete escape %s" % escape, len(escape))
+            c = int(escape[2:], 16)
+            chr(c) # raise ValueError for invalid code
+            return LITERAL, c
+        elif c == "N" and source.istext:
+            import unicodedata
+            # named unicode escape e.g. \N{EM DASH}
+            if not source.match('{'):
+                raise source.error("missing {")
+            charname = source.getuntil('}', 'character name')
+            try:
+                c = ord(unicodedata.lookup(charname))
+            except (KeyError, TypeError):
+                raise source.error("undefined character name %r" % charname,
+                                   len(charname) + len(r'\N{}')) from None
+            return LITERAL, c
+        elif c == "0":
+            # octal escape
+            escape += source.getwhile(2, OCTDIGITS)
+            return LITERAL, int(escape[1:], 8)
+        elif c in DIGITS:
+            # octal escape *or* decimal group reference (sigh)
+            if source.next in DIGITS:
+                escape += source.get()
+                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
+                    source.next in OCTDIGITS):
+                    # got three octal digits; this is an octal escape
+                    escape += source.get()
+                    c = int(escape[1:], 8)
+                    if c > 0o377:
+                        raise source.error('octal escape value %s outside of '
+                                           'range 0-0o377' % escape,
+                                           len(escape))
+                    return LITERAL, c
+            # not an octal escape, so this is a group reference
+            group = int(escape[1:])
+            if group < state.groups:  # the group id has been allocated
+                if not state.checkgroup(group):
+                    raise source.error("cannot refer to an open group",
+                                       len(escape))
+                state.checklookbehindgroup(group, source)
+                return GROUPREF, group
+            raise source.error("invalid group reference %d" % group, len(escape) - 1)
+        if len(escape) == 2:
+            if c in ASCIILETTERS:  # any other backslash + ASCII letter is an error
+                raise source.error("bad escape %s" % escape, len(escape))
+            return LITERAL, ord(escape[1])  # any other escaped character stands for itself
+    except ValueError:  # e.g. chr() on an invalid code point; falls through to "bad escape"
+        pass
+    raise source.error("bad escape %s" % escape, len(escape))
+
+def _uniq(items):  # drop duplicates, keeping the first occurrence of each item in order
+    return list(dict.fromkeys(items))
+
+def _parse_sub(source, state, verbose, nested):
+    # parse an alternation: a|b|c; returns a single SubPattern
+
+    items = []
+    itemsappend = items.append
+    sourcematch = source.match
+    start = source.tell()  # NOTE(review): not read again within this function
+    while True:
+        itemsappend(_parse(source, state, verbose, nested + 1,
+                           not nested and not items))  # first=True only for the first top-level alternative
+        if not sourcematch("|"):
+            break
+        if not nested:  # at top level, re-read verbose from state.flags before the next alternative
+            verbose = state.flags & SRE_FLAG_VERBOSE
+
+    if len(items) == 1:  # no "|" was seen: return the lone alternative unchanged
+        return items[0]
+
+    subpattern = SubPattern(state)
+
+    # check if all items share a common prefix
+    while True:
+        prefix = None
+        for item in items:
+            if not item:  # an empty alternative ends the prefix search
+                break
+            if prefix is None:
+                prefix = item[0]
+            elif item[0] != prefix:
+                break
+        else:
+            # all subitems start with a common "prefix".
+            # move it out of the branch
+            for item in items:
+                del item[0]
+            subpattern.append(prefix)
+            continue # check next one
+        break
+
+    # check if the branch can be replaced by a character set
+    set = []  # local name shadows the builtin set within this function
+    for item in items:
+        if len(item) != 1:
+            break
+        op, av = item[0]
+        if op is LITERAL:
+            set.append((op, av))
+        elif op is IN and av[0][0] is not NEGATE:  # only non-negated sets are merged
+            set.extend(av)
+        else:
+            break
+    else:
+        # we can store this as a character set instead of a
+        # branch (the compiler may optimize this even more)
+        subpattern.append((IN, _uniq(set)))
+        return subpattern
+
+    subpattern.append((BRANCH, (None, items)))
+    return subpattern
+
+def _parse(source, state, verbose, nested, first=False):
+    # parse a simple pattern
+    subpattern = SubPattern(state)
+
+    # precompute constants into local variables
+    subpatternappend = subpattern.append
+    sourceget = source.get
+    sourcematch = source.match
+    _len = len
+    _ord = ord
+
+    while True:
+
+        this = source.next
+        if this is None:
+            break # end of pattern
+        if this in "|)":
+            break # end of subpattern
+        sourceget()
+
+        if verbose:
+            # skip whitespace and comments
+            if this in WHITESPACE:
+                continue
+            if this == "#":
+                while True:
+                    this = sourceget()
+                    if this is None or this == "\n":
+                        break
+                continue
+
+        if this[0] == "\\":
+            code = _escape(source, this, state)
+            subpatternappend(code)
+
+        elif this not in SPECIAL_CHARS:
+            subpatternappend((LITERAL, _ord(this)))
+
+        elif this == "[":
+            here = source.tell() - 1
+            # character set
+            set = []
+            setappend = set.append
+##          if sourcematch(":"):
+##              pass # handle character classes
+            if source.next == '[':
+                import warnings
+                warnings.warn(
+                    'Possible nested set at position %d' % source.tell(),
+                    FutureWarning, stacklevel=nested + 6
+                )
+            negate = sourcematch("^")
+            # check remaining characters
+            while True:
+                this = sourceget()
+                if this is None:
+                    raise source.error("unterminated character set",
+                                       source.tell() - here)
+                if this == "]" and set:
+                    break
+                elif this[0] == "\\":
+                    code1 = _class_escape(source, this)
+                else:
+                    if set and this in '-&~|' and source.next == this:
+                        import warnings
+                        warnings.warn(
+                            'Possible set %s at position %d' % (
+                                'difference' if this == '-' else
+                                'intersection' if this == '&' else
+                                'symmetric difference' if this == '~' else
+                                'union',
+                                source.tell() - 1),
+                            FutureWarning, stacklevel=nested + 6
+                        )
+                    code1 = LITERAL, _ord(this)
+                if sourcematch("-"):
+                    # potential range
+                    that = sourceget()
+                    if that is None:
+                        raise source.error("unterminated character set",
+                                           source.tell() - here)
+                    if that == "]":
+                        if code1[0] is IN:
+                            code1 = code1[1][0]
+                        setappend(code1)
+                        setappend((LITERAL, _ord("-")))
+                        break
+                    if that[0] == "\\":
+                        code2 = _class_escape(source, that)
+                    else:
+                        if that == '-':
+                            import warnings
+                            warnings.warn(
+                                'Possible set difference at position %d' % (
+                                    source.tell() - 2),
+                                FutureWarning, stacklevel=nested + 6
+                            )
+                        code2 = LITERAL, _ord(that)
+                    if code1[0] != LITERAL or code2[0] != LITERAL:
+                        msg = "bad character range %s-%s" % (this, that)
+                        raise source.error(msg, len(this) + 1 + len(that))
+                    lo = code1[1]
+                    hi = code2[1]
+                    if hi < lo:
+                        msg = "bad character range %s-%s" % (this, that)
+                        raise source.error(msg, len(this) + 1 + len(that))
+                    setappend((RANGE, (lo, hi)))
+                else:
+                    if code1[0] is IN:
+                        code1 = code1[1][0]
+                    setappend(code1)
+
+            set = _uniq(set)
+            # XXX: <fl> should move set optimization to compiler!
+            if _len(set) == 1 and set[0][0] is LITERAL:
+                # optimization
+                if negate:
+                    subpatternappend((NOT_LITERAL, set[0][1]))
+                else:
+                    subpatternappend(set[0])
+            else:
+                if negate:
+                    set.insert(0, (NEGATE, None))
+                # charmap optimization can't be added here because
+                # global flags still are not known
+                subpatternappend((IN, set))
+
+        elif this in REPEAT_CHARS:
+            # repeat previous item
+            here = source.tell()
+            if this == "?":
+                min, max = 0, 1
+            elif this == "*":
+                min, max = 0, MAXREPEAT
+
+            elif this == "+":
+                min, max = 1, MAXREPEAT
+            elif this == "{":
+                if source.next == "}":
+                    subpatternappend((LITERAL, _ord(this)))
+                    continue
+
+                min, max = 0, MAXREPEAT
+                lo = hi = ""
+                while source.next in DIGITS:
+                    lo += sourceget()
+                if sourcematch(","):
+                    while source.next in DIGITS:
+                        hi += sourceget()
+                else:
+                    hi = lo
+                if not sourcematch("}"):
+                    subpatternappend((LITERAL, _ord(this)))
+                    source.seek(here)
+                    continue
+
+                if lo:
+                    min = int(lo)
+                    if min >= MAXREPEAT:
+                        raise OverflowError("the repetition number is too large")
+                if hi:
+                    max = int(hi)
+                    if max >= MAXREPEAT:
+                        raise OverflowError("the repetition number is too large")
+                    if max < min:
+                        raise source.error("min repeat greater than max repeat",
+                                           source.tell() - here)
+            else:
+                raise AssertionError("unsupported quantifier %r" % (char,))
+            # figure out which item to repeat
+            if subpattern:
+                item = subpattern[-1:]
+            else:
+                item = None
+            if not item or item[0][0] is AT:
+                raise source.error("nothing to repeat",
+                                   source.tell() - here + len(this))
+            if item[0][0] in _REPEATCODES:
+                raise source.error("multiple repeat",
+                                   source.tell() - here + len(this))
+            if item[0][0] is SUBPATTERN:
+                group, add_flags, del_flags, p = item[0][1]
+                if group is None and not add_flags and not del_flags:
+                    item = p
+            if sourcematch("?"):
+                # Non-Greedy Match
+                subpattern[-1] = (MIN_REPEAT, (min, max, item))
+            elif sourcematch("+"):
+                # Possessive Match (Always Greedy)
+                subpattern[-1] = (POSSESSIVE_REPEAT, (min, max, item))
+            else:
+                # Greedy Match
+                subpattern[-1] = (MAX_REPEAT, (min, max, item))
+
+        elif this == ".":
+            subpatternappend((ANY, None))
+
+        elif this == "(":
+            start = source.tell() - 1
+            capture = True
+            atomic = False
+            name = None
+            add_flags = 0
+            del_flags = 0
+            if sourcematch("?"):
+                # options
+                char = sourceget()
+                if char is None:
+                    raise source.error("unexpected end of pattern")
+                if char == "P":
+                    # python extensions
+                    if sourcematch("<"):
+                        # named group: skip forward to end of name
+                        name = source.getuntil(">", "group name")
+                        source.checkgroupname(name, 1)
+                    elif sourcematch("="):
+                        # named backreference
+                        name = source.getuntil(")", "group name")
+                        source.checkgroupname(name, 1)
+                        gid = state.groupdict.get(name)
+                        if gid is None:
+                            msg = "unknown group name %r" % name
+                            raise source.error(msg, len(name) + 1)
+                        if not state.checkgroup(gid):
+                            raise source.error("cannot refer to an open group",
+                                               len(name) + 1)
+                        state.checklookbehindgroup(gid, source)
+                        subpatternappend((GROUPREF, gid))
+                        continue
+
+                    else:
+                        char = sourceget()
+                        if char is None:
+                            raise source.error("unexpected end of pattern")
+                        raise source.error("unknown extension ?P" + char,
+                                           len(char) + 2)
+                elif char == ":":
+                    # non-capturing group
+                    capture = False
+                elif char == "#":
+                    # comment
+                    while True:
+                        if source.next is None:
+                            raise source.error("missing ), unterminated comment",
+                                               source.tell() - start)
+                        if sourceget() == ")":
+                            break
+                    continue
+
+                elif char in "=!<":
+                    # lookahead assertions
+                    dir = 1
+                    if char == "<":
+                        char = sourceget()
+                        if char is None:
+                            raise source.error("unexpected end of pattern")
+                        if char not in "=!":
+                            raise source.error("unknown extension ?<" + char,
+                                               len(char) + 2)
+                        dir = -1 # lookbehind
+                        lookbehindgroups = state.lookbehindgroups
+                        if lookbehindgroups is None:
+                            state.lookbehindgroups = state.groups
+                    p = _parse_sub(source, state, verbose, nested + 1)
+                    if dir < 0:
+                        if lookbehindgroups is None:
+                            state.lookbehindgroups = None
+                    if not sourcematch(")"):
+                        raise source.error("missing ), unterminated subpattern",
+                                           source.tell() - start)
+                    if char == "=":
+                        subpatternappend((ASSERT, (dir, p)))
+                    elif p:
+                        subpatternappend((ASSERT_NOT, (dir, p)))
+                    else:
+                        subpatternappend((FAILURE, ()))
+                    continue
+
+                elif char == "(":
+                    # conditional backreference group
+                    condname = source.getuntil(")", "group name")
+                    if not (condname.isdecimal() and condname.isascii()):
+                        source.checkgroupname(condname, 1)
+                        condgroup = state.groupdict.get(condname)
+                        if condgroup is None:
+                            msg = "unknown group name %r" % condname
+                            raise source.error(msg, len(condname) + 1)
+                    else:
+                        condgroup = int(condname)
+                        if not condgroup:
+                            raise source.error("bad group number",
+                                               len(condname) + 1)
+                        if condgroup >= MAXGROUPS:
+                            msg = "invalid group reference %d" % condgroup
+                            raise source.error(msg, len(condname) + 1)
+                        if condgroup not in state.grouprefpos:
+                            state.grouprefpos[condgroup] = (
+                                source.tell() - len(condname) - 1
+                            )
+                        if not (condname.isdecimal() and condname.isascii()):
+                            import warnings
+                            warnings.warn(
+                                "bad character in group name %s at position %d" %
+                                (repr(condname) if source.istext else ascii(condname),
+                                 source.tell() - len(condname) - 1),
+                                DeprecationWarning, stacklevel=nested + 6
+                            )
+                    state.checklookbehindgroup(condgroup, source)
+                    item_yes = _parse(source, state, verbose, nested + 1)
+                    if source.match("|"):
+                        item_no = _parse(source, state, verbose, nested + 1)
+                        if source.next == "|":
+                            raise source.error("conditional backref with more than two branches")
+                    else:
+                        item_no = None
+                    if not source.match(")"):
+                        raise source.error("missing ), unterminated subpattern",
+                                           source.tell() - start)
+                    subpatternappend((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
+                    continue
+
+                elif char == ">":
+                    # non-capturing, atomic group
+                    capture = False
+                    atomic = True
+                elif char in FLAGS or char == "-":
+                    # flags
+                    flags = _parse_flags(source, state, char)
+                    if flags is None:  # global flags
+                        if not first or subpattern:
+                            raise source.error('global flags not at the start '
+                                               'of the expression',
+                                               source.tell() - start)
+                        verbose = state.flags & SRE_FLAG_VERBOSE
+                        continue
+
+                    add_flags, del_flags = flags
+                    capture = False
+                else:
+                    raise source.error("unknown extension ?" + char,
+                                       len(char) + 1)
+
+            # parse group contents
+            if capture:
+                try:
+                    group = state.opengroup(name)
+                except error as err:
+                    raise source.error(err.msg, len(name) + 1) from None
+            else:
+                group = None
+            sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and
+                           not (del_flags & SRE_FLAG_VERBOSE))
+            p = _parse_sub(source, state, sub_verbose, nested + 1)
+            if not source.match(")"):
+                raise source.error("missing ), unterminated subpattern",
+                                   source.tell() - start)
+            if group is not None:
+                state.closegroup(group, p)
+            if atomic:
+                assert group is None
+                subpatternappend((ATOMIC_GROUP, p))
+            else:
+                subpatternappend((SUBPATTERN, (group, add_flags, del_flags, p)))
+
+        elif this == "^":
+            subpatternappend((AT, AT_BEGINNING))
+
+        elif this == "$":
+            subpatternappend((AT, AT_END))
+
+        else:
+            raise AssertionError("unsupported special character %r" % (char,))
+
+    # unpack non-capturing groups
+    for i in range(len(subpattern))[::-1]:
+        op, av = subpattern[i]
+        if op is SUBPATTERN:
+            group, add_flags, del_flags, p = av
+            if group is None and not add_flags and not del_flags:
+                subpattern[i: i+1] = p
+
+    return subpattern
+
+def _parse_flags(source, state, char):  # char: first flag letter, or "-"
+    sourceget = source.get
+    add_flags = 0  # flags to switch on
+    del_flags = 0  # flags to switch off
+    if char != "-":
+        while True:
+            flag = FLAGS[char]
+            if source.istext:
+                if char == 'L':
+                    msg = "bad inline flags: cannot use 'L' flag with a str pattern"
+                    raise source.error(msg)
+            else:
+                if char == 'u':
+                    msg = "bad inline flags: cannot use 'u' flag with a bytes pattern"
+                    raise source.error(msg)
+            add_flags |= flag
+            if (flag & TYPE_FLAGS) and (add_flags & TYPE_FLAGS) != flag:
+                msg = "bad inline flags: flags 'a', 'u' and 'L' are incompatible"
+                raise source.error(msg)
+            char = sourceget()
+            if char is None:  # pattern ended inside the flag list
+                raise source.error("missing -, : or )")
+            if char in ")-:":
+                break
+            if char not in FLAGS:
+                msg = "unknown flag" if char.isalpha() else "missing -, : or )"
+                raise source.error(msg, len(char))
+    if char == ")":  # "(?flags)": merge into state.flags, report None
+        state.flags |= add_flags
+        return None
+    if add_flags & GLOBAL_FLAGS:  # only the ")" form above may set these
+        raise source.error("bad inline flags: cannot turn on global flag", 1)
+    if char == "-":  # what follows "-" is the set of flags to turn off
+        char = sourceget()
+        if char is None:
+            raise source.error("missing flag")
+        if char not in FLAGS:
+            msg = "unknown flag" if char.isalpha() else "missing flag"
+            raise source.error(msg, len(char))
+        while True:
+            flag = FLAGS[char]
+            if flag & TYPE_FLAGS:
+                msg = "bad inline flags: cannot turn off flags 'a', 'u' and 'L'"
+                raise source.error(msg)
+            del_flags |= flag
+            char = sourceget()
+            if char is None:
+                raise source.error("missing :")
+            if char == ":":
+                break
+            if char not in FLAGS:
+                msg = "unknown flag" if char.isalpha() else "missing :"
+                raise source.error(msg, len(char))
+    assert char == ":"  # ")" returned above; the "-" loop only exits on ":"
+    if del_flags & GLOBAL_FLAGS:
+        raise source.error("bad inline flags: cannot turn off global flag", 1)
+    if add_flags & del_flags:
+        raise source.error("bad inline flags: flag turned on and off", 1)
+    return add_flags, del_flags  # scoped form: the caller unpacks this pair
+
+def fix_flags(src, flags):
+    # Validate flags against the pattern type (str or bytes); return them adjusted
+    if isinstance(src, str):
+        if flags & SRE_FLAG_LOCALE:
+            raise ValueError("cannot use LOCALE flag with a str pattern")
+        if not flags & SRE_FLAG_ASCII:
+            flags |= SRE_FLAG_UNICODE  # str pattern without ASCII: UNICODE is added
+        elif flags & SRE_FLAG_UNICODE:
+            raise ValueError("ASCII and UNICODE flags are incompatible")
+    else:  # every non-str pattern is checked with the bytes rules
+        if flags & SRE_FLAG_UNICODE:
+            raise ValueError("cannot use UNICODE flag with a bytes pattern")
+        if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
+            raise ValueError("ASCII and LOCALE flags are incompatible")
+    return flags
+
+def parse(str, flags=0, state=None):
+    # parse 're' pattern into list of (opcode, argument) tuples
+    # NOTE: the parameter `str` (the pattern) shadows the builtin in this function
+    source = Tokenizer(str)
+
+    if state is None:
+        state = State()  # fresh parser state unless the caller supplied one
+    state.flags = flags
+    state.str = str
+
+    p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0)
+    p.state.flags = fix_flags(str, p.state.flags)  # validate once parsing is done
+
+    if source.next is not None:  # input remains after the top-level parse
+        assert source.next == ")"
+        raise source.error("unbalanced parenthesis")
+
+    for g in p.state.grouprefpos:  # maps a referenced group number to its position
+        if g >= p.state.groups:
+            msg = "invalid group reference %d" % g
+            raise error(msg, str, p.state.grouprefpos[g])
+
+    if flags & SRE_FLAG_DEBUG:
+        p.dump()
+
+    return p
+
+def parse_template(source, pattern):
+    # parse 're' replacement string into list of literals and
+    # group references
+    #
+    # The result alternates literal chunks (str, or latin-1 encoded bytes
+    # when the template is not text) with int group indexes.  addgroup()
+    # flushes the pending literal before each index and one more literal is
+    # appended at the end, so literal entries may be empty.
+    s = Tokenizer(source)
+    sget = s.get
+    result = []
+    literal = []
+    lappend = literal.append
+    def addliteral():
+        if s.istext:
+            result.append(''.join(literal))
+        else:
+            # The tokenizer implicitly decodes bytes objects as latin-1, we must
+            # therefore re-encode the final representation.
+            result.append(''.join(literal).encode('latin-1'))
+        del literal[:]
+    def addgroup(index, pos):
+        if index > pattern.groups:
+            raise s.error("invalid group reference %d" % index, pos)
+        addliteral()
+        result.append(index)
+    groupindex = pattern.groupindex
+    while True:
+        this = sget()
+        if this is None:
+            break # end of replacement string
+        if this[0] == "\\":
+            # group
+            c = this[1]
+            if c == "g":
+                if not s.match("<"):
+                    raise s.error("missing <")
+                name = s.getuntil(">", "group name")
+                if not (name.isdecimal() and name.isascii()):
+                    s.checkgroupname(name, 1)
+                    try:
+                        index = groupindex[name]
+                    except KeyError:
+                        raise IndexError("unknown group name %r" % name) from None
+                else:
+                    # Only reached when name is a non-empty ASCII decimal
+                    # string, so its characters need no further checking.
+                    index = int(name)
+                    if index >= MAXGROUPS:
+                        raise s.error("invalid group reference %d" % index,
+                                      len(name) + 1)
+                addgroup(index, len(name) + 1)
+            elif c == "0":
+                # "\0" plus up to two more octal digits: a literal character
+                if s.next in OCTDIGITS:
+                    this += sget()
+                    if s.next in OCTDIGITS:
+                        this += sget()
+                lappend(chr(int(this[1:], 8) & 0xff))
+            elif c in DIGITS:  # 1-2 digit group reference, or a 3-digit octal escape
+                isoctal = False
+                if s.next in DIGITS:
+                    this += sget()
+                    if (c in OCTDIGITS and this[2] in OCTDIGITS and
+                        s.next in OCTDIGITS):
+                        this += sget()
+                        isoctal = True
+                        c = int(this[1:], 8)
+                        if c > 0o377:
+                            raise s.error('octal escape value %s outside of '
+                                          'range 0-0o377' % this, len(this))
+                        lappend(chr(c))
+                if not isoctal:
+                    addgroup(int(this[1:]), len(this) - 1)
+            else:
+                try:
+                    this = chr(ESCAPES[this][1])
+                except KeyError:
+                    if c in ASCIILETTERS:
+                        raise s.error('bad escape %s' % this, len(this)) from None
+                lappend(this)
+        else:
+            lappend(this)
+    addliteral()
+    return result
diff --git a/crates/weavepy-vm/src/stdlib/python/sre_compile.py b/crates/weavepy-vm/src/stdlib/python/sre_compile.py
new file mode 100644
index 0000000..41cc5bd
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/sre_compile.py
@@ -0,0 +1,11 @@
+"""Internal support module for sre (deprecated alias for re._compiler)."""
+
+import warnings
+warnings.warn(f"module {__name__!r} is deprecated",
+              DeprecationWarning, stacklevel=2)  # runs whenever this module body executes
+
+from re import _compiler
+globals().update({k: v for k, v in vars(_compiler).items()
+                  if not k.startswith('__')})  # copy every name not starting with '__'
+
+del warnings, _compiler  # remove the helper names bound above from this namespace
diff --git a/crates/weavepy-vm/src/stdlib/python/sre_constants.py b/crates/weavepy-vm/src/stdlib/python/sre_constants.py
new file mode 100644
index 0000000..b895082
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/sre_constants.py
@@ -0,0 +1,11 @@
+"""Internal support module for sre (deprecated alias for re._constants)."""
+
+import warnings
+warnings.warn(f"module {__name__!r} is deprecated",
+              DeprecationWarning, stacklevel=2)  # runs whenever this module body executes
+
+from re import _constants
+globals().update({k: v for k, v in vars(_constants).items()
+                  if not k.startswith('__')})  # copy every name not starting with '__'
+
+del warnings, _constants  # remove the helper names bound above from this namespace
diff --git a/crates/weavepy-vm/src/stdlib/python/sre_parse.py b/crates/weavepy-vm/src/stdlib/python/sre_parse.py
new file mode 100644
index 0000000..13d9bf2
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/python/sre_parse.py
@@ -0,0 +1,11 @@
+"""Internal support module for sre (deprecated alias for re._parser)."""
+
+import warnings
+warnings.warn(f"module {__name__!r} is deprecated",
+              DeprecationWarning, stacklevel=2)  # runs whenever this module body executes
+
+from re import _parser
+globals().update({k: v for k, v in vars(_parser).items()
+                  if not k.startswith('__')})  # copy every name not starting with '__'
+
+del warnings, _parser  # remove the helper names bound above from this namespace
diff --git a/crates/weavepy-vm/src/stdlib/re.rs b/crates/weavepy-vm/src/stdlib/re.rs
deleted file mode 100644
index d6e92cd..0000000
--- a/crates/weavepy-vm/src/stdlib/re.rs
+++ /dev/null
@@ -1,1089 +0,0 @@
-//! The `re` built-in module.
-//!
-//! Backed by Rust's `regex` crate. The user-visible API mirrors
-//! CPython's `re` module for the common functions (`match`,
-//! `search`, `findall`, `finditer`, `sub`, `split`, `compile`).
-//!
-//! We do not support every CPython feature: backreferences in the
-//! pattern (e.g. `(?P=name)`) and lookaround (`(?=...)` / `(?<=...)`)
-//! are limited by the underlying engine. The dialect is close enough
-//! that the vast majority of everyday patterns work as expected.
-
-use crate::sync::Rc;
-use crate::sync::RefCell;
-
-use regex::{Captures, Regex};
-
-use crate::error::{type_error, value_error, RuntimeError};
-use crate::import::ModuleCache;
-use crate::object::{BuiltinFn, DictData, DictKey, Object, PyModule};
-use crate::types::{PyInstance, TypeObject};
-
-pub fn build(_cache: &ModuleCache) -> Rc<PyModule> {
-    let dict = Rc::new(RefCell::new(DictData::new()));
-    {
-        let mut d = dict.borrow_mut();
-        d.insert(
-            DictKey(Object::from_static("__name__")),
-            Object::from_static("re"),
-        );
-        d.insert(
-            DictKey(Object::from_static("__doc__")),
-            Object::from_static("Support for regular expressions."),
-        );
-        d.insert(DictKey(Object::from_static("IGNORECASE")), Object::Int(2));
-        d.insert(DictKey(Object::from_static("I")), Object::Int(2));
-        d.insert(DictKey(Object::from_static("MULTILINE")), Object::Int(8));
-        d.insert(DictKey(Object::from_static("M")), Object::Int(8));
-        d.insert(DictKey(Object::from_static("DOTALL")), Object::Int(16));
-        d.insert(DictKey(Object::from_static("S")), Object::Int(16));
-        d.insert(DictKey(Object::from_static("VERBOSE")), Object::Int(64));
-        d.insert(DictKey(Object::from_static("X")), Object::Int(64));
-        d.insert(DictKey(Object::from_static("ASCII")), Object::Int(256));
-        d.insert(DictKey(Object::from_static("A")), Object::Int(256));
-        d.insert(DictKey(Object::from_static("match")), b("match", re_match));
-        d.insert(
-            DictKey(Object::from_static("search")),
-            b("search", re_search),
-        );
-        d.insert(
-            DictKey(Object::from_static("fullmatch")),
-            b("fullmatch", re_fullmatch),
-        );
-        d.insert(
-            DictKey(Object::from_static("findall")),
-            b("findall", re_findall),
-        );
-        d.insert(
-            DictKey(Object::from_static("finditer")),
-            b("finditer", re_finditer),
-        );
-        d.insert(DictKey(Object::from_static("sub")), b("sub", re_sub));
-        d.insert(DictKey(Object::from_static("subn")), b("subn", re_subn));
-        d.insert(DictKey(Object::from_static("split")), b("split", re_split));
-        d.insert(
-            DictKey(Object::from_static("compile")),
-            b("compile", re_compile),
-        );
-        d.insert(
-            DictKey(Object::from_static("escape")),
-            b("escape", re_escape),
-        );
-        d.insert(
-            DictKey(Object::from_static("error")),
-            Object::Type(re_error_type()),
-        );
-    }
-    Rc::new(PyModule {
-        name: "re".to_owned(),
-        filename: None,
-        dict,
-    })
-}
-
-fn b(name: &'static str, body: fn(&[Object]) -> Result<Object, RuntimeError>) -> Object {
-    Object::Builtin(Rc::new(BuiltinFn {
-        name,
-        call: Box::new(body),
-        call_kw: None,
-    }))
-}
-
-fn re_error_type() -> Rc<TypeObject> {
-    let bt = crate::builtin_types::builtin_types();
-    TypeObject::new_user("error", vec![bt.value_error.clone()], DictData::new())
-        .unwrap_or_else(|_| bt.value_error.clone())
-}
-
-/// Convert a Python regex pattern to one accepted by `regex`. We
-/// rewrite the most common CPython-only shortcuts: `\A` (string
-/// start) and `\Z` (string end) are kept as-is (regex supports them
-/// as `\A` and `\z` respectively, but for our purposes we treat them
-/// equivalently to anchors).
-fn compile_pattern(pat: &str, flags: i64) -> Result<Regex, RuntimeError> {
-    let mut translated = pat.replace("\\Z", "\\z");
-    // Python's `(?P<name>...)` is supported by `regex` natively.
-    let mut builder = regex::RegexBuilder::new(&translated);
-    if flags & 2 != 0 {
-        builder.case_insensitive(true);
-    }
-    if flags & 8 != 0 {
-        builder.multi_line(true);
-    }
-    if flags & 16 != 0 {
-        builder.dot_matches_new_line(true);
-    }
-    if flags & 64 != 0 {
-        builder.ignore_whitespace(true);
-    }
-    // `regex` rejects some Python escapes (`\d` defaults to ASCII in
-    // Python 3 unless `re.UNICODE`); our build treats `\d`/`\w`/`\s`
-    // as Unicode-aware, matching CPython 3 defaults.
-    builder.build().or_else(|_| {
-        // Some patterns contain literal `(?P=name)` backrefs we can't
-        // support; if so, fall back to a verbose error.
-        translated = pat.to_owned();
-        builder = regex::RegexBuilder::new(&translated);
-        builder
-            .build()
-            .map_err(|e| value_error(format!("invalid pattern: {e}")))
-    })
-}
-
-/// Compile with the `fancy-regex` engine. Used as a fallback when
-/// the base `regex` crate rejects the pattern — typically because
-/// of CPython features `regex` doesn't implement (lookaround,
-/// backreferences). Returned eagerly so callers can decide whether
-/// to fall back without paying the cost on every successful
-/// compile.
-fn compile_pattern_fancy(pat: &str, flags: i64) -> Result<fancy_regex::Regex, RuntimeError> {
-    let mut translated = pat.replace("\\Z", "\\z");
-    // Apply inline flag prefix so the same CPython flag bits steer
-    // the fancy engine.
-    let mut prefix = String::new();
-    if flags & 2 != 0 {
-        prefix.push('i');
-    }
-    if flags & 8 != 0 {
-        prefix.push('m');
-    }
-    if flags & 16 != 0 {
-        prefix.push('s');
-    }
-    if flags & 64 != 0 {
-        prefix.push('x');
-    }
-    if !prefix.is_empty() {
-        translated = format!("(?{prefix}){translated}");
-    }
-    fancy_regex::Regex::new(&translated).map_err(|e| value_error(format!("invalid pattern: {e}")))
-}
-
-/// Public alias exposed to the VM dispatcher so it can route
-/// callable-replacement ``re.sub`` calls itself.
-pub fn extract_pattern_pub(arg: &Object) -> Result<(String, i64), RuntimeError> {
-    extract_pattern(arg)
-}
-
-/// Public helper: collect every non-overlapping match span +
-/// captures of ``pat`` over ``text``. Used by the VM-routed
-/// ``re.sub`` callable path so the actual ``repl(match)`` calls
-/// happen on the interpreter side.
-pub fn collect_all_matches(
-    pat: &str,
-    flags: i64,
-    text: &str,
-) -> Result<Vec<(usize, usize, Vec<Option<(usize, usize)>>)>, RuntimeError> {
-    let mut out: Vec<(usize, usize, Vec<Option<(usize, usize)>>)> = Vec::new();
-    let mut on_match = |s: usize, e: usize, groups: &[Option<(usize, usize)>]| {
-        out.push((s, e, groups.to_vec()));
-    };
-    iter_all_matches(pat, flags, text, &mut on_match)?;
-    Ok(out)
-}
-
-/// Build a ``re.Match`` object compatible with the rest of the
-/// module from a pre-extracted set of group spans.
-pub fn build_match_object(
-    pat: &str,
-    text: &str,
-    groups: &[Option<(usize, usize)>],
-    _full_start: usize,
-    _full_end: usize,
-) -> Object {
-    let caps = DualCaptures {
-        groups: groups.to_vec(),
-        named: Vec::new(),
-    };
-    make_match_from_captured(pat, text, &caps, text, 0)
-}
-
-fn extract_pattern(arg: &Object) -> Result<(String, i64), RuntimeError> {
-    match arg {
-        Object::Str(s) => Ok((s.to_string(), 0)),
-        Object::Instance(inst) if inst.class.name == "Pattern" => {
-            let pat = inst
-                .dict
-                .borrow()
-                .get(&DictKey(Object::from_static("pattern")))
-                .cloned()
-                .unwrap_or(Object::from_static(""));
-            let flags = inst
-                .dict
-                .borrow()
-                .get(&DictKey(Object::from_static("flags")))
-                .cloned()
-                .unwrap_or(Object::Int(0));
-            let p = match pat {
-                Object::Str(s) => s.to_string(),
-                _ => return Err(type_error("invalid Pattern object")),
-            };
-            let f = match flags {
-                Object::Int(i) => i,
-                _ => 0,
-            };
-            Ok((p, f))
-        }
-        _ => Err(type_error(
-            "first argument must be string or compiled pattern",
-        )),
-    }
-}
-
-thread_local! {
-    static PATTERN_CLASS: RefCell<Option<Rc<TypeObject>>> = const { RefCell::new(None) };
-    static MATCH_CLASS: RefCell<Option<Rc<TypeObject>>> = const { RefCell::new(None) };
-}
-
-fn pattern_class() -> Rc<TypeObject> {
-    PATTERN_CLASS.with(|slot| {
-        if let Some(c) = slot.borrow().as_ref() {
-            return c.clone();
-        }
-        let bt = crate::builtin_types::builtin_types();
-        let mut dict = DictData::new();
-        for (name, method) in pattern_methods() {
-            dict.insert(DictKey(Object::from_str(name)), method);
-        }
-        let cls =
-            TypeObject::new_user("Pattern", vec![bt.object_.clone()], dict).expect("Pattern type");
-        *slot.borrow_mut() = Some(cls.clone());
-        cls
-    })
-}
-
-fn match_class() -> Rc<TypeObject> {
-    MATCH_CLASS.with(|slot| {
-        if let Some(c) = slot.borrow().as_ref() {
-            return c.clone();
-        }
-        let bt = crate::builtin_types::builtin_types();
-        let mut dict = DictData::new();
-        for (name, method) in match_methods() {
-            dict.insert(DictKey(Object::from_str(name)), method);
-        }
-        let cls =
-            TypeObject::new_user("Match", vec![bt.object_.clone()], dict).expect("Match type");
-        *slot.borrow_mut() = Some(cls.clone());
-        cls
-    })
-}
-
-fn re_compile(args: &[Object]) -> Result<Object, RuntimeError> {
-    let pat = match args.first() {
-        Some(Object::Str(s)) => s.to_string(),
-        _ => return Err(type_error("compile() expects str pattern")),
-    };
-    let flags = match args.get(1) {
-        Some(Object::Int(i)) => *i,
-        None => 0,
-        _ => return Err(type_error("flags must be int")),
-    };
-    // Validate by compiling now; we store the source.
-    let _ = compile_pattern(&pat, flags)?;
-    Ok(make_pattern(pat, flags))
-}
-
-fn make_pattern(pattern: String, flags: i64) -> Object {
-    let inst = PyInstance::new(pattern_class());
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("pattern")),
-        Object::from_str(pattern),
-    );
-    inst.dict
-        .borrow_mut()
-        .insert(DictKey(Object::from_static("flags")), Object::Int(flags));
-    Object::Instance(Rc::new(inst))
-}
-
-fn pattern_methods() -> Vec<(&'static str, Object)> {
-    vec![
-        ("match", b("match", pattern_match)),
-        ("search", b("search", pattern_search)),
-        ("fullmatch", b("fullmatch", pattern_fullmatch)),
-        ("findall", b("findall", pattern_findall)),
-        ("finditer", b("finditer", pattern_finditer)),
-        ("sub", b("sub", pattern_sub)),
-        ("split", b("split", pattern_split)),
-    ]
-}
-
-fn pattern_match(args: &[Object]) -> Result<Object, RuntimeError> {
-    run_match(args, true, false)
-}
-fn pattern_search(args: &[Object]) -> Result<Object, RuntimeError> {
-    run_match(args, false, false)
-}
-fn pattern_fullmatch(args: &[Object]) -> Result<Object, RuntimeError> {
-    run_match(args, true, true)
-}
-fn pattern_findall(args: &[Object]) -> Result<Object, RuntimeError> {
-    re_findall(args)
-}
-fn pattern_finditer(args: &[Object]) -> Result<Object, RuntimeError> {
-    re_finditer(args)
-}
-fn pattern_sub(args: &[Object]) -> Result<Object, RuntimeError> {
-    re_sub(args)
-}
-fn pattern_split(args: &[Object]) -> Result<Object, RuntimeError> {
-    re_split(args)
-}
-
-fn re_escape(args: &[Object]) -> Result<Object, RuntimeError> {
-    let s = match args.first() {
-        Some(Object::Str(s)) => s.to_string(),
-        _ => return Err(type_error("escape() expects str")),
-    };
-    Ok(Object::from_str(regex::escape(&s)))
-}
-
-fn run_match(
-    args: &[Object],
-    require_start: bool,
-    fullmatch: bool,
-) -> Result<Object, RuntimeError> {
-    let first = args
-        .first()
-        .ok_or_else(|| type_error("expected pattern argument"))?;
-    let from_pattern = matches!(first, Object::Instance(inst) if inst.class.name == "Pattern");
-    let (pat, default_flags) = extract_pattern(first)?;
-    let text = match args.get(1) {
-        Some(Object::Str(s)) => s.to_string(),
-        _ => return Err(type_error("expected str input")),
-    };
-    // Pattern method form: `pattern.match(s, pos=0, endpos=len(s))`.
-    // Module-level form: `re.match(pattern, s, flags=0)`.
-    let (flags, pos, endpos) = if from_pattern {
-        let pos = match args.get(2) {
-            Some(Object::Int(i)) => *i,
-            _ => 0,
-        };
-        let endpos = match args.get(3) {
-            Some(Object::Int(i)) => *i,
-            _ => text.chars().count() as i64,
-        };
-        (default_flags, pos, endpos)
-    } else {
-        let flags = match args.get(2) {
-            Some(Object::Int(i)) => *i,
-            _ => default_flags,
-        };
-        (flags, 0i64, text.chars().count() as i64)
-    };
-    let start_byte = char_index_to_byte(&text, pos.max(0) as usize);
-    let end_byte = char_index_to_byte(&text, endpos.max(0) as usize);
-    if start_byte > end_byte || start_byte > text.len() {
-        return Ok(Object::None);
-    }
-    let slice_end = end_byte.min(text.len());
-    let slice = &text[start_byte..slice_end];
-    let captured = match dual_captures(&pat, flags, slice)? {
-        Some(c) => c,
-        None => return Ok(Object::None),
-    };
-    let span0 = captured.groups[0].expect("group 0 always present");
-    if require_start && span0.0 != 0 {
-        return Ok(Object::None);
-    }
-    if fullmatch && (span0.0 != 0 || span0.1 != slice.len()) {
-        return Ok(Object::None);
-    }
-    Ok(make_match_from_captured(
-        &pat, &text, &captured, slice, start_byte,
-    ))
-}
-
-/// A capture result that hides which engine produced it. Spans are
-/// byte offsets into the *slice* the caller passed; the caller adds
-/// any base offset back.
-struct DualCaptures {
-    groups: Vec<Option<(usize, usize)>>,
-    /// Ordered ``(name, Option<group_idx>)`` pairs for named groups.
-    /// Group indices line up with ``groups``.
-    named: Vec<(String, usize)>,
-}
-
-fn dual_captures(pat: &str, flags: i64, slice: &str) -> Result<Option<DualCaptures>, RuntimeError> {
-    if let Ok(re) = compile_pattern(pat, flags) {
-        if let Some(caps) = re.captures(slice) {
-            let mut groups = Vec::with_capacity(caps.len());
-            for i in 0..caps.len() {
-                groups.push(caps.get(i).map(|m| (m.start(), m.end())));
-            }
-            let mut named = Vec::new();
-            for (i, name) in re.capture_names().enumerate() {
-                if let Some(n) = name {
-                    named.push((n.to_owned(), i));
-                }
-            }
-            return Ok(Some(DualCaptures { groups, named }));
-        }
-        return Ok(None);
-    }
-    // Fallback to fancy-regex.
-    let re = compile_pattern_fancy(pat, flags)?;
-    let cap = re
-        .captures(slice)
-        .map_err(|e| value_error(format!("regex error: {e}")))?;
-    let caps = match cap {
-        Some(c) => c,
-        None => return Ok(None),
-    };
-    let mut groups = Vec::with_capacity(caps.len());
-    for i in 0..caps.len() {
-        groups.push(caps.get(i).map(|m| (m.start(), m.end())));
-    }
-    let mut named = Vec::new();
-    for (i, name) in re.capture_names().enumerate() {
-        if let Some(n) = name {
-            named.push((n.to_owned(), i));
-        }
-    }
-    Ok(Some(DualCaptures { groups, named }))
-}
-
-fn make_match_from_captured(
-    pat: &str,
-    text: &str,
-    caps: &DualCaptures,
-    slice: &str,
-    base_offset: usize,
-) -> Object {
-    let inst = PyInstance::new(match_class());
-    let span0 = caps.groups[0].expect("group 0 always present");
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("string")),
-        Object::from_str(text.to_owned()),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("re")),
-        Object::from_str(pat.to_owned()),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("pos")),
-        Object::Int(base_offset as i64),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("endpos")),
-        Object::Int(text.len() as i64),
-    );
-    let mut groups: Vec<Object> = Vec::new();
-    let mut spans: Vec<Object> = Vec::new();
-    for span in &caps.groups {
-        match span {
-            Some((s, e)) => {
-                groups.push(Object::from_str(slice[*s..*e].to_owned()));
-                spans.push(Object::new_tuple(vec![
-                    Object::Int((s + base_offset) as i64),
-                    Object::Int((e + base_offset) as i64),
-                ]));
-            }
-            None => {
-                groups.push(Object::None);
-                spans.push(Object::new_tuple(vec![Object::Int(-1), Object::Int(-1)]));
-            }
-        }
-    }
-    let mut named_dict = DictData::new();
-    for (name, idx) in &caps.named {
-        let val = match caps.groups.get(*idx).copied().flatten() {
-            Some((s, e)) => Object::from_str(slice[s..e].to_owned()),
-            None => Object::None,
-        };
-        named_dict.insert(DictKey(Object::from_str(name.clone())), val);
-    }
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_groups")),
-        Object::new_tuple(groups),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_spans")),
-        Object::new_tuple(spans),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_named")),
-        Object::Dict(Rc::new(RefCell::new(named_dict))),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_full_start")),
-        Object::Int((span0.0 + base_offset) as i64),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_full_end")),
-        Object::Int((span0.1 + base_offset) as i64),
-    );
-    Object::Instance(Rc::new(inst))
-}
-
-fn char_index_to_byte(s: &str, n: usize) -> usize {
-    for (count, (i, _)) in s.char_indices().enumerate() {
-        if count == n {
-            return i;
-        }
-    }
-    s.len()
-}
-
-fn re_match(args: &[Object]) -> Result<Object, RuntimeError> {
-    run_match(args, true, false)
-}
-
-fn re_search(args: &[Object]) -> Result<Object, RuntimeError> {
-    run_match(args, false, false)
-}
-
-fn re_fullmatch(args: &[Object]) -> Result<Object, RuntimeError> {
-    run_match(args, true, true)
-}
-
-#[allow(dead_code)]
-fn make_match(
-    pat: &str,
-    text: &str,
-    caps: &Captures<'_>,
-    re: &Regex,
-    base_offset: usize,
-) -> Object {
-    let inst = PyInstance::new(match_class());
-    let m0 = caps.get(0).expect("at least one capture");
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("string")),
-        Object::from_str(text.to_owned()),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("re")),
-        Object::from_str(pat.to_owned()),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("pos")),
-        Object::Int(base_offset as i64),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("endpos")),
-        Object::Int(text.len() as i64),
-    );
-    let mut groups: Vec<Object> = Vec::new();
-    for i in 0..caps.len() {
-        match caps.get(i) {
-            Some(m) => groups.push(Object::from_str(m.as_str().to_owned())),
-            None => groups.push(Object::None),
-        }
-    }
-    let mut spans: Vec<Object> = Vec::new();
-    for i in 0..caps.len() {
-        match caps.get(i) {
-            Some(m) => spans.push(Object::new_tuple(vec![
-                Object::Int((m.start() + base_offset) as i64),
-                Object::Int((m.end() + base_offset) as i64),
-            ])),
-            None => spans.push(Object::new_tuple(vec![Object::Int(-1), Object::Int(-1)])),
-        }
-    }
-    let mut named = DictData::new();
-    for name in re.capture_names().flatten() {
-        let val = caps
-            .name(name)
-            .map(|m| Object::from_str(m.as_str().to_owned()))
-            .unwrap_or(Object::None);
-        named.insert(DictKey(Object::from_str(name.to_owned())), val);
-    }
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_groups")),
-        Object::new_tuple(groups),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_spans")),
-        Object::new_tuple(spans),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_named")),
-        Object::Dict(Rc::new(RefCell::new(named))),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_full_start")),
-        Object::Int((m0.start() + base_offset) as i64),
-    );
-    inst.dict.borrow_mut().insert(
-        DictKey(Object::from_static("_full_end")),
-        Object::Int((m0.end() + base_offset) as i64),
-    );
-    Object::Instance(Rc::new(inst))
-}
-
-fn match_methods() -> Vec<(&'static str, Object)> {
-    vec![
-        ("group", b("group", match_group)),
-        ("groups", b("groups", match_groups_method)),
-        ("groupdict", b("groupdict", match_groupdict)),
-        ("start", b("start", match_start)),
-        ("end", b("end", match_end)),
-        ("span", b("span", match_span)),
-    ]
-}
-
-fn match_self(args: &[Object]) -> Result<Rc<PyInstance>, RuntimeError> {
-    match args.first() {
-        Some(Object::Instance(i)) if i.class.name == "Match" => Ok(i.clone()),
-        _ => Err(type_error("expected Match receiver")),
-    }
-}
-
-fn match_group(args: &[Object]) -> Result<Object, RuntimeError> {
-    let m = match_self(args)?;
-    let groups = m
-        .dict
-        .borrow()
-        .get(&DictKey(Object::from_static("_groups")))
-        .cloned();
-    let named = m
-        .dict
-        .borrow()
-        .get(&DictKey(Object::from_static("_named")))
-        .cloned();
-    let groups_tuple = match groups {
-        Some(Object::Tuple(t)) => t,
-        _ => return Err(type_error("invalid Match groups")),
-    };
-    let lookup = |idx: &Object| -> Result<Object, RuntimeError> {
-        match idx {
-            Object::Int(i) => groups_tuple
-                .get(*i as usize)
-                .cloned()
-                .ok_or_else(|| value_error("no such group")),
-            Object::Str(s) => match named {
-                Some(Object::Dict(ref d)) => d
-                    .borrow()
-                    .get(&DictKey(Object::from_str(s.to_string())))
-                    .cloned()
-                    .ok_or_else(|| value_error("no such group")),
-                _ => Err(value_error("no named groups")),
-            },
-            _ => Err(type_error("group key must be int or str")),
-        }
-    };
-    let arg_indices = &args[1..];
-    if arg_indices.is_empty() {
-        return Ok(groups_tuple.first().cloned().unwrap_or(Object::None));
-    }
-    if arg_indices.len() == 1 {
-        return lookup(&arg_indices[0]);
-    }
-    let mut out = Vec::with_capacity(arg_indices.len());
-    for a in arg_indices {
-        out.push(lookup(a)?);
-    }
-    Ok(Object::new_tuple(out))
-}
-
-fn match_groups_method(args: &[Object]) -> Result<Object, RuntimeError> {
-    let m = match_self(args)?;
-    let groups = m
-        .dict
-        .borrow()
-        .get(&DictKey(Object::from_static("_groups")))
-        .cloned();
-    let default = args.get(1).cloned().unwrap_or(Object::None);
-    match groups {
-        Some(Object::Tuple(t)) => {
-            let out: Vec<Object> = t
-                .iter()
-                .skip(1)
-                .cloned()
-                .map(|v| {
-                    if matches!(v, Object::None) {
-                        default.clone()
-                    } else {
-                        v
-                    }
-                })
-                .collect();
-            Ok(Object::new_tuple(out))
-        }
-        _ => Err(type_error("invalid Match groups")),
-    }
-}
-
-fn match_groupdict(args: &[Object]) -> Result<Object, RuntimeError> {
-    let m = match_self(args)?;
-    let named = m
-        .dict
-        .borrow()
-        .get(&DictKey(Object::from_static("_named")))
-        .cloned();
-    match named {
-        Some(Object::Dict(d)) => Ok(Object::Dict(d.clone())),
-        _ => Ok(Object::new_dict()),
-    }
-}
-
-fn match_start(args: &[Object]) -> Result<Object, RuntimeError> {
-    let m = match_self(args)?;
-    let idx = args.get(1).cloned().unwrap_or(Object::Int(0));
-    let i = match idx {
-        Object::Int(i) => i,
-        _ => return Err(type_error("start() expected int")),
-    };
-    let spans = m
-        .dict
-        .borrow()
-        .get(&DictKey(Object::from_static("_spans")))
-        .cloned();
-    match spans {
-        Some(Object::Tuple(spans)) => match spans.get(i as usize) {
-            Some(Object::Tuple(t)) => Ok(t[0].clone()),
-            _ => Err(value_error("no such group")),
-        },
-        _ => Err(type_error("invalid Match spans")),
-    }
-}
-
-fn match_end(args: &[Object]) -> Result<Object, RuntimeError> {
-    let m = match_self(args)?;
-    let idx = args.get(1).cloned().unwrap_or(Object::Int(0));
-    let i = match idx {
-        Object::Int(i) => i,
-        _ => return Err(type_error("end() expected int")),
-    };
-    let spans = m
-        .dict
-        .borrow()
-        .get(&DictKey(Object::from_static("_spans")))
-        .cloned();
-    match spans {
-        Some(Object::Tuple(spans)) => match spans.get(i as usize) {
-            Some(Object::Tuple(t)) => Ok(t[1].clone()),
-            _ => Err(value_error("no such group")),
-        },
-        _ => Err(type_error("invalid Match spans")),
-    }
-}
-
-fn match_span(args: &[Object]) -> Result<Object, RuntimeError> {
-    let m = match_self(args)?;
-    let idx = args.get(1).cloned().unwrap_or(Object::Int(0));
-    let i = match idx {
-        Object::Int(i) => i,
-        _ => return Err(type_error("span() expected int")),
-    };
-    let spans = m
-        .dict
-        .borrow()
-        .get(&DictKey(Object::from_static("_spans")))
-        .cloned();
-    match spans {
-        Some(Object::Tuple(spans)) => spans
-            .get(i as usize)
-            .cloned()
-            .ok_or_else(|| value_error("no such group")),
-        _ => Err(type_error("invalid Match spans")),
-    }
-}
-
-fn re_findall(args: &[Object]) -> Result<Object, RuntimeError> {
-    let (pat, default_flags) =
-        extract_pattern(args.first().ok_or_else(|| type_error("expected pattern"))?)?;
-    let text = match args.get(1) {
-        Some(Object::Str(s)) => s.to_string(),
-        _ => return Err(type_error("expected str")),
-    };
-    let flags = match args.get(2) {
-        Some(Object::Int(i)) => *i,
-        _ => default_flags,
-    };
-    let mut out = Vec::new();
-    let mut on_match = |_s: usize, _e: usize, groups: &[Option<(usize, usize)>]| {
-        let has_groups = groups.len() > 1;
-        if has_groups {
-            let group_count = groups.len() - 1;
-            if group_count == 1 {
-                let s = groups[1].map_or(String::new(), |(s, e)| text[s..e].to_owned());
-                out.push(Object::from_str(s));
-            } else {
-                let mut tup = Vec::with_capacity(group_count);
-                for g in groups.iter().skip(1).take(group_count) {
-                    let s = g.map_or(String::new(), |(s, e)| text[s..e].to_owned());
-                    tup.push(Object::from_str(s));
-                }
-                out.push(Object::new_tuple(tup));
-            }
-        } else {
-            let s = groups[0].map_or(String::new(), |(s, e)| text[s..e].to_owned());
-            out.push(Object::from_str(s));
-        }
-    };
-    iter_all_matches(&pat, flags, &text, &mut on_match)?;
-    Ok(Object::new_list(out))
-}
-
-fn re_finditer(args: &[Object]) -> Result<Object, RuntimeError> {
-    let (pat, default_flags) =
-        extract_pattern(args.first().ok_or_else(|| type_error("expected pattern"))?)?;
-    let text = match args.get(1) {
-        Some(Object::Str(s)) => s.to_string(),
-        _ => return Err(type_error("expected str")),
-    };
-    let flags = match args.get(2) {
-        Some(Object::Int(i)) => *i,
-        _ => default_flags,
-    };
-    let mut out = Vec::new();
-    let mut consume_groups = |start: usize, _end: usize, groups: &[Option<(usize, usize)>]| {
-        let groups_vec = groups.to_vec();
-        let _ = start;
-        let caps = DualCaptures {
-            groups: groups_vec,
-            named: Vec::new(),
-        };
-        out.push(make_match_from_captured(&pat, &text, &caps, &text, 0));
-    };
-    iter_all_matches(&pat, flags, &text, &mut consume_groups)?;
-    Ok(Object::new_list(out))
-}
-
-/// Walk every non-overlapping match in ``text`` and invoke ``f``
-/// with the byte span and capture groups. Falls back to the
-/// ``fancy_regex`` engine if the base ``regex`` can't compile the
-/// pattern.
-fn iter_all_matches(
-    pat: &str,
-    flags: i64,
-    text: &str,
-    f: &mut dyn FnMut(usize, usize, &[Option<(usize, usize)>]),
-) -> Result<(), RuntimeError> {
-    match compile_pattern(pat, flags) {
-        Ok(re) => {
-            for caps in re.captures_iter(text) {
-                let mut groups = Vec::with_capacity(caps.len());
-                for i in 0..caps.len() {
-                    groups.push(caps.get(i).map(|m| (m.start(), m.end())));
-                }
-                let m = caps.get(0).unwrap();
-                f(m.start(), m.end(), &groups);
-            }
-            Ok(())
-        }
-        Err(_) => {
-            let re = compile_pattern_fancy(pat, flags)?;
-            for caps in re.captures_iter(text) {
-                let caps = caps.map_err(|e| value_error(format!("regex error: {e}")))?;
-                let mut groups = Vec::with_capacity(caps.len());
-                for i in 0..caps.len() {
-                    groups.push(caps.get(i).map(|m| (m.start(), m.end())));
-                }
-                let m = caps.get(0).unwrap();
-                f(m.start(), m.end(), &groups);
-            }
-            Ok(())
-        }
-    }
-}
-
-fn re_sub(args: &[Object]) -> Result<Object, RuntimeError> {
-    let (s, _) = re_sub_impl(args)?;
-    Ok(Object::from_str(s))
-}
-
-fn re_subn(args: &[Object]) -> Result<Object, RuntimeError> {
-    let (s, n) = re_sub_impl(args)?;
-    Ok(Object::new_tuple(vec![Object::from_str(s), Object::Int(n)]))
-}
-
-fn re_sub_impl(args: &[Object]) -> Result<(String, i64), RuntimeError> {
-    let (pat, default_flags) =
-        extract_pattern(args.first().ok_or_else(|| type_error("expected pattern"))?)?;
-    let repl = match args.get(1) {
-        Some(Object::Str(s)) => s.to_string(),
-        Some(Object::Function(_)) | Some(Object::Builtin(_)) | Some(Object::BoundMethod(_)) => {
-            // ``re.sub`` with a callable replacement requires
-            // calling back into the VM. The VM intercepts the
-            // ``sub`` builtin (see ``do_re_sub_call`` in
-            // ``lib.rs``) and routes those calls itself, so the
-            // pure-data path here only services the string form.
-            return Err(type_error(
-                "internal: callable re.sub should be handled at the VM dispatch layer",
-            ));
-        }
-        _ => return Err(type_error("repl must be str or callable")),
-    };
-    let text = match args.get(2) {
-        Some(Object::Str(s)) => s.to_string(),
-        _ => return Err(type_error("expected str")),
-    };
-    let count = match args.get(3) {
-        Some(Object::Int(i)) => *i,
-        _ => 0,
-    };
-    let flags = match args.get(4) {
-        Some(Object::Int(i)) => *i,
-        _ => default_flags,
-    };
-    let mut out = String::new();
-    let mut last_end = 0usize;
-    let mut replacements = 0i64;
-    let mut on_match = |s: usize, e: usize, groups: &[Option<(usize, usize)>]| {
-        if count > 0 && replacements >= count {
-            return;
-        }
-        out.push_str(&text[last_end..s]);
-        out.push_str(&expand_replacement_from_groups(&repl, groups, &text));
-        last_end = e;
-        replacements += 1;
-    };
-    iter_all_matches(&pat, flags, &text, &mut on_match)?;
-    out.push_str(&text[last_end..]);
-    Ok((out, replacements))
-}
-
-/// Same expansion rules as ``expand_replacement`` but driven by
-/// pre-extracted group spans rather than a regex ``Captures``.
-fn expand_replacement_from_groups(
-    repl: &str,
-    groups: &[Option<(usize, usize)>],
-    text: &str,
-) -> String {
-    let mut out = String::new();
-    let bytes = repl.as_bytes();
-    let mut i = 0;
-    while i < bytes.len() {
-        if bytes[i] == b'\\' && i + 1 < bytes.len() {
-            let next = bytes[i + 1];
-            if next.is_ascii_digit() {
-                let idx = (next - b'0') as usize;
-                if let Some(Some((s, e))) = groups.get(idx).copied() {
-                    out.push_str(&text[s..e]);
-                }
-                i += 2;
-            } else if next == b'g' && i + 2 < bytes.len() && bytes[i + 2] == b'<' {
-                let close = bytes[i + 3..]
-                    .iter()
-                    .position(|b| *b == b'>')
-                    .map(|p| i + 3 + p);
-                if let Some(end) = close {
-                    let name = &repl[i + 3..end];
-                    if let Ok(n) = name.parse::<usize>() {
-                        if let Some(Some((s, e))) = groups.get(n).copied() {
-                            out.push_str(&text[s..e]);
-                        }
-                    }
-                    i = end + 1;
-                    continue;
-                }
-                out.push('\\');
-                i += 1;
-            } else if next == b'n' {
-                out.push('\n');
-                i += 2;
-            } else if next == b't' {
-                out.push('\t');
-                i += 2;
-            } else if next == b'\\' {
-                out.push('\\');
-                i += 2;
-            } else {
-                out.push('\\');
-                out.push(next as char);
-                i += 2;
-            }
-        } else {
-            out.push(bytes[i] as char);
-            i += 1;
-        }
-    }
-    out
-}
-
-/// Expand `\1` / `\g<name>` etc. inside a `re.sub` replacement.
-#[allow(dead_code)]
-fn expand_replacement(repl: &str, caps: &Captures<'_>) -> String {
-    let mut out = String::new();
-    let bytes = repl.as_bytes();
-    let mut i = 0;
-    while i < bytes.len() {
-        if bytes[i] == b'\\' && i + 1 < bytes.len() {
-            let next = bytes[i + 1];
-            if next.is_ascii_digit() {
-                let idx = (next - b'0') as usize;
-                if let Some(m) = caps.get(idx) {
-                    out.push_str(m.as_str());
-                }
-                i += 2;
-            } else if next == b'g' && i + 2 < bytes.len() && bytes[i + 2] == b'<' {
-                let close = bytes[i + 3..]
-                    .iter()
-                    .position(|b| *b == b'>')
-                    .map(|p| i + 3 + p);
-                if let Some(end) = close {
-                    let name = &repl[i + 3..end];
-                    if let Ok(n) = name.parse::<usize>() {
-                        if let Some(m) = caps.get(n) {
-                            out.push_str(m.as_str());
-                        }
-                    } else if let Some(m) = caps.name(name) {
-                        out.push_str(m.as_str());
-                    }
-                    i = end + 1;
-                    continue;
-                }
-                out.push('\\');
-                i += 1;
-            } else if next == b'n' {
-                out.push('\n');
-                i += 2;
-            } else if next == b't' {
-                out.push('\t');
-                i += 2;
-            } else if next == b'\\' {
-                out.push('\\');
-                i += 2;
-            } else {
-                out.push('\\');
-                out.push(next as char);
-                i += 2;
-            }
-        } else {
-            let ch_len = if bytes[i] < 0x80 { 1 } else { 1 };
-            out.push(bytes[i] as char);
-            i += ch_len;
-        }
-    }
-    out
-}
-
-fn re_split(args: &[Object]) -> Result<Object, RuntimeError> {
-    let (pat, default_flags) =
-        extract_pattern(args.first().ok_or_else(|| type_error("expected pattern"))?)?;
-    let text = match args.get(1) {
-        Some(Object::Str(s)) => s.to_string(),
-        _ => return Err(type_error("expected str")),
-    };
-    let maxsplit = match args.get(2) {
-        Some(Object::Int(i)) => *i,
-        _ => 0,
-    };
-    let flags = match args.get(3) {
-        Some(Object::Int(i)) => *i,
-        _ => default_flags,
-    };
-    let re = compile_pattern(&pat, flags)?;
-    let mut out = Vec::new();
-    let mut last_end = 0;
-    for (splits, caps) in re.captures_iter(&text).enumerate() {
-        if maxsplit > 0 && splits as i64 >= maxsplit {
-            break;
-        }
-        let m = caps.get(0).expect("capture 0");
-        out.push(Object::from_str(text[last_end..m.start()].to_owned()));
-        // Include captured groups as separate output elements (Python
-        // semantics).
-        for i in 1..caps.len() {
-            match caps.get(i) {
-                Some(g) => out.push(Object::from_str(g.as_str().to_owned())),
-                None => out.push(Object::None),
-            }
-        }
-        last_end = m.end();
-    }
-    out.push(Object::from_str(text[last_end..].to_owned()));
-    Ok(Object::new_list(out))
-}
diff --git a/crates/weavepy-vm/src/stdlib/sre_mod.rs b/crates/weavepy-vm/src/stdlib/sre_mod.rs
new file mode 100644
index 0000000..deb821a
--- /dev/null
+++ b/crates/weavepy-vm/src/stdlib/sre_mod.rs
@@ -0,0 +1,1557 @@
+//! The native `_sre` module: WeavePy's faithful port of CPython's
+//! secret-labs regular-expression engine (RFC 0035).
+//!
+//! This is a direct, line-for-line translation of the backtracking
+//! matcher in CPython 3.13's `Modules/_sre/sre_lib.h` (the `SRE(match)`
+//! / `SRE(count)` / `SRE(charset)` / `SRE(search)` templated
+//! functions). It consumes the exact same compiled int-code emitted by
+//! the frozen Python `re._compiler`, so behaviour — including
+//! lookaround, backreferences, atomic groups, possessive quantifiers,
+//! conditional groups and the precise greedy/lazy backtracking order —
+//! matches CPython.
+//!
+//! The public Python surface (`Pattern` / `Match` objects, `sub`,
+//! `split`, `finditer`, …) lives in the frozen `re` package; this
+//! module only exposes the primitive matching core plus the
+//! case-folding helpers the compiler needs.
+//!
+//! Strings are matched over code-point arrays, so every position
+//! returned (group spans, `pos`, `endpos`) is a Python code-point
+//! index, exactly like CPython. Byte patterns are matched over the raw
+//! byte values (each byte widened to a `u32`).
+
+use crate::error::{runtime_error, type_error, value_error, RuntimeError};
+use crate::import::ModuleCache;
+use crate::object::{BuiltinFn, DictData, DictKey, Object, PyModule};
+use crate::sync::Rc;
+use crate::sync::RefCell;
+
+// ---------------------------------------------------------------------------
+// Constants (mirrors re/_constants.py and sre_constants.h)
+// ---------------------------------------------------------------------------
+
+pub const MAGIC: i64 = 20_230_612;
+pub const CODESIZE: i64 = 4;
+/// `SRE_MAXREPEAT` — the "unlimited" sentinel for `{m,}` style repeats.
+const MAXREPEAT: u32 = 4_294_967_295;
+const MAXREPEAT_I64: i64 = 4_294_967_295;
+/// `SRE_MAXGROUPS`.
+const MAXGROUPS: i64 = 2_147_483_647 / 2;
+
+// Opcodes — indices into re/_constants.py OPCODES (after trimming
+// MIN_REPEAT / MAX_REPEAT, which never reach the compiled code).
+const OP_FAILURE: u32 = 0;
+const OP_SUCCESS: u32 = 1;
+const OP_ANY: u32 = 2;
+const OP_ANY_ALL: u32 = 3;
+const OP_ASSERT: u32 = 4;
+const OP_ASSERT_NOT: u32 = 5;
+const OP_AT: u32 = 6;
+const OP_BRANCH: u32 = 7;
+const OP_CATEGORY: u32 = 8;
+const OP_CHARSET: u32 = 9;
+const OP_BIGCHARSET: u32 = 10;
+const OP_GROUPREF: u32 = 11;
+const OP_GROUPREF_EXISTS: u32 = 12;
+const OP_IN: u32 = 13;
+const OP_INFO: u32 = 14;
+const OP_JUMP: u32 = 15;
+const OP_LITERAL: u32 = 16;
+const OP_MARK: u32 = 17;
+const OP_MAX_UNTIL: u32 = 18;
+const OP_MIN_UNTIL: u32 = 19;
+const OP_NOT_LITERAL: u32 = 20;
+const OP_NEGATE: u32 = 21;
+const OP_RANGE: u32 = 22;
+const OP_REPEAT: u32 = 23;
+const OP_REPEAT_ONE: u32 = 24;
+#[allow(dead_code)] // appears only in parser output, never in compiled code
+const OP_SUBPATTERN: u32 = 25;
+const OP_MIN_REPEAT_ONE: u32 = 26;
+const OP_ATOMIC_GROUP: u32 = 27;
+const OP_POSSESSIVE_REPEAT: u32 = 28;
+const OP_POSSESSIVE_REPEAT_ONE: u32 = 29;
+const OP_GROUPREF_IGNORE: u32 = 30;
+const OP_IN_IGNORE: u32 = 31;
+const OP_LITERAL_IGNORE: u32 = 32;
+const OP_NOT_LITERAL_IGNORE: u32 = 33;
+const OP_GROUPREF_LOC_IGNORE: u32 = 34;
+const OP_IN_LOC_IGNORE: u32 = 35;
+const OP_LITERAL_LOC_IGNORE: u32 = 36;
+const OP_NOT_LITERAL_LOC_IGNORE: u32 = 37;
+const OP_GROUPREF_UNI_IGNORE: u32 = 38;
+const OP_IN_UNI_IGNORE: u32 = 39;
+const OP_LITERAL_UNI_IGNORE: u32 = 40;
+const OP_NOT_LITERAL_UNI_IGNORE: u32 = 41;
+const OP_RANGE_UNI_IGNORE: u32 = 42;
+
+// AT codes.
+const AT_BEGINNING: u32 = 0;
+const AT_BEGINNING_LINE: u32 = 1;
+const AT_BEGINNING_STRING: u32 = 2;
+const AT_BOUNDARY: u32 = 3;
+const AT_NON_BOUNDARY: u32 = 4;
+const AT_END: u32 = 5;
+const AT_END_LINE: u32 = 6;
+const AT_END_STRING: u32 = 7;
+const AT_LOC_BOUNDARY: u32 = 8;
+const AT_LOC_NON_BOUNDARY: u32 = 9;
+const AT_UNI_BOUNDARY: u32 = 10;
+const AT_UNI_NON_BOUNDARY: u32 = 11;
+
+// Category codes.
+const CAT_DIGIT: u32 = 0;
+const CAT_NOT_DIGIT: u32 = 1;
+const CAT_SPACE: u32 = 2;
+const CAT_NOT_SPACE: u32 = 3;
+const CAT_WORD: u32 = 4;
+const CAT_NOT_WORD: u32 = 5;
+const CAT_LINEBREAK: u32 = 6;
+const CAT_NOT_LINEBREAK: u32 = 7;
+const CAT_LOC_WORD: u32 = 8;
+const CAT_LOC_NOT_WORD: u32 = 9;
+const CAT_UNI_DIGIT: u32 = 10;
+const CAT_UNI_NOT_DIGIT: u32 = 11;
+const CAT_UNI_SPACE: u32 = 12;
+const CAT_UNI_NOT_SPACE: u32 = 13;
+const CAT_UNI_WORD: u32 = 14;
+const CAT_UNI_NOT_WORD: u32 = 15;
+const CAT_UNI_LINEBREAK: u32 = 16;
+const CAT_UNI_NOT_LINEBREAK: u32 = 17;
+
+/// Guards against unbounded native recursion on pathological patterns
+/// (CPython uses a heap-allocated context stack; we recurse on the
+/// Rust stack and bail out with an error rather than crash).
+const MAX_DEPTH: u32 = 10_000;
+
+// ---------------------------------------------------------------------------
+// Compiled-pattern registry
+// ---------------------------------------------------------------------------
+
+struct CompiledCode {
+    code: Vec<u32>,
+    groups: usize,
+}
+
+thread_local! {
+    static REGISTRY: RefCell<Vec<Rc<CompiledCode>>> = const { RefCell::new(Vec::new()) };
+}
+
+// ---------------------------------------------------------------------------
+// Case-folding helpers
+// ---------------------------------------------------------------------------
+
+#[inline]
+fn lower_ascii(ch: u32) -> u32 {
+    // `A`..=`Z` sit exactly 32 below `a`..=`z`; nothing else is touched.
+    match ch {
+        0x41..=0x5a => ch + 32,
+        _ => ch,
+    }
+}
+
+#[inline]
+fn upper_ascii(ch: u32) -> u32 {
+    // `a`..=`z` sit exactly 32 above `A`..=`Z`; nothing else is touched.
+    match ch {
+        0x61..=0x7a => ch - 32,
+        _ => ch,
+    }
+}
+
+fn lower_unicode(ch: u32) -> u32 {
+    // Values that are not Unicode scalar values pass through unchanged;
+    // otherwise only the first char of the lowercase mapping is kept.
+    let mapped = char::from_u32(ch).and_then(|c| c.to_lowercase().next());
+    mapped.map_or(ch, u32::from)
+}
+
+fn upper_unicode(ch: u32) -> u32 {
+    // Values that are not Unicode scalar values pass through unchanged;
+    // otherwise only the first char of the uppercase mapping is kept.
+    let mapped = char::from_u32(ch).and_then(|c| c.to_uppercase().next());
+    mapped.map_or(ch, u32::from)
+}
+
+// We approximate locale case folding with ASCII (CPython's behaviour is
+// locale-dependent and LOCALE tests are largely skipped).
+#[inline]
+fn lower_locale(ch: u32) -> u32 {
+    lower_ascii(ch)
+}
+#[inline]
+fn upper_locale(ch: u32) -> u32 {
+    upper_ascii(ch)
+}
+
+#[inline]
+fn char_loc_ignore(pat: u32, ch: u32) -> bool {
+    [ch, lower_locale(ch), upper_locale(ch)].contains(&pat)
+}
+
+fn unicode_iscased(ch: u32) -> bool {
+    // Cased: at least one of the two case mappings moves the code point.
+    let unchanged = |mapped: u32| mapped == ch;
+    !(unchanged(lower_unicode(ch)) && unchanged(upper_unicode(ch)))
+}
+
+fn ascii_iscased(ch: u32) -> bool {
+    matches!(ch, 0x41..=0x5a | 0x61..=0x7a) // `A`-`Z` or `a`-`z`
+}
+
+// ---------------------------------------------------------------------------
+// Character classification (mirrors the SRE_IS_* / SRE_UNI_IS_* macros)
+// ---------------------------------------------------------------------------
+
+#[inline]
+fn is_linebreak(ch: u32) -> bool {
+    ch == u32::from(b'\n')
+}
+
+#[inline]
+fn ascii_digit(ch: u32) -> bool {
+    matches!(ch, 0x30..=0x39) // `0`-`9`; the range itself implies ch < 128
+}
+
+#[inline]
+fn ascii_space(ch: u32) -> bool {
+    // '\t' '\n' '\v' '\f' '\r' are the contiguous run 0x09..=0x0d; ' ' is 0x20.
+    (0x09..=0x0d).contains(&ch) || ch == 0x20
+}
+
+#[inline]
+fn ascii_word(ch: u32) -> bool {
+    // The ASCII word set `[0-9A-Za-z_]`, spelled by code point:
+    // 0x30..=0x39 digits, 0x41..=0x5a upper case, 0x5f `_`,
+    // 0x61..=0x7a lower case. Every member is below 128, so the
+    // pattern needs no separate ASCII guard.
+    matches!(ch, 0x30..=0x39 | 0x41..=0x5a | 0x5f | 0x61..=0x7a)
+}
+
+#[inline]
+fn loc_word(ch: u32) -> bool {
+    // Latin-1 alphanumeric or underscore.
+    if ch == u32::from(b'_') {
+        return true;
+    }
+    match char::from_u32(ch) {
+        Some(c) => ch < 256 && (c.is_alphanumeric()),
+        None => false,
+    }
+}
+
+fn uni_digit(ch: u32) -> bool {
+    // Stand-in for Py_UNICODE_ISDECIMAL (decimal digits only):
+    //   * ASCII `0`-`9`, or
+    //   * a non-ASCII digit recognised by `nd_digit`.
+    // A value that is not a Unicode scalar value (`from_u32` fails) is
+    // never a digit.
+    matches!(char::from_u32(ch), Some(c) if c.is_ascii_digit() || nd_digit(c))
+}
+
+/// Best-effort Unicode decimal-digit (general category Nd) test for the
+/// common non-ASCII blocks, so `\d` matches like CPython without a full
+/// Unicode database.
+fn nd_digit(c: char) -> bool {
+    let v = c as u32;
+    matches!(v,
+        0x0660..=0x0669 // Arabic-Indic
+        | 0x06F0..=0x06F9 // Extended Arabic-Indic
+        | 0x07C0..=0x07C9 // NKo
+        | 0x0966..=0x096F // Devanagari
+        | 0x09E6..=0x09EF // Bengali
+        | 0x0A66..=0x0A6F // Gurmukhi
+        | 0x0AE6..=0x0AEF // Gujarati
+        | 0x0B66..=0x0B6F // Oriya
+        | 0x0BE6..=0x0BEF // Tamil
+        | 0x0C66..=0x0C6F // Telugu
+        | 0x0CE6..=0x0CEF // Kannada
+        | 0x0D66..=0x0D6F // Malayalam
+        | 0x0E50..=0x0E59 // Thai
+        | 0x0ED0..=0x0ED9 // Lao
+        | 0x0F20..=0x0F29 // Tibetan
+        | 0xFF10..=0xFF19 // Fullwidth
+    )
+}
+
+fn uni_space(ch: u32) -> bool {
+    // Python's `str.isspace` (what `\s` follows for str patterns) also
+    // accepts the information separators U+001C..=U+001F, which Rust's
+    // `White_Space`-based `char::is_whitespace` rejects.
+    matches!(ch, 0x1c..=0x1f) || matches!(char::from_u32(ch), Some(c) if c.is_whitespace())
+}
+
+fn uni_word(ch: u32) -> bool {
+    // `_` is a word character outright. Any other value must be a
+    // Unicode scalar value that `char::is_alphanumeric` accepts; values
+    // `char::from_u32` rejects are never word characters.
+    if ch == 0x5f {
+        return true;
+    }
+    matches!(char::from_u32(ch), Some(c) if c.is_alphanumeric())
+}
+
+fn uni_linebreak(ch: u32) -> bool {
+    matches!(
+        ch,
+        0x0a | 0x0b | 0x0c | 0x0d | 0x1c | 0x1d | 0x1e | 0x85 | 0x2028 | 0x2029
+    )
+}
+
+fn category(chcode: u32, ch: u32) -> bool {
+    match chcode {
+        CAT_DIGIT => ascii_digit(ch),
+        CAT_NOT_DIGIT => !ascii_digit(ch),
+        CAT_SPACE => ascii_space(ch),
+        CAT_NOT_SPACE => !ascii_space(ch),
+        CAT_WORD => ascii_word(ch),
+        CAT_NOT_WORD => !ascii_word(ch),
+        CAT_LINEBREAK => is_linebreak(ch),
+        CAT_NOT_LINEBREAK => !is_linebreak(ch),
+        CAT_LOC_WORD => loc_word(ch),
+        CAT_LOC_NOT_WORD => !loc_word(ch),
+        CAT_UNI_DIGIT => uni_digit(ch),
+        CAT_UNI_NOT_DIGIT => !uni_digit(ch),
+        CAT_UNI_SPACE => uni_space(ch),
+        CAT_UNI_NOT_SPACE => !uni_space(ch),
+        CAT_UNI_WORD => uni_word(ch),
+        CAT_UNI_NOT_WORD => !uni_word(ch),
+        CAT_UNI_LINEBREAK => uni_linebreak(ch),
+        CAT_UNI_NOT_LINEBREAK => !uni_linebreak(ch),
+        _ => false,
+    }
+}
+
+// ---------------------------------------------------------------------------
+// The matcher
+// ---------------------------------------------------------------------------
+
+#[derive(Clone)]
+struct MarkSnapshot {
+    marks: Vec<isize>,
+    lastmark: isize,
+    lastindex: isize,
+}
+
+struct RepeatCtx {
+    count: isize,
+    /// Index in `code` of the REPEAT op's first argument (skip slot).
+    pattern: usize,
+    last_ptr: isize,
+    prev: Option<usize>,
+}
+
+struct Matcher<'a> {
+    s: &'a [u32],
+    code: &'a [u32],
+    beginning: usize,
+    start: usize,
+    end: usize,
+    ptr: usize,
+    marks: Vec<isize>,
+    lastmark: isize,
+    lastindex: isize,
+    must_advance: bool,
+    match_all: bool,
+    repeats: Vec<RepeatCtx>,
+    cur_repeat: Option<usize>,
+    depth: u32,
+}
+
+impl<'a> Matcher<'a> {
+    fn new(s: &'a [u32], code: &'a [u32], groups: usize) -> Self {
+        Matcher {
+            s,
+            code,
+            beginning: 0,
+            start: 0,
+            end: s.len(),
+            ptr: 0,
+            marks: vec![-1; groups * 2],
+            lastmark: -1,
+            lastindex: -1,
+            must_advance: false,
+            match_all: false,
+            repeats: Vec::new(),
+            cur_repeat: None,
+            depth: 0,
+        }
+    }
+
+    fn reset_capture(&mut self) {
+        // Put every mark slot and both trackers back to -1, the same
+        // values `new` initialises them with.
+        self.marks.fill(-1);
+        self.lastindex = -1;
+        self.lastmark = -1;
+    }
+
+    #[inline]
+    fn snapshot(&self) -> MarkSnapshot {
+        MarkSnapshot {
+            marks: self.marks.clone(),
+            lastmark: self.lastmark,
+            lastindex: self.lastindex,
+        }
+    }
+
+    #[inline]
+    fn restore(&mut self, snap: &MarkSnapshot) {
+        self.marks.clone_from(&snap.marks);
+        self.lastmark = snap.lastmark;
+        self.lastindex = snap.lastindex;
+    }
+
+    /// Evaluates the zero-width assertion `atcode` at input index `ptr`.
+    ///
+    /// Returns `false` for any assertion code that is not listed here.
+    fn at(&self, ptr: usize, atcode: u32) -> bool {
+        let text = self.s;
+        let at_start = ptr == self.beginning;
+        let at_end = ptr == self.end;
+        match atcode {
+            AT_BEGINNING | AT_BEGINNING_STRING => at_start,
+            AT_BEGINNING_LINE => at_start || is_linebreak(text[ptr - 1]),
+            // Holds at the very end, or right before one final line break.
+            AT_END => at_end || (self.end - ptr == 1 && is_linebreak(text[ptr])),
+            AT_END_LINE => at_end || is_linebreak(text[ptr]),
+            AT_END_STRING => at_end,
+            AT_BOUNDARY => self.word_boundary(ptr, ascii_word),
+            AT_NON_BOUNDARY => !self.word_boundary(ptr, ascii_word),
+            AT_LOC_BOUNDARY => self.word_boundary(ptr, loc_word),
+            AT_LOC_NON_BOUNDARY => !self.word_boundary(ptr, loc_word),
+            AT_UNI_BOUNDARY => self.word_boundary(ptr, uni_word),
+            AT_UNI_NON_BOUNDARY => !self.word_boundary(ptr, uni_word),
+            _ => false,
+        }
+    }
+
+    /// Reports whether `ptr` sits between a word and a non-word position,
+    /// using `is_word` to classify the elements on either side.
+    ///
+    /// Positions outside `beginning..end` count as non-word, and an empty
+    /// window never has a boundary.
+    #[inline]
+    fn word_boundary(&self, ptr: usize, is_word: fn(u32) -> bool) -> bool {
+        if self.beginning == self.end {
+            return false;
+        }
+        let word_before = if ptr > self.beginning {
+            is_word(self.s[ptr - 1])
+        } else {
+            false
+        };
+        let word_after = if ptr < self.end {
+            is_word(self.s[ptr])
+        } else {
+            false
+        };
+        // A boundary exists exactly when the two sides disagree.
+        word_before ^ word_after
+    }
+
+    /// `SRE(charset)` — membership test for a compiled character set.
+    ///
+    /// Walks the set items stored in `self.code` from index `set` and
+    /// returns as soon as one of them accepts `ch`. Each `OP_NEGATE` item
+    /// inverts the answer reported by the items after it; reaching
+    /// `OP_FAILURE` means no item accepted `ch`. An unrecognised item makes
+    /// the test fail.
+    fn charset(&self, set: usize, ch: u32) -> bool {
+        let code = self.code;
+        let mut cursor = set;
+        // Answer given when an item accepts `ch`; flipped by every NEGATE.
+        let mut verdict = true;
+        loop {
+            let item = code[cursor];
+            let arg = cursor + 1;
+            // Each arm either returns or yields the index of the next item.
+            cursor = match item {
+                OP_FAILURE => return !verdict,
+                OP_LITERAL => {
+                    if code[arg] == ch {
+                        return verdict;
+                    }
+                    arg + 1
+                }
+                OP_CATEGORY => {
+                    if category(code[arg], ch) {
+                        return verdict;
+                    }
+                    arg + 1
+                }
+                OP_CHARSET => {
+                    // <CHARSET> <bitmap: 8 words>, one bit per value below 256.
+                    if ch < 256 {
+                        let word = code[arg + (ch / 32) as usize];
+                        if (word & (1u32 << (ch & 31))) != 0 {
+                            return verdict;
+                        }
+                    }
+                    arg + 8
+                }
+                OP_RANGE => {
+                    if code[arg] <= ch && ch <= code[arg + 1] {
+                        return verdict;
+                    }
+                    arg + 2
+                }
+                OP_RANGE_UNI_IGNORE => {
+                    if code[arg] <= ch && ch <= code[arg + 1] {
+                        return verdict;
+                    }
+                    // Retry with the upper-case mapping of `ch`.
+                    let upper = upper_unicode(ch);
+                    if code[arg] <= upper && upper <= code[arg + 1] {
+                        return verdict;
+                    }
+                    arg + 2
+                }
+                OP_NEGATE => {
+                    verdict = !verdict;
+                    arg
+                }
+                OP_BIGCHARSET => {
+                    // <BIGCHARSET> <blockcount> <256 block indices, one byte
+                    //   each, packed into 64 words> <blockcount * 8 words>
+                    let block_count = code[arg] as usize;
+                    let index_table = arg + 1;
+                    let bitmaps = index_table + 64;
+                    if ch < 0x10000 {
+                        // The high byte of `ch` selects the block index.
+                        let slot = (ch >> 8) as usize;
+                        let packed = code[index_table + slot / 4];
+                        let block = ((packed >> ((slot % 4) * 8)) & 0xff) as usize;
+                        let bit = (block * 256 + (ch as usize & 255)) as u32;
+                        if (code[bitmaps + (bit / 32) as usize] & (1u32 << (bit & 31))) != 0 {
+                            return verdict;
+                        }
+                    }
+                    bitmaps + block_count * 8
+                }
+                _ => return false,
+            };
+        }
+    }
+
+    /// Case-insensitive set membership under the locale mappings: `ch`
+    /// belongs to the set at `set` if its `lower_locale` form does, or if
+    /// its `upper_locale` form differs from that and does.
+    fn charset_loc_ignore(&self, set: usize, ch: u32) -> bool {
+        let lowered = lower_locale(ch);
+        if self.charset(set, lowered) {
+            return true;
+        }
+        let raised = upper_locale(ch);
+        if raised == lowered {
+            // Same value was already rejected above.
+            return false;
+        }
+        self.charset(set, raised)
+    }
+
+    /// `SRE(count)` — number of consecutive matches of the item at `pat`.
+    ///
+    /// Counting starts at `self.ptr` and stops at `self.end`, or after
+    /// `maxcount` input elements when `maxcount` is not `MAXREPEAT` and is
+    /// smaller than the remaining input. `self.ptr` holds its original
+    /// value again when the call returns successfully.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from `do_match`, which is used for items that
+    /// have no dedicated fast path.
+    fn count(&mut self, pat: usize, maxcount: u32) -> Result<usize, RuntimeError> {
+        /// Length of the run of elements in `text[from..to]` accepted by
+        /// `accept`, scanning forward from `from`.
+        fn run_length(text: &[u32], from: usize, to: usize, accept: impl Fn(u32) -> bool) -> usize {
+            let mut pos = from;
+            while pos < to && accept(text[pos]) {
+                pos += 1;
+            }
+            pos - from
+        }
+
+        let code = self.code;
+        let origin = self.ptr;
+        let limit = if maxcount != MAXREPEAT && (maxcount as usize) < self.end - origin {
+            origin + maxcount as usize
+        } else {
+            self.end
+        };
+        let text = self.s;
+        let counted = match code[pat] {
+            OP_IN => run_length(text, origin, limit, |c| self.charset(pat + 2, c)),
+            OP_ANY => run_length(text, origin, limit, |c| !is_linebreak(c)),
+            OP_ANY_ALL => limit - origin,
+            OP_LITERAL => {
+                let chr = code[pat + 1];
+                run_length(text, origin, limit, |c| c == chr)
+            }
+            OP_NOT_LITERAL => {
+                let chr = code[pat + 1];
+                run_length(text, origin, limit, |c| c != chr)
+            }
+            OP_LITERAL_IGNORE => {
+                let chr = code[pat + 1];
+                run_length(text, origin, limit, |c| lower_ascii(c) == chr)
+            }
+            OP_LITERAL_UNI_IGNORE => {
+                let chr = code[pat + 1];
+                run_length(text, origin, limit, |c| lower_unicode(c) == chr)
+            }
+            OP_LITERAL_LOC_IGNORE => {
+                let chr = code[pat + 1];
+                run_length(text, origin, limit, |c| char_loc_ignore(chr, c))
+            }
+            OP_NOT_LITERAL_IGNORE => {
+                let chr = code[pat + 1];
+                run_length(text, origin, limit, |c| lower_ascii(c) != chr)
+            }
+            OP_NOT_LITERAL_UNI_IGNORE => {
+                let chr = code[pat + 1];
+                run_length(text, origin, limit, |c| lower_unicode(c) != chr)
+            }
+            OP_NOT_LITERAL_LOC_IGNORE => {
+                let chr = code[pat + 1];
+                run_length(text, origin, limit, |c| !char_loc_ignore(chr, c))
+            }
+            _ => {
+                // No fast path: run the item through the matcher until it
+                // fails or the limit is reached.
+                self.ptr = origin;
+                while self.ptr < limit {
+                    if !self.do_match(pat, false)? {
+                        break;
+                    }
+                }
+                self.ptr - origin
+            }
+        };
+        self.ptr = origin;
+        Ok(counted)
+    }
+
+    /// `SRE(match)` — attempts to match the (sub)pattern at `pat` against
+    /// the input starting at `self.ptr`.
+    ///
+    /// Returns `Ok(true)` on a match, in which case `self.ptr` is the end
+    /// position. This wrapper only tracks nesting depth; the matching
+    /// itself happens in `do_match_inner`.
+    ///
+    /// # Errors
+    ///
+    /// Fails when nesting exceeds `MAX_DEPTH`, and propagates errors from
+    /// `do_match_inner`.
+    fn do_match(&mut self, pat: usize, toplevel: bool) -> Result<bool, RuntimeError> {
+        self.depth += 1;
+        let outcome = if self.depth > MAX_DEPTH {
+            Err(runtime_error(
+                "internal: regular expression recursion limit exceeded",
+            ))
+        } else {
+            self.do_match_inner(pat, toplevel)
+        };
+        // Leave the depth counter balanced on every exit path.
+        self.depth -= 1;
+        outcome
+    }
+
+    /// Interpreter loop behind `do_match`: executes the compiled program in
+    /// `self.code` from index `pat` against `self.s`, starting at `self.ptr`
+    /// and using `self.end` as the upper bound for consuming opcodes.
+    ///
+    /// `toplevel` is forwarded to continuations of the current match. When
+    /// it is set, a final `OP_SUCCESS` is rejected if `match_all` is set and
+    /// the position is not `self.end`, or if `must_advance` is set and the
+    /// position is still `self.start`.
+    ///
+    /// Returns `Ok(true)` with `self.ptr` at the end of the match, or
+    /// `Ok(false)` when this path cannot match.
+    ///
+    /// # Errors
+    ///
+    /// Fails on an opcode this loop does not handle and on `OP_MAX_UNTIL` /
+    /// `OP_MIN_UNTIL` with no active repeat context; also propagates any
+    /// error from nested `do_match` / `count` calls.
+    fn do_match_inner(&mut self, mut pat: usize, toplevel: bool) -> Result<bool, RuntimeError> {
+        let code = self.code;
+        // Local cursor; written back to `self.ptr` before nested calls and
+        // on success.
+        let mut ptr = self.ptr;
+        let end = self.end;
+
+        // Optimization info block at the head of the (sub)pattern.
+        if code[pat] == OP_INFO {
+            // code[pat + 3] is used as a minimum length: fail early when
+            // fewer elements remain.
+            let min = code[pat + 3] as usize;
+            if min != 0 && end - ptr < min {
+                return Ok(false);
+            }
+            pat += code[pat + 1] as usize + 1;
+        }
+
+        loop {
+            let op = code[pat];
+            // From here on `pat` indexes the first operand of `op`.
+            pat += 1;
+            match op {
+                OP_MARK => {
+                    // Operand: mark slot to set to the current position.
+                    let i = code[pat] as usize;
+                    let ii = i as isize;
+                    if i & 1 != 0 {
+                        self.lastindex = (i / 2 + 1) as isize;
+                    }
+                    if ii > self.lastmark {
+                        // Clear the slots skipped between the old `lastmark`
+                        // and this one.
+                        let mut j = self.lastmark + 1;
+                        while j < ii {
+                            self.marks[j as usize] = -1;
+                            j += 1;
+                        }
+                        self.lastmark = ii;
+                    }
+                    self.marks[i] = ptr as isize;
+                    pat += 1;
+                }
+                OP_LITERAL => {
+                    if ptr >= end || self.s[ptr] != code[pat] {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_NOT_LITERAL => {
+                    if ptr >= end || self.s[ptr] == code[pat] {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_SUCCESS => {
+                    // Only a top-level SUCCESS enforces the `match_all` /
+                    // `must_advance` guards.
+                    if toplevel
+                        && ((self.match_all && ptr != self.end)
+                            || (self.must_advance && ptr == self.start))
+                    {
+                        return Ok(false);
+                    }
+                    self.ptr = ptr;
+                    return Ok(true);
+                }
+                OP_AT => {
+                    if !self.at(ptr, code[pat]) {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                }
+                OP_CATEGORY => {
+                    if ptr >= end || !category(code[pat], self.s[ptr]) {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_ANY => {
+                    if ptr >= end || is_linebreak(self.s[ptr]) {
+                        return Ok(false);
+                    }
+                    ptr += 1;
+                }
+                OP_ANY_ALL => {
+                    if ptr >= end {
+                        return Ok(false);
+                    }
+                    ptr += 1;
+                }
+                OP_IN => {
+                    // Operands: skip, then the set items starting at pat + 1.
+                    if ptr >= end || !self.charset(pat + 1, self.s[ptr]) {
+                        return Ok(false);
+                    }
+                    pat += code[pat] as usize;
+                    ptr += 1;
+                }
+                OP_LITERAL_IGNORE => {
+                    if ptr >= end || lower_ascii(self.s[ptr]) != code[pat] {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_LITERAL_UNI_IGNORE => {
+                    if ptr >= end || lower_unicode(self.s[ptr]) != code[pat] {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_LITERAL_LOC_IGNORE => {
+                    if ptr >= end || !char_loc_ignore(code[pat], self.s[ptr]) {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_NOT_LITERAL_IGNORE => {
+                    if ptr >= end || lower_ascii(self.s[ptr]) == code[pat] {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_NOT_LITERAL_UNI_IGNORE => {
+                    if ptr >= end || lower_unicode(self.s[ptr]) == code[pat] {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_NOT_LITERAL_LOC_IGNORE => {
+                    if ptr >= end || char_loc_ignore(code[pat], self.s[ptr]) {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                    ptr += 1;
+                }
+                OP_IN_IGNORE => {
+                    if ptr >= end || !self.charset(pat + 1, lower_ascii(self.s[ptr])) {
+                        return Ok(false);
+                    }
+                    pat += code[pat] as usize;
+                    ptr += 1;
+                }
+                OP_IN_UNI_IGNORE => {
+                    if ptr >= end || !self.charset(pat + 1, lower_unicode(self.s[ptr])) {
+                        return Ok(false);
+                    }
+                    pat += code[pat] as usize;
+                    ptr += 1;
+                }
+                OP_IN_LOC_IGNORE => {
+                    if ptr >= end || !self.charset_loc_ignore(pat + 1, self.s[ptr]) {
+                        return Ok(false);
+                    }
+                    pat += code[pat] as usize;
+                    ptr += 1;
+                }
+                OP_JUMP | OP_INFO => {
+                    // Operand is a relative offset from the operand itself.
+                    pat += code[pat] as usize;
+                }
+                OP_BRANCH => {
+                    // Alternatives are chained by skip words; a zero skip
+                    // ends the list. Capture state is rolled back after
+                    // every alternative that fails.
+                    let save = self.snapshot();
+                    while code[pat] != 0 {
+                        // Fast skip when the branch can't possibly match.
+                        if code[pat + 1] == OP_LITERAL
+                            && (ptr >= end || self.s[ptr] != code[pat + 2])
+                        {
+                            pat += code[pat] as usize;
+                            continue;
+                        }
+                        if code[pat + 1] == OP_IN
+                            && (ptr >= end || !self.charset(pat + 3, self.s[ptr]))
+                        {
+                            pat += code[pat] as usize;
+                            continue;
+                        }
+                        self.ptr = ptr;
+                        // Each alternative flows through its trailing JUMP to
+                        // the BRANCH tail and on to the final SUCCESS, so it
+                        // inherits `toplevel` (CPython `DO_JUMP`) — otherwise
+                        // the `must_advance`/`match_all` guards are skipped for
+                        // top-level alternations (e.g. `fullmatch('a|ab','ab')`).
+                        if self.do_match(pat + 1, toplevel)? {
+                            return Ok(true);
+                        }
+                        self.restore(&save);
+                        pat += code[pat] as usize;
+                    }
+                    return Ok(false);
+                }
+                OP_REPEAT_ONE => {
+                    // Operands as read below: code[pat] = skip to the tail,
+                    // code[pat + 1] = min, code[pat + 2] = max, item at
+                    // pat + 3. Takes as many items as `count` finds, then
+                    // gives them back one at a time until the tail matches.
+                    let skip = code[pat] as usize;
+                    let pmin = code[pat + 1] as usize;
+                    let pmax = code[pat + 2];
+                    if pmin > end - ptr {
+                        return Ok(false);
+                    }
+                    self.ptr = ptr;
+                    let cnt0 = self.count(pat + 3, pmax)?;
+                    let mut cnt = cnt0 as isize;
+                    if (cnt as usize) < pmin {
+                        return Ok(false);
+                    }
+                    let tail = pat + skip;
+                    let after = ptr + cnt as usize;
+                    // Shortcut: the tail is just SUCCESS and the repeat
+                    // consumed everything up to `self.end`.
+                    if code[tail] == OP_SUCCESS
+                        && after == self.end
+                        && !(toplevel && self.must_advance && after == self.start)
+                    {
+                        self.ptr = after;
+                        return Ok(true);
+                    }
+                    let save = self.snapshot();
+                    let orig = ptr;
+                    let pmin_i = pmin as isize;
+                    if code[tail] == OP_LITERAL {
+                        // The tail starts with a literal: skip backtrack
+                        // positions whose next element is not that literal.
+                        let chr = code[tail + 1];
+                        loop {
+                            while cnt >= pmin_i && {
+                                let pos = orig + cnt as usize;
+                                pos >= end || self.s[pos] != chr
+                            } {
+                                cnt -= 1;
+                            }
+                            if cnt < pmin_i {
+                                break;
+                            }
+                            let pos = orig + cnt as usize;
+                            self.ptr = pos;
+                            // The tail is the continuation of *this* match,
+                            // so it inherits `toplevel` (CPython `DO_JUMP`)
+                            // — otherwise the trailing SUCCESS would skip the
+                            // empty-match / `must_advance` guard and the
+                            // scanner could loop on a zero-width match.
+                            if self.do_match(tail, toplevel)? {
+                                return Ok(true);
+                            }
+                            self.restore(&save);
+                            cnt -= 1;
+                        }
+                    } else {
+                        while cnt >= pmin_i {
+                            let pos = orig + cnt as usize;
+                            self.ptr = pos;
+                            if self.do_match(tail, toplevel)? {
+                                return Ok(true);
+                            }
+                            self.restore(&save);
+                            cnt -= 1;
+                        }
+                    }
+                    return Ok(false);
+                }
+                OP_MIN_REPEAT_ONE => {
+                    // Same operand layout as OP_REPEAT_ONE, but takes only
+                    // the minimum first and adds one item each time the tail
+                    // fails.
+                    let skip = code[pat] as usize;
+                    let pmin = code[pat + 1] as usize;
+                    let pmax = code[pat + 2];
+                    if pmin > end - ptr {
+                        return Ok(false);
+                    }
+                    self.ptr = ptr;
+                    let mut cnt: isize = 0;
+                    if pmin != 0 {
+                        // Count with the minimum as the cap.
+                        let r = self.count(pat + 3, code[pat + 1])?;
+                        if r < pmin {
+                            return Ok(false);
+                        }
+                        cnt = r as isize;
+                        ptr += cnt as usize;
+                    }
+                    let tail = pat + skip;
+                    if code[tail] == OP_SUCCESS
+                        && !(toplevel
+                            && ((self.match_all && ptr != self.end)
+                                || (self.must_advance && ptr == self.start)))
+                    {
+                        self.ptr = ptr;
+                        return Ok(true);
+                    }
+                    let save = self.snapshot();
+                    loop {
+                        if !(pmax == MAXREPEAT || (cnt as u32) <= pmax) {
+                            break;
+                        }
+                        self.ptr = ptr;
+                        if self.do_match(tail, toplevel)? {
+                            return Ok(true);
+                        }
+                        self.restore(&save);
+                        self.ptr = ptr;
+                        // Tail failed: try to extend the repeat by one item.
+                        let r = self.count(pat + 3, 1)?;
+                        if r == 0 {
+                            break;
+                        }
+                        ptr += 1;
+                        cnt += 1;
+                    }
+                    return Ok(false);
+                }
+                OP_POSSESSIVE_REPEAT_ONE => {
+                    // Same operand layout as OP_REPEAT_ONE; the counted items
+                    // are kept and never given back.
+                    let skip = code[pat] as usize;
+                    let pmin = code[pat + 1] as usize;
+                    let pmax = code[pat + 2];
+                    if ptr + pmin > end {
+                        return Ok(false);
+                    }
+                    self.ptr = ptr;
+                    let cnt = self.count(pat + 3, pmax)?;
+                    ptr += cnt;
+                    if cnt < pmin {
+                        return Ok(false);
+                    }
+                    pat += skip;
+                    if code[pat] == OP_SUCCESS
+                        && ptr == self.end
+                        && !(toplevel && self.must_advance && ptr == self.start)
+                    {
+                        self.ptr = ptr;
+                        return Ok(true);
+                    }
+                    // Evaluate the tail in this same frame.
+                }
+                OP_REPEAT => {
+                    // Pushes a repeat context for this operator, runs the
+                    // code at `pat + skip`, then pops the context again
+                    // whatever the outcome.
+                    let skip = code[pat] as usize;
+                    let rep = RepeatCtx {
+                        count: -1,
+                        pattern: pat,
+                        last_ptr: -1,
+                        prev: self.cur_repeat,
+                    };
+                    let idx = self.repeats.len();
+                    self.repeats.push(rep);
+                    self.cur_repeat = Some(idx);
+                    self.ptr = ptr;
+                    // The MAX_UNTIL/MIN_UNTIL operator (reached via `pat+skip`)
+                    // ultimately continues to the pattern tail and SUCCESS, so
+                    // it inherits `toplevel` (CPython `DO_JUMP`). Forcing it to
+                    // `false` would skip the `must_advance` guard and let the
+                    // scanner loop forever on a zero-width repeat such as
+                    // `(a)*` over an empty match.
+                    let r = self.do_match(pat + skip, toplevel);
+                    self.cur_repeat = self.repeats[idx].prev;
+                    self.repeats.truncate(idx);
+                    return r;
+                }
+                OP_MAX_UNTIL => {
+                    // Greedy step of the active repeat context: prefer
+                    // another item, fall back to the tail.
+                    let idx = self
+                        .cur_repeat
+                        .ok_or_else(|| runtime_error("internal: MAX_UNTIL without REPEAT"))?;
+                    self.ptr = ptr;
+                    let count = self.repeats[idx].count + 1;
+                    // `rpat` is the operand index stored by OP_REPEAT: min at
+                    // rpat + 1, max at rpat + 2, item at rpat + 3.
+                    let rpat = self.repeats[idx].pattern;
+                    let rmin = code[rpat + 1] as isize;
+                    let rmax = code[rpat + 2];
+                    let item = rpat + 3;
+                    if count < rmin {
+                        self.repeats[idx].count = count;
+                        self.ptr = ptr;
+                        // Repeated-item matches inherit `toplevel` (CPython
+                        // `DO_JUMP` for JUMP_MAX_UNTIL_1/_2): when the item can
+                        // match empty (e.g. `(a?)*`), the recursion bottoms out
+                        // at the tail SUCCESS, which must still see the
+                        // `must_advance`/`match_all` guards.
+                        if self.do_match(item, toplevel)? {
+                            return Ok(true);
+                        }
+                        self.repeats[idx].count = count - 1;
+                        self.ptr = ptr;
+                        return Ok(false);
+                    }
+                    // Try one more item unless the maximum is reached or the
+                    // position equals `last_ptr` (the position recorded by
+                    // the previous attempt).
+                    if (count < rmax as isize || rmax == MAXREPEAT)
+                        && (ptr as isize) != self.repeats[idx].last_ptr
+                    {
+                        self.repeats[idx].count = count;
+                        let save = self.snapshot();
+                        let saved_last = self.repeats[idx].last_ptr;
+                        self.repeats[idx].last_ptr = ptr as isize;
+                        self.ptr = ptr;
+                        if self.do_match(item, toplevel)? {
+                            return Ok(true);
+                        }
+                        self.repeats[idx].last_ptr = saved_last;
+                        self.restore(&save);
+                        self.repeats[idx].count = count - 1;
+                        self.ptr = ptr;
+                    }
+                    // Match the tail with the enclosing repeat context
+                    // active, then reinstate this one.
+                    let prev = self.repeats[idx].prev;
+                    self.cur_repeat = prev;
+                    self.ptr = ptr;
+                    // Tail continuation inherits `toplevel` (CPython
+                    // `DO_JUMP`) so the trailing SUCCESS still honours the
+                    // `must_advance`/`match_all` guards.
+                    let r = self.do_match(pat, toplevel)?;
+                    self.cur_repeat = Some(idx);
+                    if r {
+                        return Ok(true);
+                    }
+                    self.ptr = ptr;
+                    return Ok(false);
+                }
+                OP_MIN_UNTIL => {
+                    // Lazy step of the active repeat context: prefer the
+                    // tail, fall back to another item.
+                    let idx = self
+                        .cur_repeat
+                        .ok_or_else(|| runtime_error("internal: MIN_UNTIL without REPEAT"))?;
+                    self.ptr = ptr;
+                    let count = self.repeats[idx].count + 1;
+                    let rpat = self.repeats[idx].pattern;
+                    let rmin = code[rpat + 1] as isize;
+                    let rmax = code[rpat + 2];
+                    let item = rpat + 3;
+                    if count < rmin {
+                        self.repeats[idx].count = count;
+                        self.ptr = ptr;
+                        // Inherit `toplevel` (CPython `DO_JUMP` JUMP_MIN_UNTIL_1).
+                        if self.do_match(item, toplevel)? {
+                            return Ok(true);
+                        }
+                        self.repeats[idx].count = count - 1;
+                        self.ptr = ptr;
+                        return Ok(false);
+                    }
+                    // Minimum satisfied: try the tail first, with the
+                    // enclosing repeat context active.
+                    let prev = self.repeats[idx].prev;
+                    let save = self.snapshot();
+                    self.cur_repeat = prev;
+                    self.ptr = ptr;
+                    let r = self.do_match(pat, toplevel)?;
+                    self.cur_repeat = Some(idx);
+                    if r {
+                        return Ok(true);
+                    }
+                    self.restore(&save);
+                    self.ptr = ptr;
+                    // Give up when the maximum is reached or the position
+                    // equals `last_ptr`.
+                    if (count >= rmax as isize && rmax != MAXREPEAT)
+                        || (ptr as isize) == self.repeats[idx].last_ptr
+                    {
+                        return Ok(false);
+                    }
+                    self.repeats[idx].count = count;
+                    let saved_last = self.repeats[idx].last_ptr;
+                    self.repeats[idx].last_ptr = ptr as isize;
+                    self.ptr = ptr;
+                    // Inherit `toplevel` (CPython `DO_JUMP` JUMP_MIN_UNTIL_3).
+                    if self.do_match(item, toplevel)? {
+                        return Ok(true);
+                    }
+                    self.repeats[idx].last_ptr = saved_last;
+                    self.repeats[idx].count = count - 1;
+                    self.ptr = ptr;
+                    return Ok(false);
+                }
+                OP_POSSESSIVE_REPEAT => {
+                    // Operands as read below: code[pat] = skip, code[pat + 1]
+                    // = min, code[pat + 2] = max, body at pat + 3. The body
+                    // is matched in a loop here; iterations that succeeded
+                    // are never retried.
+                    let skip = code[pat] as usize;
+                    let pmin = code[pat + 1] as usize;
+                    let pmax = code[pat + 2];
+                    self.ptr = ptr;
+                    // Placeholder repeat context pushed for the duration of
+                    // the body matches; its `pattern` is not a valid index.
+                    let rep = RepeatCtx {
+                        count: -1,
+                        pattern: usize::MAX,
+                        last_ptr: -1,
+                        prev: self.cur_repeat,
+                    };
+                    let idx = self.repeats.len();
+                    self.repeats.push(rep);
+                    self.cur_repeat = Some(idx);
+                    let body = pat + 3;
+                    let mut cnt: usize = 0;
+                    let mut failed = false;
+                    // Mandatory iterations: any failure fails the operator.
+                    while cnt < pmin {
+                        if self.do_match(body, false)? {
+                            cnt += 1;
+                        } else {
+                            failed = true;
+                            break;
+                        }
+                    }
+                    if failed {
+                        self.ptr = ptr;
+                        self.cur_repeat = self.repeats[idx].prev;
+                        self.repeats.truncate(idx);
+                        return Ok(false);
+                    }
+                    // Optional iterations: stop at the maximum, on the first
+                    // failure, or once an iteration leaves `self.ptr`
+                    // unchanged.
+                    let mut prev_ptr: Option<usize> = None;
+                    loop {
+                        let can_more = (pmax == MAXREPEAT || (cnt as u32) < pmax)
+                            && Some(self.ptr) != prev_ptr;
+                        if !can_more {
+                            break;
+                        }
+                        let save = self.snapshot();
+                        prev_ptr = Some(self.ptr);
+                        if self.do_match(body, false)? {
+                            cnt += 1;
+                        } else {
+                            self.restore(&save);
+                            self.ptr = prev_ptr.unwrap();
+                            break;
+                        }
+                    }
+                    self.cur_repeat = self.repeats[idx].prev;
+                    self.repeats.truncate(idx);
+                    // Continue with the code one past `pat + skip`.
+                    pat += skip + 1;
+                    ptr = self.ptr;
+                    continue;
+                }
+                OP_ATOMIC_GROUP => {
+                    // The group body at pat + 1 is matched once as a nested,
+                    // non-toplevel match; on success matching resumes at
+                    // `pat + skip` from wherever the group ended.
+                    let skip = code[pat] as usize;
+                    self.ptr = ptr;
+                    if self.do_match(pat + 1, false)? {
+                        pat += skip;
+                        ptr = self.ptr;
+                    } else {
+                        self.ptr = ptr;
+                        return Ok(false);
+                    }
+                }
+                OP_GROUPREF => {
+                    if !self.groupref_match(pat, GroupRefKind::Exact, end, ptr, &mut ptr) {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                }
+                OP_GROUPREF_IGNORE => {
+                    if !self.groupref_match(pat, GroupRefKind::Ascii, end, ptr, &mut ptr) {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                }
+                OP_GROUPREF_UNI_IGNORE => {
+                    if !self.groupref_match(pat, GroupRefKind::Unicode, end, ptr, &mut ptr) {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                }
+                OP_GROUPREF_LOC_IGNORE => {
+                    if !self.groupref_match(pat, GroupRefKind::Locale, end, ptr, &mut ptr) {
+                        return Ok(false);
+                    }
+                    pat += 1;
+                }
+                OP_GROUPREF_EXISTS => {
+                    // Operands: group number, then a skip that is taken when
+                    // the group has no valid start/end marks.
+                    let g = code[pat] as usize;
+                    let skip = code[pat + 1] as usize;
+                    let groupref = (g * 2) as isize;
+                    let set = if groupref >= self.lastmark {
+                        false
+                    } else {
+                        let p = self.marks[groupref as usize];
+                        let e = self.marks[groupref as usize + 1];
+                        !(p < 0 || e < 0 || e < p)
+                    };
+                    if set {
+                        pat += 2;
+                    } else {
+                        pat += skip;
+                    }
+                }
+                OP_ASSERT => {
+                    // Operands: skip, `back` (how far before `ptr` the nested
+                    // match starts), then the asserted pattern at pat + 2.
+                    // `ptr` itself is left unchanged.
+                    let skip = code[pat] as usize;
+                    let back = code[pat + 1] as usize;
+                    if ptr - self.beginning < back {
+                        return Ok(false);
+                    }
+                    self.ptr = ptr - back;
+                    if !self.do_match(pat + 2, false)? {
+                        return Ok(false);
+                    }
+                    pat += skip;
+                }
+                OP_ASSERT_NOT => {
+                    // Same layout as OP_ASSERT. The assertion holds when
+                    // there is not enough room to step back, or when the
+                    // nested match fails; capture state is restored either
+                    // way.
+                    let skip = code[pat] as usize;
+                    let back = code[pat + 1] as usize;
+                    if ptr - self.beginning >= back {
+                        self.ptr = ptr - back;
+                        let save = self.snapshot();
+                        let matched = self.do_match(pat + 2, false)?;
+                        self.restore(&save);
+                        if matched {
+                            return Ok(false);
+                        }
+                    }
+                    pat += skip;
+                }
+                OP_FAILURE => return Ok(false),
+                _ => {
+                    return Err(value_error(format!(
+                        "internal: unsupported sre opcode {op}"
+                    )));
+                }
+            }
+        }
+    }
+
+    /// Match the text captured by the group numbered `self.code[pat]`
+    /// against the subject at `start_ptr`, comparing under `kind`.
+    /// On success stores the position just past the matched text in
+    /// `ptr_out` and returns `true`; on failure `ptr_out` is untouched.
+    fn groupref_match(
+        &self,
+        pat: usize,
+        kind: GroupRefKind,
+        end: usize,
+        start_ptr: usize,
+        ptr_out: &mut usize,
+    ) -> bool {
+        let mark_idx = (self.code[pat] as usize * 2) as isize;
+        if mark_idx >= self.lastmark {
+            return false;
+        }
+        let mark_idx = mark_idx as usize;
+        let (lo, hi) = (self.marks[mark_idx], self.marks[mark_idx + 1]);
+        // A negative mark or an inverted span means there is no capture.
+        if lo < 0 || hi < 0 || hi < lo {
+            return false;
+        }
+        let mut cursor = start_ptr;
+        for src in (lo as usize)..(hi as usize) {
+            if cursor >= end {
+                return false;
+            }
+            let (have, want) = (self.s[cursor], self.s[src]);
+            let same = match kind {
+                GroupRefKind::Exact => have == want,
+                GroupRefKind::Ascii => lower_ascii(have) == lower_ascii(want),
+                GroupRefKind::Unicode => lower_unicode(have) == lower_unicode(want),
+                GroupRefKind::Locale => lower_locale(have) == lower_locale(want),
+            };
+            if !same {
+                return false;
+            }
+            cursor += 1;
+        }
+        *ptr_out = cursor;
+        true
+    }
+
+    /// `SRE(search)` — scan for the leftmost match at or after
+    /// `self.start`, attempting each position in turn up to `self.end`.
+    ///
+    /// Returns `Ok(Some(pos))` with the start of the first match,
+    /// `Ok(None)` when no attempt succeeds, or the error from `do_match`.
+    fn search(&mut self) -> Result<Option<usize>, RuntimeError> {
+        // A leading INFO block stores its skip in word 1 and a minimum
+        // length in word 3; the pattern proper begins right after it.
+        let has_info = self.code[0] == OP_INFO;
+        let min_width = if has_info { self.code[3] as usize } else { 0 };
+        let body = if has_info { 1 + self.code[1] as usize } else { 0 };
+        let anchored = self.code.get(body) == Some(&OP_AT)
+            && matches!(
+                self.code.get(body + 1).copied(),
+                Some(AT_BEGINNING | AT_BEGINNING_STRING)
+            );
+
+        let mut at = self.start;
+        let mut first_attempt = true;
+        loop {
+            // Give up once less than the minimum length remains.
+            if min_width != 0 && self.end.saturating_sub(at) < min_width {
+                return Ok(None);
+            }
+            self.start = at;
+            self.ptr = at;
+            self.reset_capture();
+            let hit = self.do_match(0, true)?;
+            if first_attempt {
+                // Clear `must_advance` once the first attempt has run.
+                self.must_advance = false;
+                first_attempt = false;
+            }
+            if hit {
+                return Ok(Some(at));
+            }
+            // An anchored pattern is only ever tried at the first position.
+            if anchored || at >= self.end {
+                return Ok(None);
+            }
+            at += 1;
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+enum GroupRefKind { // comparison rule applied by `groupref_match`
+    Exact,   // elements must be equal as-is
+    Ascii,   // compare after `lower_ascii`
+    Unicode, // compare after `lower_unicode`
+    Locale,  // compare after `lower_locale`
+}
+
+// ---------------------------------------------------------------------------
+// Module functions
+// ---------------------------------------------------------------------------
+
+fn arg_i64(args: &[Object], i: usize, name: &str) -> Result<i64, RuntimeError> {
+    args.get(i)
+        .and_then(|o| o.as_i64())
+        .ok_or_else(|| type_error(format!("_sre: expected int for {name}")))
+}
+
+/// Convert a Python `list`/`tuple` of ints into a `Vec<u32>`; the first
+/// non-int item or value outside the `u32` range aborts with an error.
+fn codeseq_to_vec(obj: &Object) -> Result<Vec<u32>, RuntimeError> {
+    let convert = |items: &[Object]| -> Result<Vec<u32>, RuntimeError> {
+        items
+            .iter()
+            .map(|item| match item.as_i64() {
+                None => Err(type_error("_sre.compile: code must be a sequence of ints")),
+                Some(v) if v < 0 || v > i64::from(u32::MAX) => {
+                    Err(value_error("_sre.compile: code value out of range"))
+                }
+                Some(v) => Ok(v as u32),
+            })
+            .collect()
+    };
+    match obj {
+        Object::Tuple(t) => convert(t),
+        Object::List(l) => convert(&l.borrow()),
+        _ => Err(type_error("_sre.compile: code must be a list or tuple")),
+    }
+}
+
+/// Expand the subject into one `u32` per code point (str) or byte (bytes/bytearray).
+fn subject_to_vec(obj: &Object) -> Result<Vec<u32>, RuntimeError> {
+    Ok(match obj {
+        Object::Str(text) => text.chars().map(u32::from).collect(),
+        Object::Bytes(raw) => raw.iter().copied().map(u32::from).collect(),
+        Object::ByteArray(raw) => raw.borrow().iter().copied().map(u32::from).collect(),
+        _ => return Err(type_error("expected string or bytes-like object")),
+    })
+}
+
+/// `_sre.compile(code, groups)` — append the program to `REGISTRY` and
+/// return its index there as an integer handle.
+fn sre_compile(args: &[Object]) -> Result<Object, RuntimeError> {
+    let code_arg = args.first().ok_or_else(|| type_error("_sre.compile: code"))?;
+    let code = codeseq_to_vec(code_arg)?;
+    let groups = arg_i64(args, 1, "groups")?.max(0) as usize;
+    let handle = REGISTRY.with(|cell| {
+        let mut programs = cell.borrow_mut();
+        let slot = programs.len();
+        programs.push(Rc::new(CompiledCode { code, groups }));
+        slot
+    });
+    Ok(Object::Int(handle as i64))
+}
+
+/// `_sre.exec(handle, string, pos, endpos, mode, must_advance)`.
+///
+/// `mode` 1 runs `match`, 2 runs `fullmatch`, anything else `search`;
+/// `pos`/`endpos` are clamped to the subject, `must_advance` is optional.
+/// An empty window (`pos > endpos`) yields `None` without matching.
+///
+/// Returns `None` on no match, otherwise a tuple
+/// `(start, end, lastindex, marks)` where `marks` is a tuple of
+/// `2 * groups` indices into the subject (`-1` for an unset group).
+fn sre_exec(args: &[Object]) -> Result<Object, RuntimeError> {
+    let handle = arg_i64(args, 0, "handle")? as usize;
+    let program = REGISTRY
+        .with(|cell| cell.borrow().get(handle).cloned())
+        .ok_or_else(|| value_error("_sre.exec: invalid pattern handle"))?;
+    let subject_arg = args.get(1).ok_or_else(|| type_error("_sre.exec: string"))?;
+    let subject = subject_to_vec(subject_arg)?;
+    let len = subject.len() as i64;
+    let pos = arg_i64(args, 2, "pos")?.clamp(0, len) as usize;
+    let endpos = arg_i64(args, 3, "endpos")?.clamp(0, len) as usize;
+    let mode = arg_i64(args, 4, "mode")?;
+    // A missing or non-int sixth argument leaves `must_advance` off.
+    let must_advance = args.get(5).and_then(|flag| flag.as_i64()).map_or(false, |n| n != 0);
+    if pos > endpos {
+        return Ok(Object::None);
+    }
+
+    // Confine the matcher to the `pos..endpos` window.
+    let mut m = Matcher::new(&subject, &program.code, program.groups);
+    m.end = endpos;
+    m.start = pos;
+    m.ptr = pos;
+    m.must_advance = must_advance;
+
+    let found = match mode {
+        // 1 = match (anchored at pos); 2 = fullmatch (anchored + must
+        // reach endpos), which additionally sets `match_all`.
+        1 | 2 => {
+            if mode == 2 {
+                m.match_all = true;
+            }
+            if m.do_match(0, true)? {
+                Some(pos)
+            } else {
+                None
+            }
+        }
+        // anything else = search
+        _ => m.search()?,
+    };
+    let mstart = match found {
+        Some(at) => at,
+        None => return Ok(Object::None),
+    };
+
+    let mend = m.ptr;
+    let marks_out: Vec<Object> = (0..program.groups * 2)
+        .map(|i| {
+            // Marks above `lastmark` are reported as unset.
+            let v = if (i as isize) <= m.lastmark { m.marks[i] } else { -1 };
+            Object::Int(v as i64)
+        })
+        .collect();
+    Ok(Object::new_tuple(vec![
+        Object::Int(mstart as i64),
+        Object::Int(mend as i64),
+        Object::Int(m.lastindex as i64),
+        Object::new_tuple(marks_out),
+    ]))
+}
+
+/// `_sre.ascii_tolower(ch)` — `lower_ascii` applied to the integer `ch`.
+fn sre_ascii_tolower(args: &[Object]) -> Result<Object, RuntimeError> {
+    let ch = arg_i64(args, 0, "ch")? as u32;
+    Ok(Object::Int(i64::from(lower_ascii(ch))))
+}
+fn sre_ascii_iscased(args: &[Object]) -> Result<Object, RuntimeError> {
+    arg_i64(args, 0, "ch").map(|c| Object::Bool(ascii_iscased(c as u32))) // `_sre.ascii_iscased`
+}
+/// `_sre.unicode_tolower(ch)` — `lower_unicode` applied to the integer `ch`.
+fn sre_unicode_tolower(args: &[Object]) -> Result<Object, RuntimeError> {
+    let ch = arg_i64(args, 0, "ch")? as u32;
+    Ok(Object::Int(i64::from(lower_unicode(ch))))
+}
+/// `_sre.unicode_iscased(ch)` — the `unicode_iscased` result as a bool object.
+fn sre_unicode_iscased(args: &[Object]) -> Result<Object, RuntimeError> {
+    let ch = arg_i64(args, 0, "ch")? as u32;
+    Ok(Object::Bool(unicode_iscased(ch)))
+}
+fn sre_getcodesize(_args: &[Object]) -> Result<Object, RuntimeError> {
+    Ok(Object::Int(CODESIZE)) // `_sre.getcodesize()`: always `CODESIZE`; arguments are ignored
+}
+/// `_sre.getlower(ch, flags)` — lowercase `ch` by the locale, Unicode or
+/// ASCII rule per `flags`; locale wins, and unreadable `flags` count as 0.
+fn sre_getlower(args: &[Object]) -> Result<Object, RuntimeError> {
+    const SRE_FLAG_LOCALE: i64 = 4;
+    const SRE_FLAG_UNICODE: i64 = 32;
+    let ch = arg_i64(args, 0, "ch")? as u32;
+    let flags = arg_i64(args, 1, "flags").unwrap_or(0);
+    let lowered = match (flags & SRE_FLAG_LOCALE, flags & SRE_FLAG_UNICODE) {
+        (0, 0) => lower_ascii(ch),
+        (0, _) => lower_unicode(ch),
+        _ => lower_locale(ch),
+    };
+    Ok(Object::Int(i64::from(lowered)))
+}
+
+/// Wrap a plain function pointer as a builtin function object named
+/// `name`; no keyword-call entry point is installed (`call_kw` is `None`).
+fn b(name: &'static str, body: fn(&[Object]) -> Result<Object, RuntimeError>) -> Object {
+    let call = Box::new(body);
+    let builtin = BuiltinFn { name, call, call_kw: None };
+    Object::Builtin(Rc::new(builtin))
+}
+
+/// Assemble the native `_sre` module object.
+///
+/// The module dict receives `__name__`/`__doc__`, the integer constants
+/// `MAGIC`, `CODESIZE`, `MAXREPEAT` and `MAXGROUPS`, and one builtin per
+/// native entry point, inserted in the order listed below.
+/// `_cache` is not used.
+pub fn build(_cache: &ModuleCache) -> Rc<PyModule> {
+    type Native = fn(&[Object]) -> Result<Object, RuntimeError>;
+
+    // Module metadata strings.
+    let strings: [(&'static str, &'static str); 2] = [
+        ("__name__", "_sre"),
+        (
+            "__doc__",
+            "WeavePy native SRE regular-expression core (RFC 0035).",
+        ),
+    ];
+
+    // Integer constants exported under their Python names.
+    let constants: [(&'static str, i64); 4] = [
+        ("MAGIC", MAGIC),
+        ("CODESIZE", CODESIZE),
+        ("MAXREPEAT", MAXREPEAT_I64),
+        ("MAXGROUPS", MAXGROUPS),
+    ];
+
+    // Native functions; each builtin is named after its dict key.
+    let functions: [(&'static str, Native); 8] = [
+        ("compile", sre_compile),
+        ("exec", sre_exec),
+        ("ascii_tolower", sre_ascii_tolower),
+        ("ascii_iscased", sre_ascii_iscased),
+        ("unicode_tolower", sre_unicode_tolower),
+        ("unicode_iscased", sre_unicode_iscased),
+        ("getcodesize", sre_getcodesize),
+        ("getlower", sre_getlower),
+    ];
+
+    let dict = Rc::new(RefCell::new(DictData::new()));
+    {
+        // Scope the mutable borrow so it ends before `dict` is moved.
+        let mut d = dict.borrow_mut();
+        // Every key is a static string object.
+        let key = |name: &'static str| DictKey(Object::from_static(name));
+        for (name, text) in strings {
+            d.insert(key(name), Object::from_static(text));
+        }
+        for (name, value) in constants {
+            d.insert(key(name), Object::Int(value));
+        }
+        for (name, func) in functions {
+            d.insert(key(name), b(name, func));
+        }
+    }
+
+    Rc::new(PyModule {
+        name: "_sre".to_owned(),
+        filename: None,
+        dict,
+    })
+}
diff --git a/crates/weavepy-vm/src/stdlib/thread_real.rs b/crates/weavepy-vm/src/stdlib/thread_real.rs
index a54dfd4..16cea09 100644
--- a/crates/weavepy-vm/src/stdlib/thread_real.rs
+++ b/crates/weavepy-vm/src/stdlib/thread_real.rs
@@ -277,6 +277,7 @@ fn make_lock_object(lock: Arc<RealLock>) -> Object {
     let inst = Rc::new(PyInstance {
         class: lock_type(),
         dict,
+        native: None,
     });
     Object::Instance(inst)
 }
@@ -376,6 +377,7 @@ fn make_rlock_object(rlock: Arc<RealRLock>) -> Object {
     let inst = Rc::new(PyInstance {
         class: rlock_type(),
         dict,
+        native: None,
     });
     Object::Instance(inst)
 }
diff --git a/crates/weavepy-vm/src/stdlib/weakref_real.rs b/crates/weavepy-vm/src/stdlib/weakref_real.rs
index 7f8b485..8d4511d 100644
--- a/crates/weavepy-vm/src/stdlib/weakref_real.rs
+++ b/crates/weavepy-vm/src/stdlib/weakref_real.rs
@@ -304,7 +304,11 @@ fn make_ref_object(target: Object, callback: Option<Object>, kind_tag: u8) -> Ob
         );
     }
 
-    Object::Instance(Rc::new(PyInstance { class, dict }))
+    Object::Instance(Rc::new(PyInstance {
+        class,
+        dict,
+        native: None,
+    }))
 }
 
 /// `_weakref.getweakrefcount(obj)` — number of live weakrefs
diff --git a/crates/weavepy-vm/src/types.rs b/crates/weavepy-vm/src/types.rs
index 29800b6..75fbc41 100644
--- a/crates/weavepy-vm/src/types.rs
+++ b/crates/weavepy-vm/src/types.rs
@@ -221,6 +221,15 @@ fn compute_c3(
 pub struct PyInstance {
     pub class: Rc<TypeObject>,
     pub dict: Rc<RefCell<DictData>>,
+    /// For instances of a subclass of an immutable built-in
+    /// (`int`, `str`, `float`, `bytes`, `tuple`, …) this holds the
+    /// underlying primitive value the instance *is* — the moral
+    /// equivalent of CPython storing the C-level value in the object
+    /// struct. `None` for ordinary objects. Set once at construction
+    /// (the wrapped builtins are themselves immutable) and unwrapped
+    /// by the numeric / comparison / hashing / conversion fast paths
+    /// so e.g. `class C(int)` instances behave like real ints.
+    pub native: Option<Object>,
 }
 
 impl PyInstance {
@@ -228,6 +237,17 @@ impl PyInstance {
         Self {
             class,
             dict: Rc::new(RefCell::new(DictData::new())),
+            native: None,
+        }
+    }
+
+    /// Build an instance that wraps a primitive `native` value
+    /// (subclass of `int`/`str`/…).
+    pub fn with_native(class: Rc<TypeObject>, native: Object) -> Self {
+        Self {
+            class,
+            dict: Rc::new(RefCell::new(DictData::new())),
+            native: Some(native),
         }
     }
 }
diff --git a/docs/rfcs/0035-faithful-re-sre-unicode.md b/docs/rfcs/0035-faithful-re-sre-unicode.md
new file mode 100644
index 0000000..0e38bd3
--- /dev/null
+++ b/docs/rfcs/0035-faithful-re-sre-unicode.md
@@ -0,0 +1,435 @@
+# RFC 0035: A faithful `re`/`_sre` engine — porting CPython's secret-labs matcher, with the Unicode and `%`-formatting fidelity it demands
+
+- **Status**: Accepted
+- **Authors**: WeavePy authors
+- **Created**: 2026-05-31
+- **Tracking issue**: TBD
+- **Builds on**: RFC 0012 (modules/imports + frozen stdlib), RFC 0015
+  (object-model completion), RFC 0020 (real-Python frozen stdlib),
+  RFC 0023 (drop-in parity), RFC 0030 (pure-Python drop-in), RFC 0034
+  (the CPython test suite as a live harness — `test_re.py` is exactly
+  the kind of file it was built to run)
+
+## Summary
+
+WeavePy's `re` module was, until this RFC, a **translation layer**: a
+native Rust shim (`stdlib/re.rs`) that parsed a subset of Python's
+regex syntax and forwarded it to the Rust [`regex`] and [`fancy_regex`]
+crates. That bought us working `re.match`/`search`/`findall` for the
+common cases quickly, but it was a parallel implementation of a
+notoriously corner-heavy language. Anywhere CPython's behaviour is
+defined by *the secret-labs engine itself* — backtracking order, the
+exact group-reset semantics on alternation, zero-width-match
+bookkeeping, `\b` at the bytes/str boundary, the precise text of a
+`re.error`, `Pattern`/`Match` repr and attribute surface, the
+`re.Scanner` undocumented-but-tested API — a re-implementation can only
+approximate, and the approximations are exactly what `Lib/test/test_re.py`
+exists to catch.
+
+This RFC replaces the shim with **CPython's own engine**:
+
+1. A **native `_sre` core** (`crates/weavepy-vm/src/stdlib/sre_mod.rs`)
+   — a faithful port of `Modules/_sre/sre_lib.h`'s backtracking VM
+   (`SRE(match)`, `SRE(search)`, `SRE(count)`, `SRE(charset)`), the
+   case-folding/character-classification primitives, and the module
+   surface real code touches (`compile`, the compiled-pattern `exec`
+   trampoline, `ascii_tolower`/`unicode_tolower`,
+   `ascii_iscased`/`unicode_iscased`, `getlower`, `getcodesize`, plus
+   `MAGIC`/`CODESIZE`). Compiled programs live in a thread-local
+   registry keyed by an integer handle, so the Rust core stays free of
+   Python-object lifetime concerns.
+2. The **frozen Python `re` package**, ported from CPython 3.13:
+   `re/__init__.py`, `re._constants`, `re._casefix`, `re._parser`,
+   `re._compiler`, all essentially verbatim, plus a small
+   `re._engine` that builds the `Pattern`/`Match` classes on top of
+   the native core (CPython builds those in C; we build them in frozen
+   Python over a minimal native primitive — see *Detailed design §3*).
+   The pre-3.11 deprecated aliases `sre_constants`/`sre_parse`/
+   `sre_compile` are shipped as re-export shims.
+3. The **Unicode, `str`/`bytes`, and `%`-formatting fidelity** that the
+   real `re` parser/compiler turned out to depend on — and which the
+   shim had hidden. Porting CPython's own `_parser.py` surfaced a tail
+   of interpreter gaps (`int` subclassing, slice deletion/assignment,
+   the legacy `__getitem__` iteration protocol, faithful `repr()`
+   printability, `str(bytes, encoding)`, `\U` escapes, `%`-format
+   dunder dispatch). Each is a general correctness fix that happened to
+   be *forced into the light* by running CPython's code unmodified.
+
+Diff shape: **~6K lines added** — the `_sre` Rust core (~1.6K), the five
+frozen `re` submodules + three alias shims (~3K, mostly *CPython's own
+Python* carried verbatim), the interpreter/object-model fixes (~1K
+across the VM/compiler/parser), `tests/regrtest/test_re.py`, the
+module-registry rewiring, and this RFC — against ~1.1K deleted with the
+old `stdlib/re.rs` shim (and its VM-level `re.sub`-callable hook,
+`do_re_sub_callable`), for a net diff of **~5K LOC**.
+
+That the *fidelity* upgrade is also a *smaller* footprint is the whole
+argument: a faithful port reuses CPython's ~3K-line Python parser/
+compiler unchanged and reimplements only the ~1.6K-line C matcher,
+where a from-scratch shim would have to grow toward the full corner-case
+surface line by line and still never reach parity. Compactness here is
+evidence the strategy is right, not that the scope is small.
+
+Mission alignment: `re` is one of the most-imported modules in the
+stdlib, and `test_re.py` is one of CPython's largest single-module test
+files. Running *CPython's engine* rather than an emulation of it is the
+difference between "regex mostly works" and "regex is CPython."
+
+## Motivation
+
+The shim was a liability for three compounding reasons:
+
+- **It was a second implementation of a hard language.** Python's regex
+  dialect is not PCRE and is not the Rust `regex` crate's dialect.
+  Conditional groups `(?(id)yes|no)`, the exact semantics of
+  `\b`/`\B`/`\A`/`\Z`, possessive quantifiers and atomic groups
+  `(?>...)`, the group-state rollback on a failed alternation branch,
+  the rule that an empty match adjacent to a previous match is skipped
+  in `findall`/`finditer`/`sub` (`must_advance`), and the textual
+  content of every `re.error` ("nothing to repeat", "missing ),
+  unterminated subpattern", "redefinition of group name") are all
+  defined operationally by the secret-labs engine. `fancy_regex` makes
+  *different* choices, so any program that depends on Python's choices
+  silently diverged.
+- **It could never run `test_re.py`.** RFC 0034 made CPython's own
+  `Lib/test/test_re.py` runnable in principle; the shim made it
+  unpassable in practice, because the test asserts on engine internals
+  (`sre_compile` output sizes, `Pattern.__repr__`, `re.error` line/col,
+  `Match.regs`, the `_sre.MAGIC`/`CODESIZE` contract that
+  `re._compiler` checks at import).
+- **It diverged from the frozen-stdlib strategy.** RFC 0020's thesis is
+  "ship real CPython Python where we can." `re` is *the* poster child:
+  CPython's `re` is ~90% Python (`_parser`/`_compiler`/`__init__`) over
+  a ~10% C core (`_sre`). Porting the Python verbatim and reimplementing
+  only the C core is both less code and dramatically more faithful than
+  a from-scratch shim.
+
+The cost of inaction was an open-ended tail of "regex behaves subtly
+differently" bugs — the worst kind, because they are silent.
+
+## CPython reference
+
+We track **CPython 3.13**. Specific sources ported or matched:
+
+- **The C core**: `Modules/_sre/sre.c`, `Modules/_sre/sre_lib.h`,
+  `Modules/_sre/sre_constants.h`, `Modules/_sre/sre.h`. The matcher
+  port mirrors the `SRE(match)` opcode dispatch loop, `SRE(search)`'s
+  general scan loop (the prefix/charset fast paths are not ported),
+  `SRE(count)` for `REPEAT_ONE`/`MIN_REPEAT_ONE`, and `SRE(charset)`/
+  `SRE(charset_loc_ignore)`. The `_sre.MAGIC` constant (`20230612`) and
+  `CODESIZE` (`4`) are the contract `re._compiler` checks at import.
+- **The Python layer**: `Lib/re/__init__.py`, `Lib/re/_constants.py`,
+  `Lib/re/_casefix.py`, `Lib/re/_parser.py`, `Lib/re/_compiler.py`.
+- **The deprecated aliases**: `Lib/sre_constants.py`, `Lib/sre_parse.py`,
+  `Lib/sre_compile.py` (each a thin "moved to `re._*`" shim since 3.11).
+- **The Unicode/format surface** the engine leans on: the `str`/`bytes`
+  data model (the language reference §3), `str.isprintable`/`repr`
+  (CPython `Objects/unicodeobject.c`'s `unicode_repr` and
+  `Py_UNICODE_ISPRINTABLE`), printf-style `%` formatting
+  (`PyUnicode_Format`), and the `str(object, encoding, errors)`
+  constructor form.
+- **The acceptance test**: `Lib/test/test_re.py` (the parts that don't
+  require `_sre` C-detail refleak hooks).
+
+As with RFC 0034, anything the engine reaches for that we deliberately
+do not model raises the *same* exception CPython would, so an
+unsupported corner reads as the correct error, never a wrong answer.
+
+## Detailed design
+
+### 1 — the native `_sre` core (`stdlib/sre_mod.rs`)
+
+The native module exposes the minimal surface `re._compiler` and
+`re._engine` import:
+
+| symbol | role |
+|---|---|
+| `MAGIC` = `20230612` | version stamp `re._compiler` asserts against `_constants.MAGIC` |
+| `CODESIZE` = `4` | word size of the compiled program (`sizeof(SRE_CODE)`) |
+| `compile(code, groups)` | intern a compiled program into the thread-local registry; returns an integer handle |
+| `exec(handle, string, pos, endpos, mode, must_advance)` | run `SRE(match)`/`SRE(search)` as selected by `mode` and return `(start, end, lastindex, marks)` (or `None`) |
+| `ascii_tolower` / `unicode_tolower` | case-fold a single code point |
+| `ascii_iscased` / `unicode_iscased` | "is this code point case-sensitive?" (drives `IGNORECASE`) |
+| `getlower(ch, flags)` | the flag-aware lowercase used by `LITERAL_IGNORE` |
+| `getcodesize()` | `CODESIZE`, as a function (the historical API) |
+
+**The matcher.** `SRE(match)` is a recursive backtracking interpreter
+over the `u32` program emitted by `re._compiler`. The port keeps the
+opcode numbering from `_constants.OPCODES` (so the *Python* compiler and
+the *Rust* matcher agree by construction) and implements the full set
+real patterns reach: `LITERAL`/`NOT_LITERAL`/`LITERAL_IGNORE`/
+`LITERAL_UNI_IGNORE`/`LITERAL_LOC_IGNORE`, `ANY`/`ANY_ALL`, `IN`/
+`IN_IGNORE`/`IN_UNI_IGNORE`/`IN_LOC_IGNORE` (delegating to
+`SRE(charset)`), `BRANCH`, `REPEAT`/`MAX_UNTIL`/`MIN_UNTIL`,
+`REPEAT_ONE`/`MIN_REPEAT_ONE` (with the `SRE(count)` fast path),
+`GROUPREF`/`GROUPREF_IGNORE`/`GROUPREF_UNI_IGNORE`/`GROUPREF_LOC_IGNORE`/
+`GROUPREF_EXISTS`, `AT` (the anchors/boundaries), `ASSERT`/`ASSERT_NOT` (look-around),
+`MARK`, `JUMP`, `SUCCESS`, `FAILURE`, `INFO`, `ATOMIC_GROUP`/
+`POSSESSIVE_REPEAT`/`POSSESSIVE_REPEAT_ONE`.
+
+**Zero-width correctness — the `toplevel`/`must_advance` invariant.**
+The single subtlest part of the port. CPython threads a `toplevel` flag
+through `SRE(match)` and uses it (with the saved repeat mark) to refuse
+a second *empty* iteration of a repeat or branch tail, which is what
+keeps `re.findall(r'a*', 'aaa')`, `re.split(r'x*', 'axbxc')`, and
+`re.sub(r'', '-', 'abc')` from looping forever or producing the wrong
+split. The port reproduces this exactly: `OP_BRANCH`, `OP_REPEAT`,
+`OP_MAX_UNTIL`, and `OP_MIN_UNTIL` **inherit** the caller's `toplevel`
+into their tail continuations rather than forcing `false` (an early
+draft hard-coded `false` and hung on `a{,3}` against `'aaaaa'`). The
+`REPEAT_ONE` count loop uses signed arithmetic so a decrement past zero
+can't wrap a `usize`.
+
+**Lifetime model.** The Rust matcher never holds a Python object across
+a call. `compile` copies the `code: Vec<u32>` and the group count into
+a `CompiledCode` entry in the thread-local registry and returns its
+index as a plain `int`; `exec` looks the entry up, runs against the
+subject string/bytes, and returns owned results. This sidesteps the
+GC/borrow questions a native `Pattern` object would raise and keeps the
+core a pure function of `(program, subject, pos)`.
+
+### 2 — the frozen Python `re` package
+
+Registered as a frozen package with submodules (the model
+`email`/`importlib` already use):
+
+```
+re                  (package)   <- Lib/re/__init__.py        (re_init.py)
+re._constants       (module)    <- Lib/re/_constants.py      (verbatim)
+re._casefix         (module)    <- Lib/re/_casefix.py        (verbatim)
+re._parser          (module)    <- Lib/re/_parser.py         (verbatim)
+re._compiler        (module)    <- Lib/re/_compiler.py        (≈verbatim)
+re._engine          (module)    <- WeavePy-specific glue (Pattern/Match)
+sre_constants       (module)    <- re-export shim (deprecated alias)
+sre_parse           (module)    <- re-export shim (deprecated alias)
+sre_compile         (module)    <- re-export shim (deprecated alias)
+```
+
+`_constants`, `_casefix`, and `_parser` are CPython 3.13 **verbatim** —
+the whole point is to run CPython's parser, not ours. `_compiler` is
+all-but-verbatim: the only adaptations are where it calls into the C
+core (it gets `MAGIC`/`CODESIZE` from our native `_sre`, and its
+`_bytes_to_codes` helper assembles the `array`-backed program the same
+way, which is what surfaced the `int.byteorder`/`array` plumbing fixes
+below).
+
+### 3 — `Pattern`/`Match` in frozen Python (`re._engine`)
+
+CPython implements `Pattern` and `Match` as **C types** in `_sre`.
+Reproducing those as native Rust objects would mean a second object
+type with its own GC integration, attribute table, and repr — a large
+surface for little gain. Instead `re._engine` defines `Pattern` and
+`Match` as **Python classes** over the native primitive:
+
+- `Pattern` holds the compiled handle, `pattern`/`flags`/`groups`/
+  `groupindex`/`groupdict`, and implements `match`/`fullmatch`/`search`/
+  `findall`/`finditer`/`split`/`sub`/`subn`/`scanner` by calling
+  `_sre.exec` and wrapping results. The scan loop (`_iter`) implements
+  the `must_advance` "skip empty match adjacent to previous" rule in
+  Python, mirroring `_sre`'s `scanner`/`Pattern.finditer`.
+- `Match` exposes `group`/`groups`/`groupdict`/`start`/`end`/`span`/
+  `expand`/`__getitem__`/`regs`/`pos`/`endpos`/`lastindex`/
+  `lastgroup`/`re`/`string`, with CPython's `repr`.
+- **Template expansion** (`re.sub`'s replacement parsing, `\g<name>`
+  and `\1` back-references, and the *callable* `repl` path) lives here
+  in Python — which is why the old VM-level `do_re_sub_callable`
+  interception in `weavepy-vm/src/lib.rs` is **deleted**: a callable
+  `repl` is now just a Python call in `_engine`, exactly as in CPython.
+
+This "C core + Python wrapper class" split is the same shape CPython
+*almost* has (its wrapper is C only for speed); functionally it is
+indistinguishable to user code.
+
+### 4 — interpreter & object-model fixes surfaced by the port
+
+Running CPython's unmodified `_parser.py`/`_compiler.py` is a stress
+test of the interpreter. Each gap below was a real CPython-behaviour
+bug that the shim had simply never exercised; all are fixed as general
+correctness work in `weavepy-vm`/`weavepy-compiler`/`weavepy-parser`:
+
+- **`int` subclassing.** `_parser` and `_constants` use
+  `_NamedIntConstant(int)` for opcodes, and `enum.IntFlag`/`IntEnum`
+  back the `re` flags. `PyInstance` gained a `native: Option<Object>`
+  slot; `object.__new__` initialises it for `int`/`float` subclasses;
+  and every arithmetic/identity/hash/ordering/truth path
+  (`as_i64`, `as_usize`, `eq_value`, `DictKey::hash`, `is_truthy`,
+  `Object::cmp`, `binary_op`, `binary_subscr`, the `int()`/`float()`/
+  `bool()` constructors) now unwraps a subclass to its native value.
+  `enum.py`'s `IntEnum`/`IntFlag` were re-based on `(int, Enum)` /
+  `(int, Flag)` and member creation routed through `int.__new__`.
+- **Slice deletion & assignment.** `_parser` does `del subpattern[x]`
+  and `subpattern[a:b] = …`. Added `del seq[slice]` for `list`/
+  `bytearray`, slice-assignment from an arbitrary iterable RHS
+  (via the VM's `collect_iterable`), `range` slicing, and correct
+  negative-step (`[::-1]`) handling mirroring `PySlice_AdjustIndices`.
+- **Legacy iteration protocol.** Objects with `__getitem__`+`__len__`
+  but no `__iter__` are now iterable (`__getitem__` is called with 0, 1,
+  2, … until it raises `IndexError`), which `re`'s `SubPattern` relies on.
+- **The ABA call-cache bug.** `MAKE_FUNCTION`'s inline cache could
+  mis-specialize when a freed function's `Rc` address was reused
+  (classic ABA). `CallPyExact`/`CallPyExactNoFree` now re-validate the
+  callee's closure shape and arg count before taking the fast path.
+- **`bytes`/`bytearray` methods.** `translate`/`maketrans` implemented;
+  `find`/`rfind`/`index`/`count` now honour `start`/`end` (bytes
+  patterns go through these).
+- **Truthiness dispatch.** A shared `obj_truthy` that dispatches
+  `__bool__` then `__len__` for instances, wired into `PopJumpIfFalse`/
+  `PopJumpIfTrue`/`UnaryOp(not)`/`bool()` — without it `(?i)`-style
+  inline-flag parsing mis-fired.
+- **Compiler import binding.** `collect_decls` now records names bound
+  by `import`/`from … import` so they can be captured as cellvars
+  (a closure in `_compiler` referenced an imported name).
+
+### 5 — Unicode, `str`/`bytes`, and `%`-formatting fidelity
+
+- **Faithful `repr()`.** `str.__repr__` now picks CPython's quote
+  (double quotes iff the string has a `'` and no `"`, else single) and
+  escapes non-printable code points as `\xNN`/`\uNNNN`/`\UNNNNNNNN`.
+  Printability is decided by Unicode general category via the
+  `unicode_properties` crate (`Cc`/`Cf`/`Cs`/`Co`/`Cn` and the
+  separators are non-printable; `U+0020` is the one printable space),
+  matching `Py_UNICODE_ISPRINTABLE`. `str.isprintable` shares the
+  helper. (`re.escape` and `Pattern.__repr__` both depend on this.)
+- **`str(bytes, encoding[, errors])`.** The two/three-arg `str`
+  constructor now decodes via the codec machinery instead of returning
+  `repr(b'…')`. `re._parser.Tokenizer` builds itself from
+  `str(byte, 'latin-1')`, so without this every *bytes* pattern was
+  mis-tokenised.
+- **`\U` escapes.** The lexer's string-literal decoder learned the
+  eight-hex-digit `\U` form (it already handled `\x`/`\u`), so non-BMP
+  literals like `'\U0001f600'` parse correctly.
+- **`%`-format dunder dispatch.** `str.__mod__` (`"%s"/"%r" % obj`) now
+  dispatches `__str__`/`__repr__` for instances (so `"%s" % some_error`
+  prints the message, not `<PatternError object>`), and `%d`/`%i`/`%u`
+  unwrap `int` subclasses (so `"%d" % OPCODES.LITERAL` formats the
+  value). Implemented by threading a VM-aware `resolve` callback into a
+  refactored `percent_format_with`.
+
+### 6 — module rewiring & test
+
+- `stdlib/mod.rs` registers `_sre` as a builtin native module and the
+  nine frozen sources above; the old native `re` registration and
+  `stdlib/re.rs` are removed.
+- `weavepy-vm/src/lib.rs` drops `do_re_sub_callable` (now handled in
+  frozen `re._engine`).
+- `tests/regrtest/test_re.py` is a new bundled fixture (auto-discovered
+  by the RFC 0034 harness) covering literals, quantifiers, groups,
+  alternation, flags, look-around, back-references, named groups,
+  `split`/`sub`/`subn`/`findall`/`finditer`, bytes patterns, Unicode
+  categories, `re.error` text, and the zero-width edge cases that the
+  `toplevel` invariant protects. Every expectation was cross-checked
+  against the local CPython 3.13 oracle.
+
+## Implementation status (post-merge)
+
+| area | status | notes |
+|------|--------|-------|
+| native `_sre` matcher (`SRE(match)`/`search`/`count`/`charset`) | ✅ | full opcode set incl. look-around, back-refs, possessive/atomic, conditional groups |
+| `_sre` module surface (`compile`/`exec`/case-fold/`MAGIC`/`CODESIZE`) | ✅ | thread-local compiled-program registry keyed by int handle |
+| zero-width `toplevel`/`must_advance` invariant | ✅ | `BRANCH`/`REPEAT`/`MAX_UNTIL`/`MIN_UNTIL` inherit `toplevel`; signed `REPEAT_ONE` count |
+| frozen `re` package (`__init__`/`_constants`/`_casefix`/`_parser`) | ✅ | CPython 3.13 verbatim |
+| frozen `re._compiler` | ✅ | ≈verbatim; targets our native `_sre` `MAGIC`/`CODESIZE` |
+| `re._engine` `Pattern`/`Match` + template expansion | ✅ | Python classes over the native core; callable `repl` is plain Python |
+| deprecated `sre_constants`/`sre_parse`/`sre_compile` aliases | ✅ | re-export shims |
+| removed: native `stdlib/re.rs` + `do_re_sub_callable` VM hook | ✅ | the shim and its VM interception are gone |
+| `int`/`float` subclassing (`native` slot; arith/hash/cmp/truth) | ✅ | `enum.IntFlag`/`IntEnum`, `_NamedIntConstant` work |
+| slice delete/assign, `range` slicing, negative step | ✅ | `del seq[slice]`, `seq[a:b]=iter`, `r[::-1]` |
+| legacy `__getitem__` iteration protocol | ✅ | `__getitem__`+`__len__` without `__iter__` iterates |
+| ABA inline-cache hardening (`MAKE_FUNCTION`/`CallPyExact*`) | ✅ | closure-shape + arg-count re-validation |
+| `bytes`/`bytearray` `translate`/`maketrans`; `find`-family `start`/`end` | ✅ | bytes patterns rely on these |
+| truthiness dispatch (`__bool__`/`__len__`) | ✅ | wired into jumps/`not`/`bool()` |
+| faithful `repr()`/`isprintable` (Unicode general category) | ✅ | quote selection + `\xNN`/`\uNNNN`/`\UNNNNNNNN` escaping |
+| `str(bytes, encoding[, errors])`; lexer `\U` escapes | ✅ | bytes patterns + non-BMP literals |
+| `%`-format `__str__`/`__repr__` dispatch + int-subclass unwrap | ✅ | `"%s" % exc`, `"%d" % OPCODE` |
+| bundled `tests/regrtest/test_re.py` | ✅ | passes under WeavePy and CPython 3.13 |
+
+## Drawbacks
+
+- **The matcher is recursive, unlike CPython's.** `SRE(match)` recurses
+  per opcode group; pathological patterns can hit the native stack
+  before Python's `sys.setrecursionlimit`. CPython's `_sre` has been
+  non-recursive (explicit heap-allocated stack) since 2.4, so it lacks
+  this failure; an explicit-stack rewrite is possible but out of scope.
+- **Two languages for one module.** `re` is now Rust (`_sre`) + Python
+  (`re._*`). That is precisely CPython's split, but it means a bug can
+  live on either side of the FFI line; the saving grace is that the
+  Python side is *CPython's own code*, so bugs concentrate in the small
+  Rust core.
+- **The surfaced fix tail was broad.** Half this RFC is interpreter
+  fixes (slicing, int-subclassing, repr, `%`) that are *not* about
+  regex. That is the nature of running real CPython code: it exercises
+  the whole object model. Those fixes are pure upside elsewhere, but
+  they widened the diff.
+- **No native `Pattern`/`Match` type.** Code that introspects
+  `type(p).__module__ == '_sre'` or pickles a compiled pattern via the
+  C type's `__reduce__` sees our Python classes instead. The observable
+  attribute/method surface matches; the type identity does not.
+
+## Alternatives
+
+1. **Keep the `regex`/`fancy_regex` shim and paper over differences.**
+   Rejected: the differences are unbounded and silent, and `test_re.py`
+   asserts on engine internals a shim can't reproduce. Every patch
+   would be whack-a-mole against a foreign engine's choices.
+2. **Port `_sre` *and* write `Pattern`/`Match` as native Rust types.**
+   More faithful to CPython's type identity, but a large second native
+   object surface (GC, attributes, repr, pickle) for marginal gain over
+   frozen-Python classes. Deferred to future work if type-identity
+   parity is ever required.
+3. **Compile Python regex to the Rust `regex` crate's AST.** A
+   translation layer by another name — same fidelity ceiling as the
+   shim, plus a new impedance mismatch (no backtracking, different
+   group semantics). Rejected.
+4. **A bytecode-level `re` fast path in the VM.** Premature: get
+   faithful first, optimise the hot `exec` loop later (see *Future
+   work*).
+
+## Prior art
+
+- **CPython** is the reference; we port its engine rather than imitate
+  it. The secret-labs engine (Fredrik Lundh) has been stable in shape
+  since Python 1.6, which is what makes a verbatim parser/compiler port
+  viable.
+- **PyPy** reimplements `_sre` in RPython but keeps `_sre.py`'s
+  structure and the CPython `re` Python layer — the same "port the C
+  core, reuse the Python" strategy this RFC follows.
+- **RustPython** ships a hand-written `sre-engine` Rust crate plus the
+  CPython Python layer — close to our approach; our matcher independently
+  arrives at the same `toplevel`/`must_advance` structure, which is good
+  corroboration that it's the load-bearing invariant.
+- **GraalPy** backs `_sre` with a Truffle-based regex engine; again,
+  CPython's Python `re` layer is reused, not rewritten.
+
+The cross-implementation consensus — *reuse CPython's Python `re`
+layer, reimplement only the C core* — is exactly what this RFC adopts.
+
+## Unresolved questions
+
+- **`localeconv`/`LOCALE`-flag fidelity.** `IN_LOC_IGNORE`/
+  `CATEGORY_LOC_*` depend on the C locale; we implement the structure
+  but the locale tables are the byte locale only. Full locale parity is
+  deferred (CPython itself discourages `re.LOCALE` on str patterns).
+- **Native-stack depth vs `sys.setrecursionlimit`.** Should the matcher
+  consult the Python recursion limit to raise `RecursionError` instead
+  of risking a native overflow on adversarial input?
+- **Pickling compiled patterns.** Do we need `Pattern.__reduce__` to
+  round-trip through `re.compile(pattern, flags)` (CPython's approach)
+  before any real workload needs it?
+
+## Future work
+
+- **Optimise the `exec` hot loop.** The faithful matcher is
+  correctness-first; a charset-prefix fast path and a flattened
+  dispatch (or a Tier-2 JIT intrinsic per RFC 0032) can follow now
+  that behaviour is pinned by `test_re.py`.
+- **Native `Pattern`/`Match` types** if/when `type` identity or C-level
+  pickling parity is required.
+- **Wire the full `Lib/test/test_re.py`** (including the C-detail
+  refleak/`gc` hooks) into the RFC 0034 opt-in CPython sweep, not just
+  the bundled subset.
+- **`regex`-module-style atomic-group/possessive optimisations** kept
+  behind CPython-compatible semantics.
+- **Locale tables** for `re.LOCALE` parity on bytes patterns.
+
+[`regex`]: https://docs.rs/regex
+[`fancy_regex`]: https://docs.rs/fancy-regex
diff --git a/tests/regrtest/test_re.py b/tests/regrtest/test_re.py
new file mode 100644
index 0000000..a98f410
--- /dev/null
+++ b/tests/regrtest/test_re.py
@@ -0,0 +1,116 @@
+"""Regression coverage for the faithful ``re`` / ``_sre`` engine.
+
+Exercises the CPython-ported backtracking matcher: quantifiers,
+groups, backreferences, look-around, alternation, flags, Unicode and
+bytes patterns, plus the zero-width scanning behaviour that previously
+looped forever. All expectations were diffed against CPython 3.13.
+"""
+
+import re
+
+# --- basic matching / search ------------------------------------------
+assert re.match("abc", "abcdef").span() == (0, 3)
+assert re.search("cd", "abcdef").span() == (2, 4)
+assert re.match("abc", "xabc") is None
+assert re.fullmatch("a.c", "abc") is not None
+assert re.fullmatch("a.c", "abcd") is None
+
+# --- quantifiers -------------------------------------------------------
+assert re.findall(r"a{2,4}", "a aa aaa aaaa aaaaa") == ["aa", "aaa", "aaaa", "aaaa"]
+assert re.findall(r"a{,3}", "aaaaa") == ["aaa", "aa", ""]
+assert re.findall(r"<.+>", "<a><b>") == ["<a><b>"]      # greedy
+assert re.findall(r"<.+?>", "<a><b>") == ["<a>", "<b>"]  # lazy
+assert re.search(r"a.*c", "abcabc").span() == (0, 6)
+assert re.search(r"a.*?c", "abcabc").span() == (0, 3)
+
+# --- alternation / groups ---------------------------------------------
+assert re.fullmatch("a|ab", "ab").group(0) == "ab"   # toplevel branch guard
+assert re.match(r"(a)(b)(c)", "abc").groups() == ("a", "b", "c")
+assert re.match(r"(a)(b)?(c)", "ac").groups() == ("a", None, "c")
+assert re.match(r"(a)(b)?(c)", "ac").groups("X") == ("a", "X", "c")
+m = re.match(r"(?P<y>\d{4})-(?P<m>\d{2})", "2026-05")
+assert m.groupdict() == {"y": "2026", "m": "05"}
+assert m["y"] == "2026" and m.group("m") == "05"
+assert m.lastgroup == "m" and m.lastindex == 2
+
+# --- backreferences ----------------------------------------------------
+assert re.findall(r"(\w)\1", "aa bb cd ee") == ["a", "b", "e"]
+assert re.search(r"(?P<q>['\"]).*?(?P=q)", "say 'hi' done").group(0) == "'hi'"
+assert re.findall(r"<(\w+)>.*?</\1>", "<b>x</b><i>y</i>") == ["b", "i"]
+
+# --- look-around -------------------------------------------------------
+assert re.findall(r"\d+(?= dollars)", "100 dollars, 50 cents") == ["100"]
+assert re.findall(r"\d+(?! dollars)", "100 dollars 50 cents") == ["10", "50"]
+assert re.findall(r"(?<=\$)\d+", "$100 and 50") == ["100"]
+assert re.findall(r"(?<!\$)\b\d+", "$100 and 50") == ["50"]
+
+# --- zero-width scanning (previously looped forever) -------------------
+assert re.findall(r"x*", "xxab") == ["xx", "", "", ""]
+assert re.findall(r"(a)*", "aab") == ["a", "", ""]
+assert re.findall(r"(a?)*", "aaa") == ["", ""]
+assert re.findall(r"(a|b)*", "abc") == ["b", "", ""]
+assert re.sub(r"x*", "-", "xxab") == "--a-b-"
+assert re.sub(r"(a)*", "-", "aab") == "--b-"
+assert re.split(r"x*", "axbxc") == ["", "a", "", "b", "", "c", ""]
+assert re.split(r"(?<=,)", "a,b,c") == ["a,", "b,", "c"]
+
+# --- substitution / templates -----------------------------------------
+assert re.sub(r"(\w+)@(\w+)", r"\2.\1", "user@host") == "host.user"
+assert re.sub(r"(?P<n>\w+)", r"[\g<n>]", "hi there") == "[hi] [there]"
+assert re.subn(r"\d+", "#", "a1b22c333") == ("a#b#c#", 3)
+assert re.sub(r"a", "b", "aaaa", count=2) == "bbaa"
+assert re.sub(r"\d+", lambda mo: str(int(mo.group()) * 2), "1 2 3") == "2 4 6"
+assert re.match(r"(\w+) (\w+)", "John Smith").expand(r"\2 \1") == "Smith John"
+
+# --- flags -------------------------------------------------------------
+assert re.findall(r"abc", "ABC abc", re.I) == ["ABC", "abc"]
+assert re.findall(r"(?i)abc", "ABC abc") == ["ABC", "abc"]
+assert re.findall(r"(?i:ab)c", "ABc abc ABC") == ["ABc", "abc"]
+assert re.findall(r"^\w+", "foo\nbar\nbaz", re.M) == ["foo", "bar", "baz"]
+assert re.findall(r"a.b", "a\nb", re.S) == ["a\nb"]
+assert re.findall(r"""\d +  # int
+                      \.    # dot
+                      \d *  # frac""", "3.14 x", re.X) == ["3.14"]
+
+# --- unicode vs ascii --------------------------------------------------
+assert re.findall(r"\w+", "café déjà") == ["café", "déjà"]
+assert re.findall(r"\w+", "café", re.A) == ["caf"]
+assert re.findall(r"\d+", "\uff11\uff12 99") == ["\uff11\uff12", "99"]  # fullwidth
+assert re.match(r"(?i)\u00e9", "\u00c9") is not None   # é ~ É
+assert re.findall(r"\s", "a b\tc\u00a0d") == [" ", "\t", "\u00a0"]
+
+# --- bytes patterns ----------------------------------------------------
+assert re.findall(rb"\d+", b"a12b345") == [b"12", b"345"]
+assert re.sub(rb"\s+", b"_", b"a  b\tc") == b"a_b_c"
+assert re.match(rb"(\w+)@(\w+)", b"user@host").groups() == (b"user", b"host")
+assert re.split(rb"[,;]", b"a,b;c") == [b"a", b"b", b"c"]
+assert re.findall(rb"[\x00-\x02]", bytes(range(5))) == [b"\x00", b"\x01", b"\x02"]
+
+# --- possessive / atomic ----------------------------------------------
+assert re.search(r"(?>a+)b", "aaab") is not None
+assert re.search(r"(?>a+)a", "aaa") is None     # atomic: no give-back
+assert re.findall(r"a*+", "aaab") == ["aaa", "", ""]
+
+# --- escape / error semantics -----------------------------------------
+assert re.escape("a.b*c+d?") == r"a\.b\*c\+d\?"
+for bad, msg in [
+    (r"(?P<n>a)(?P<n>b)", "redefinition"),
+    (r"a{2,1}", "min repeat greater than max repeat"),
+    (r"(?P=undef)", "unknown group name"),
+    (r"[", "unterminated character set"),
+    (r"a\1", "invalid group reference"),
+]:
+    try:
+        re.compile(bad)
+    except re.error as e:
+        assert msg in str(e), (bad, str(e))
+    else:
+        raise AssertionError("expected re.error for %r" % bad)
+
+# --- compiled Pattern surface -----------------------------------------
+p = re.compile(r"(\d+)")
+assert p.pattern == r"(\d+)" and p.groups == 1
+assert [mo.group(1) for mo in p.finditer("a1b22c")] == ["1", "22"]
+assert isinstance(re.match(r"x", "x").re, re.Pattern)
+
+print("ok")

From 481759bc1cb5f6d03182ee906083042a8844917f Mon Sep 17 00:00:00 2001
From: Owen Carey <37121709+owenthcarey@users.noreply.github.com>
Date: Sun, 31 May 2026 16:37:35 -0700
Subject: [PATCH 2/2] style: format sre_mod with rustfmt

---
 crates/weavepy-vm/src/stdlib/sre_mod.rs | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/crates/weavepy-vm/src/stdlib/sre_mod.rs b/crates/weavepy-vm/src/stdlib/sre_mod.rs
index deb821a..429f29c 100644
--- a/crates/weavepy-vm/src/stdlib/sre_mod.rs
+++ b/crates/weavepy-vm/src/stdlib/sre_mod.rs
@@ -196,7 +196,8 @@ fn unicode_iscased(ch: u32) -> bool {
 }
 
 fn ascii_iscased(ch: u32) -> bool {
-    (u32::from(b'a')..=u32::from(b'z')).contains(&ch) || (u32::from(b'A')..=u32::from(b'Z')).contains(&ch)
+    (u32::from(b'a')..=u32::from(b'z')).contains(&ch)
+        || (u32::from(b'A')..=u32::from(b'Z')).contains(&ch)
 }
 
 // ---------------------------------------------------------------------------
@@ -1451,17 +1452,17 @@ fn sre_exec(args: &[Object]) -> Result<Object, RuntimeError> {
 }
 
 fn sre_ascii_tolower(args: &[Object]) -> Result<Object, RuntimeError> {
-    Ok(Object::Int(
-        i64::from(lower_ascii(arg_i64(args, 0, "ch")? as u32))
-    ))
+    Ok(Object::Int(i64::from(lower_ascii(
+        arg_i64(args, 0, "ch")? as u32
+    ))))
 }
 fn sre_ascii_iscased(args: &[Object]) -> Result<Object, RuntimeError> {
     Ok(Object::Bool(ascii_iscased(arg_i64(args, 0, "ch")? as u32)))
 }
 fn sre_unicode_tolower(args: &[Object]) -> Result<Object, RuntimeError> {
-    Ok(Object::Int(
-        i64::from(lower_unicode(arg_i64(args, 0, "ch")? as u32))
-    ))
+    Ok(Object::Int(i64::from(lower_unicode(
+        arg_i64(args, 0, "ch")? as u32,
+    ))))
 }
 fn sre_unicode_iscased(args: &[Object]) -> Result<Object, RuntimeError> {
     Ok(Object::Bool(