Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions crates/weavepy-compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4337,6 +4337,27 @@ fn collect_decls(
collect_decls(s, globals, nonlocals, assigned);
}
}
// `import a.b.c` binds the top-level package `a` (or the
// asname); `from m import x as y` binds `y`. These are real
// local bindings and must be tracked so a name captured by a
// nested scope is promoted to a cellvar (CPython parity).
StmtKind::Import(aliases) => {
for a in aliases {
let bind = a
.asname
.clone()
.unwrap_or_else(|| a.name.split('.').next().unwrap_or(&a.name).to_owned());
assigned.insert(bind);
}
}
StmtKind::ImportFrom { names, .. } => {
for a in names {
let bind = a.asname.clone().unwrap_or_else(|| a.name.clone());
if bind != "*" {
assigned.insert(bind);
}
}
}
_ => {}
}
}
Expand Down
14 changes: 14 additions & 0 deletions crates/weavepy-parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3359,6 +3359,20 @@ fn decode_str_body(s: &str, raw: bool) -> Result<String, String> {
let n = u32::from_str_radix(&hex, 16).map_err(|e| e.to_string())?;
out.push(char::from_u32(n).unwrap_or('\u{FFFD}'));
}
'U' => {
// 8-hex code-point escape, e.g. `\U0001F600`. Required
// for non-BMP literals. CPython rejects values above
// U+10FFFF; surrogate code points cannot be stored in a
// Rust `char`, so both cases surface a clear error here.
let mut hex = String::new();
for _ in 0..8 {
hex.push(chars.next().ok_or("incomplete \\U escape")?);
}
let n = u32::from_str_radix(&hex, 16).map_err(|e| e.to_string())?;
let ch = char::from_u32(n).ok_or_else(|| {
format!("invalid \\U escape: {n:#x} is not a valid character")
})?;
out.push(ch);
}
other => {
// CPython issues a DeprecationWarning for unknown
// escapes but emits both characters literally.
Expand Down
16 changes: 16 additions & 0 deletions crates/weavepy-vm/src/builtin_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,22 @@ fn install_object_dunders(object_: &Rc<TypeObject>) {
))
}
};
// When `cls` derives from a primitive immutable built-in (so far
// `int` — covering `_NamedIntConstant`, `enum.IntEnum`/`IntFlag`
// and hand-written `class C(int)`), capture the value the
// instance wraps. `super().__new__(cls, value)` passes it as the
// second positional argument; absent that it defaults to 0.
if cls.is_subclass_of(&builtin_types().int_) {
let native = match args.get(1) {
None => Object::Int(0),
Some(o @ (Object::Int(_) | Object::Long(_))) => o.clone(),
Some(Object::Bool(b)) => Object::Int(i64::from(*b)),
Some(o) => Object::Int(o.as_i64().unwrap_or(0)),
};
return Ok(Object::Instance(Rc::new(PyInstance::with_native(
cls, native,
))));
}
Ok(Object::Instance(Rc::new(PyInstance::new(cls))))
}
fn object_init(_args: &[Object]) -> Result<Object, RuntimeError> {
Expand Down
161 changes: 150 additions & 11 deletions crates/weavepy-vm/src/builtins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,8 @@ pub fn lookup_method(obj: &Object, name: &str) -> Option<Object> {
"splitlines" => Some(method("splitlines", bytes_splitlines)),
"join" => Some(method("join", bytes_join)),
"replace" => Some(method("replace", bytes_replace)),
"translate" => Some(method("translate", bytes_translate)),
"maketrans" => Some(method("maketrans", bytes_maketrans)),
"isalnum" => Some(method("isalnum", bytes_isalnum)),
"isalpha" => Some(method("isalpha", bytes_isalpha)),
"isdigit" => Some(method("isdigit", bytes_isdigit)),
Expand Down Expand Up @@ -618,6 +620,35 @@ fn b_str(args: &[Object]) -> Result<Object, RuntimeError> {
if args.is_empty() {
return Ok(Object::from_static(""));
}
// `str(object, encoding[, errors])` decodes a bytes-like object,
// equivalent to `object.decode(encoding, errors)`. CPython's
// `re._parser.Tokenizer` relies on `str(pattern, 'latin1')` to
// tokenize bytes patterns, so this path must decode rather than
// fall back to `repr`-style stringification.
if args.len() >= 2 {
match &args[0] {
Object::Bytes(_) | Object::ByteArray(_) => {}
other => {
return Err(type_error(format!(
"decoding to str: need a bytes-like object, {} found",
other.type_name()
)));
}
}
let data = bytes_data(args)?;
let encoding = match &args[1] {
Object::Str(e) => e.to_string(),
Object::None => "utf-8".to_owned(),
_ => return Err(type_error("str() argument 'encoding' must be str")),
};
let errors = match args.get(2) {
Some(Object::Str(e)) => e.to_string(),
Some(Object::None) | None => "strict".to_owned(),
_ => return Err(type_error("str() argument 'errors' must be str")),
};
let s = crate::stdlib::codecs_mod::decode_bytes(&data, &encoding, &errors)?;
return Ok(Object::from_str(s));
}
Ok(Object::from_str(args[0].to_str()))
}

Expand Down Expand Up @@ -2628,6 +2659,7 @@ pub fn make_super(class: Rc<crate::types::TypeObject>, receiver: Object) -> Obje
d.insert(DictKey(Object::from_static("__self__")), receiver);
d
})),
native: None,
};
Object::Instance(Rc::new(inst))
}
Expand Down Expand Up @@ -3804,7 +3836,9 @@ fn str_isidentifier(args: &[Object]) -> Result<Object, RuntimeError> {

fn str_isprintable(args: &[Object]) -> Result<Object, RuntimeError> {
let s = str_self(args)?;
Ok(Object::Bool(s.chars().all(|c| !c.is_control())))
Ok(Object::Bool(
s.chars().all(crate::object::char_is_printable),
))
}

fn str_zfill(args: &[Object]) -> Result<Object, RuntimeError> {
Expand Down Expand Up @@ -4827,17 +4861,56 @@ fn bytes_match_prefix_suffix(
}
}

/// Turn the optional `start`/`end` arguments of `bytes.find` and
/// friends (`args[2]` and `args[3]`) into a half-open byte window
/// `start..end` that is always safe to slice out of a buffer of
/// length `len`.
///
/// Each bound is resolved the same way:
/// * absent or `None` -> the default (`0` for `start`, `len` for `end`);
/// * a value for which `as_i64()` yields nothing -> the default as well;
/// * a negative integer -> counted from the end (`len` is added once);
/// * the result is then clamped into `0..=len`.
///
/// Finally `end` is raised to `start` when it would fall below it, so
/// the returned pair always satisfies `start <= end <= len`.
///
/// NOTE(review): because both bounds are clamped, callers cannot tell
/// "start past the end" or "end before start" apart from an empty
/// window at a valid position — confirm that matches the intended
/// CPython behaviour for empty needles.
fn bytes_search_range(args: &[Object], len: usize) -> (usize, usize) {
    let n = len as i64;
    let resolve = |arg: Option<&Object>, fallback: i64| -> i64 {
        let requested = match arg {
            None | Some(Object::None) => None,
            Some(obj) => obj.as_i64(),
        };
        // Negative indices are relative to the end of the buffer.
        let idx = requested.map_or(fallback, |x| if x < 0 { x + n } else { x });
        idx.clamp(0, n)
    };
    let start = resolve(args.get(2), 0) as usize;
    let end = resolve(args.get(3), n) as usize;
    (start, end.max(start))
}

/// Search for `sub` inside the window `data[start..end]`.
///
/// Returns the *absolute* index (relative to the start of `data`) of
/// the first occurrence, or `-1` when:
/// * the window is invalid (`start > end` or `end > data.len()`), or
/// * `sub` does not occur entirely inside the window.
///
/// An empty `sub` matches immediately at `start`, provided the window
/// itself is valid.
fn bytes_find_in(data: &[u8], sub: &[u8], start: usize, end: usize) -> i64 {
    // `get` yields `None` for exactly the invalid-window cases.
    match data.get(start..end) {
        None => -1,
        Some(_) if sub.is_empty() => start as i64,
        // `windows` produces nothing when the needle is longer than
        // the window, so that case falls through to `-1`.
        Some(window) => window
            .windows(sub.len())
            .position(|candidate| candidate == sub)
            .map_or(-1, |offset| (start + offset) as i64),
    }
}

fn bytes_find(args: &[Object]) -> Result<Object, RuntimeError> {
let data = bytes_data(args)?;
let sub = bytes_argview(
args.get(1)
.ok_or_else(|| type_error("find() expected 1 arg"))?,
)?;
Ok(Object::Int(
data.windows(sub.len())
.position(|w| w == sub)
.map_or(-1, |i| i as i64),
))
let (start, end) = bytes_search_range(args, data.len());
Ok(Object::Int(bytes_find_in(&data, &sub, start, end)))
}

fn bytes_rfind(args: &[Object]) -> Result<Object, RuntimeError> {
Expand All @@ -4846,9 +4919,16 @@ fn bytes_rfind(args: &[Object]) -> Result<Object, RuntimeError> {
args.get(1)
.ok_or_else(|| type_error("rfind() expected 1 arg"))?,
)?;
let (start, end) = bytes_search_range(args, data.len());
if start > end || end > data.len() {
return Ok(Object::Int(-1));
}
if sub.is_empty() {
return Ok(Object::Int(end as i64));
}
let mut last = -1i64;
if sub.len() <= data.len() {
for i in 0..=data.len() - sub.len() {
if sub.len() <= end - start {
for i in start..=end - sub.len() {
if data[i..i + sub.len()] == sub[..] {
last = i as i64;
}
Expand All @@ -4870,12 +4950,13 @@ fn bytes_count(args: &[Object]) -> Result<Object, RuntimeError> {
args.get(1)
.ok_or_else(|| type_error("count() expected 1 arg"))?,
)?;
let (start, end) = bytes_search_range(args, data.len());
if sub.is_empty() {
return Ok(Object::Int(data.len() as i64 + 1));
return Ok(Object::Int((end - start) as i64 + 1));
}
let mut n = 0i64;
let mut i = 0;
while i + sub.len() <= data.len() {
let mut i = start;
while i + sub.len() <= end {
if data[i..i + sub.len()] == sub[..] {
n += 1;
i += sub.len();
Expand Down Expand Up @@ -5072,6 +5153,64 @@ fn bytes_replace(args: &[Object]) -> Result<Object, RuntimeError> {
Ok(Object::new_bytes(out))
}

/// `bytes.translate(table, /, delete=b'')` and the `bytearray`
/// equivalent.
///
/// Arguments (positional, in `args`):
/// * `args[0]` — the receiver; when it is an `Object::ByteArray` the
///   result is a bytearray, otherwise a bytes object.
/// * `args[1]` — the translation table: absent/`None` for the identity
///   mapping, or a bytes-like value of exactly 256 entries where entry
///   `b` is the replacement for byte `b`.
/// * `args[2]` — optional bytes-like value; every byte it contains is
///   dropped from the input *before* the table is applied.
///
/// # Errors
/// Returns the `value_error` below when the table is not 256 bytes
/// long, and propagates any error from `bytes_data` / `bytes_argview`.
fn bytes_translate(args: &[Object]) -> Result<Object, RuntimeError> {
    let data = bytes_data(args)?;
    let table = match args.get(1) {
        None | Some(Object::None) => None,
        Some(o) => {
            let t = bytes_argview(o)?;
            if t.len() != 256 {
                return Err(value_error("translation table must be 256 characters long"));
            }
            Some(t)
        }
    };
    let delete = match args.get(2) {
        None | Some(Object::None) => Vec::new(),
        Some(o) => bytes_argview(o)?,
    };

    // Precompute membership once so each input byte costs O(1) instead
    // of a linear scan over `delete`.
    let mut dropped = [false; 256];
    for &b in delete.iter() {
        dropped[usize::from(b)] = true;
    }

    let mut out = Vec::with_capacity(data.len());
    for &b in data.iter() {
        if dropped[usize::from(b)] {
            continue;
        }
        out.push(match &table {
            // The table length was validated above, so any `u8` index
            // is in bounds.
            Some(t) => t[usize::from(b)],
            None => b,
        });
    }

    if matches!(args.first(), Some(Object::ByteArray(_))) {
        Ok(Object::new_bytearray(out))
    } else {
        Ok(Object::new_bytes(out))
    }
}

/// `bytes.maketrans(from, to)` — produce a 256-byte table suitable for
/// `bytes.translate`.
///
/// The table starts as the identity mapping (entry `i` holds `i`);
/// then, for every position `k`, entry `from[k]` is overwritten with
/// `to[k]`. When a byte occurs more than once in `from`, the last
/// occurrence wins.
///
/// `from` is read from `args[0]` and `to` from `args[1]`.
/// NOTE(review): if this is ever invoked with a bound receiver in
/// `args[0]`, the receiver would be taken as `from` — confirm how the
/// method is dispatched.
///
/// # Errors
/// * the `type_error` below when either argument is missing;
/// * the `value_error` below when the two arguments differ in length;
/// * any error propagated from `bytes_argview`.
fn bytes_maketrans(args: &[Object]) -> Result<Object, RuntimeError> {
    const ARITY_MSG: &str = "maketrans() takes exactly two arguments";

    // Each argument is fetched and converted in turn, so a bad first
    // argument is reported before a missing second one.
    let from = match args.first() {
        Some(o) => bytes_argview(o)?,
        None => return Err(type_error(ARITY_MSG)),
    };
    let to = match args.get(1) {
        Some(o) => bytes_argview(o)?,
        None => return Err(type_error(ARITY_MSG)),
    };
    if from.len() != to.len() {
        return Err(value_error("maketrans arguments must have same length"));
    }

    let mut table: Vec<u8> = (u8::MIN..=u8::MAX).collect();
    for (&src, &dst) in from.iter().zip(to.iter()) {
        table[usize::from(src)] = dst;
    }
    Ok(Object::new_bytes(table))
}

fn bytes_isalnum(args: &[Object]) -> Result<Object, RuntimeError> {
let data = bytes_data(args)?;
Ok(Object::Bool(
Expand Down
Loading
Loading