Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,6 @@ __pycache__/
# Generated docs
/book/
/site/

# Local scratch / throwaway diagnostics
.scratch/
25 changes: 25 additions & 0 deletions crates/weavepy-cli/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,37 @@
//! (`./configure --enable-shared` adds it for the same reason).
//! It's a no-op on macOS (two-level namespaces) and unrecognised
//! by `link.exe` on Windows, hence the target-family gate.
//!
//! On Windows we additionally enlarge the binary's *main-thread* stack
//! reserve. WeavePy's evaluator is a recursive tree-walker, so Python
//! call depth maps onto native (Rust) stack depth. Windows reserves only
//! 1 MiB for the main thread by default — far below the 8 MiB Linux and
//! macOS give — so deep workloads such as `weavepy -m test` overflow the
//! stack before `sys.setrecursionlimit` can guard them. Reserving 64 MiB
//! (committed lazily, so it costs only address space) makes the depth
//! limit governed by the recursion limit uniformly across platforms.
//! A build-script link arg is used rather than `.cargo/config.toml`
//! `rustflags` because the latter is silently dropped when CI sets the
//! `RUSTFLAGS` environment variable.

use std::env;

const WINDOWS_STACK_BYTES: u64 = 64 * 1024 * 1024;

/// Build-script entry point: prints the per-target linker arguments that
/// Cargo should apply to this crate's binaries.
///
/// * Linux, FreeBSD and Android get `-Wl,--export-dynamic`.
/// * Windows gets a stack reserve of `WINDOWS_STACK_BYTES`, spelled
///   `-Wl,--stack,<n>` when the target env is `gnu` and `/STACK:<n>`
///   for every other target env.
/// * Any other target OS (or an unset `CARGO_CFG_TARGET_OS`) emits nothing.
fn main() {
    let os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
    match os.as_str() {
        "linux" | "freebsd" | "android" => {
            println!("cargo:rustc-link-arg-bins=-Wl,--export-dynamic");
        }
        "windows" => {
            let toolchain = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
            let stack_arg = if toolchain == "gnu" {
                // GNU ld (mingw): `--stack <reserve>`.
                format!("-Wl,--stack,{WINDOWS_STACK_BYTES}")
            } else {
                // MSVC link.exe (the default on the GitHub `windows-latest`
                // runner): `/STACK:reserve`.
                format!("/STACK:{WINDOWS_STACK_BYTES}")
            };
            println!("cargo:rustc-link-arg-bins={stack_arg}");
        }
        _ => {}
    }
}
32 changes: 32 additions & 0 deletions crates/weavepy-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,13 @@ fn run_source_with_options(source: &str, opts: &RunOptions) -> Result<()> {
match weavepy::run_source_with_options(source, opts) {
Ok(()) => Ok(()),
Err(err) => {
// A `SystemExit` reaching the top level terminates the
// process with its code and prints no traceback — exactly
// like CPython. This is what makes `weavepy -m unittest`,
// `-m test`, and bare `sys.exit()` behave as a drop-in.
if let Some(code) = err.system_exit_code() {
exit_with_system_exit(code);
}
let mut stderr = io::stderr().lock();
let diag = err.format(source, &opts.filename);
let _ = stderr.write_all(diag.as_bytes());
Expand All @@ -690,6 +697,31 @@ fn run_source_with_options(source: &str, opts: &RunOptions) -> Result<()> {
}
}

/// Ends the process the way CPython does when a `SystemExit` reaches the
/// top level. A traceback is never printed.
///
/// The `SystemExit` payload maps to the process status as follows:
///
/// * `None` → 0
/// * a bool → 0 or 1
/// * an int → its low 8 bits
/// * an empty string → 0
/// * anything else → `to_str()` of the payload is written to stderr and
///   the status is 1
///
/// stdout is flushed first and stderr is flushed just before exiting;
/// failures of either flush (and of the stderr write) are ignored.
fn exit_with_system_exit(code: weavepy::vm::object::Object) -> ! {
    use weavepy::vm::object::Object;

    let _ = io::stdout().flush();
    let exit_status: i32 = match code {
        Object::None => 0,
        Object::Bool(flag) => i32::from(flag),
        Object::Int(value) => (value & 0xFF) as i32,
        // A bare `raise SystemExit` (and `sys.exit()`) carries no
        // message; WeavePy models the empty payload as an empty string,
        // which means "no error" → exit 0, not a printed message.
        Object::Str(text) if text.is_empty() => 0,
        payload => {
            let _ = writeln!(io::stderr().lock(), "{}", payload.to_str());
            1
        }
    };
    let _ = io::stderr().flush();
    std::process::exit(exit_status)
}

fn run_repl(flags: InterpreterFlags, startup: Option<&Path>, argv: Vec<String>) -> Result<()> {
let mut interpreter = weavepy::vm::Interpreter::default();
interpreter.apply_run_options(&flags);
Expand Down
126 changes: 126 additions & 0 deletions crates/weavepy-cli/tests/m_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
//! Integration coverage for `weavepy -m test` (RFC 0034 §6).
//!
//! These tests drive the real `weavepy` binary through the
//! `test.__main__` → `test.libregrtest.main` plumbing against the
//! bundled self-host fixtures in `tests/regrtest/`. They guarantee the
//! `-m test` entry point — argument parsing, discovery, per-module
//! classification, the CPython-shaped summary, and the propagated exit
//! code — never silently rots, without needing a CPython checkout.

use std::path::PathBuf;
use std::process::Command;

/// Resolves the bundled `tests/regrtest/` fixture directory to a
/// canonical path.
///
/// # Panics
///
/// Panics if the directory cannot be canonicalized, or if it does not
/// contain `test_unittest_machinery.py`.
fn bundled_testdir() -> PathBuf {
    // CARGO_MANIFEST_DIR is `<workspace>/crates/weavepy-cli`, so the
    // fixtures sit two levels up from it.
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let fixtures = manifest_dir
        .join("../../tests/regrtest")
        .canonicalize()
        .expect("bundled tests/regrtest directory should exist");
    let sentinel = fixtures.join("test_unittest_machinery.py");
    assert!(
        sentinel.is_file(),
        "expected bundled fixtures under {}",
        fixtures.display()
    );
    fixtures
}

/// Spawns `weavepy -m test --testdir <bundled fixtures> <extra...>` and
/// waits for it to finish.
///
/// Returns `(success, stdout, stderr)`, where `success` is whether the
/// child exited successfully and both streams are decoded lossily as
/// UTF-8.
///
/// # Panics
///
/// Panics if the bundled fixture directory is missing or the binary
/// cannot be spawned.
fn run_m_test(extra: &[&str]) -> (bool, String, String) {
    let fixtures = bundled_testdir();
    let output = Command::new(env!("CARGO_BIN_EXE_weavepy"))
        .args(["-m", "test", "--testdir"])
        .arg(&fixtures)
        .args(extra)
        .output()
        .expect("failed to spawn weavepy -m test");
    let decode = |bytes: &[u8]| String::from_utf8_lossy(bytes).into_owned();
    (
        output.status.success(),
        decode(&output.stdout),
        decode(&output.stderr),
    )
}

/// Running a single bundled fixture with `--single` must exit 0 and
/// print a SUCCESS summary that counts exactly one passing module.
#[test]
fn m_test_single_bundled_fixture_passes() {
    let args = ["--single", "test_unittest_machinery"];
    let (exited_ok, stdout, stderr) = run_m_test(&args);
    assert!(
        exited_ok,
        "`weavepy -m test --single test_unittest_machinery` should exit 0\n\
         --- stdout ---\n{stdout}\n--- stderr ---\n{stderr}"
    );

    // Each expected summary fragment, paired with the failure message
    // shown when it is missing from stdout.
    let expectations = [
        ("Result: SUCCESS", "expected a CPython-shaped SUCCESS summary"),
        ("passed: 1", "expected exactly one passing module"),
    ];
    for (needle, complaint) in expectations {
        assert!(stdout.contains(needle), "{complaint}, got:\n{stdout}");
    }
}

/// Naming two bundled fixtures on the command line must exit 0 and
/// print a SUCCESS summary that counts two passing modules.
#[test]
fn m_test_runs_multiple_named_modules() {
    let modules = ["test_unittest_machinery", "test_doctest_machinery"];
    let (exited_ok, stdout, stderr) = run_m_test(&modules);
    assert!(
        exited_ok,
        "`weavepy -m test <two modules>` should exit 0\n\
         --- stdout ---\n{stdout}\n--- stderr ---\n{stderr}"
    );

    // Each expected summary fragment, paired with the failure message
    // shown when it is missing from stdout.
    let expectations = [
        ("Result: SUCCESS", "expected a SUCCESS summary"),
        ("passed: 2", "expected two passing modules"),
    ];
    for (needle, complaint) in expectations {
        assert!(stdout.contains(needle), "{complaint}, got:\n{stdout}");
    }
}

/// A module that fails its assertions must make `-m test` exit non-zero
/// with a CPython-shaped FAILURE summary — this is the signal CI gates
/// on, so it must be wired through faithfully.
///
/// The failing module is written into a per-process scratch directory
/// under the system temp dir, and `--testdir` is pointed at it.
#[test]
fn m_test_reports_failure_exit_code() {
    /// Removes the wrapped directory when dropped, so the scratch
    /// fixtures are cleaned up even when a later step panics.
    struct ScratchDir(PathBuf);

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    // Called only for its built-in assertion that the bundled fixtures
    // exist; this test itself runs against the scratch directory.
    let _ = bundled_testdir();

    let scratch = ScratchDir(
        std::env::temp_dir().join(format!("weavepy_mtest_fail_{}", std::process::id())),
    );
    std::fs::create_dir_all(&scratch.0).expect("create temp testdir");

    // `\x20` pins the first space of each indented Python line, because a
    // `\`-newline continuation swallows leading whitespace. The method
    // body must be indented deeper than its `def`, otherwise the module
    // fails to parse instead of failing its assertion.
    std::fs::write(
        scratch.0.join("test_intentional_fail.py"),
        "import unittest\n\
         class T(unittest.TestCase):\n\
         \x20   def test_boom(self):\n\
         \x20       self.assertEqual(1, 2)\n\
         if __name__ == '__main__':\n\
         \x20   unittest.main()\n",
    )
    .expect("write failing fixture");

    // Point --testdir at the temp dir holding only the failing module.
    let out = Command::new(env!("CARGO_BIN_EXE_weavepy"))
        .arg("-m")
        .arg("test")
        .arg("--testdir")
        .arg(&scratch.0)
        .arg("test_intentional_fail")
        .output()
        .expect("failed to spawn weavepy -m test");
    let stdout = String::from_utf8_lossy(&out.stdout);
    let stderr = String::from_utf8_lossy(&out.stderr);

    assert!(
        !out.status.success(),
        "a failing test module must yield a non-zero exit\n\
         --- stdout ---\n{stdout}\n--- stderr ---\n{stderr}"
    );
    assert!(
        stdout.contains("Result: FAILURE") || stdout.contains("failed:"),
        "expected a FAILURE summary, got:\n{stdout}"
    );
}
7 changes: 5 additions & 2 deletions crates/weavepy-compiler/src/bytecode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,11 @@ pub enum OpCode {
/// Peek TOS, push `len(TOS)` as an int.
GetLen,

/// Print the diagnostic representation of TOS — used by the
/// `dis` formatter only. Never emitted; reserved.
/// Echo TOS through `sys.displayhook` (CPython `PRINT_EXPR`).
/// Emitted only for top-level expression statements compiled in
/// interactive ("single") mode — the REPL (`code`/`codeop`) and
/// `doctest`. In "exec" mode an expression statement uses
/// `PopTop` instead.
PrintExpr,
}

Expand Down
1 change: 1 addition & 0 deletions crates/weavepy-compiler/src/cpython_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,7 @@ fn stack_effect(opcode: OpCode, arg: u32) -> i64 {
| O::ReturnValue
| O::PopJumpIfFalse
| O::PopJumpIfTrue
| O::PrintExpr
| O::ImportStar => -1,
O::CopyTop => 1,
O::StoreAttr => -2,
Expand Down
85 changes: 82 additions & 3 deletions crates/weavepy-compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,28 @@ pub fn compile_module_with_source(
Ok(top.finish())
}

/// Compiles `module` in interactive ("single") mode.
///
/// Builds a top-level `<module>` compiler for `filename` with a line
/// index derived from `source`, sets its `interactive` flag, and then
/// compiles the module body. With that flag set, top-level expression
/// statements emit `OpCode::PrintExpr` (echoing the value through
/// `sys.displayhook`) rather than discarding it, the way CPython's
/// `compile(src, fn, "single")` does. Powers the REPL
/// (`code`/`codeop`) and `doctest`.
///
/// # Errors
///
/// Returns the `CompileError` raised while compiling the module body.
pub fn compile_interactive_with_source(
    module: &Module,
    source: &str,
    filename: &str,
) -> Result<CodeObject, CompileError> {
    let lines = Rc::new(LineIndex::new(source));
    let mut compiler = Compiler::new(
        String::from("<module>"),
        String::from(filename),
        CodeKind::Module,
        lines,
    );
    compiler.interactive = true;
    compiler.compile_module_body(module)?;
    Ok(compiler.finish())
}

/// Lookup table that maps a byte offset back to a 1-based line number.
/// Filled once per top-level compile and shared by reference into every
/// nested `Compiler` for cheap per-instruction line lookups.
Expand Down Expand Up @@ -447,6 +469,14 @@ struct Compiler {
/// expose it here rather than threading the value through every
/// call site.
code_kind: CodeKind,
/// `True` for the top-level code object compiled in interactive
/// ("single") mode. Module-level expression *statements* then echo
/// their value through `sys.displayhook` (via `OpCode::PrintExpr`)
/// instead of being discarded — the REPL / `code` / `doctest`
/// behaviour. Never set on nested function/class scopes (they get
/// fresh `Compiler` instances), matching CPython's
/// `c_interactive && nestlevel <= 1` rule.
interactive: bool,
}

struct LoopFrame {
Expand Down Expand Up @@ -506,6 +536,7 @@ impl Compiler {
inside_class_body: false,
annotations_initialized: false,
code_kind: kind,
interactive: false,
}
}

Expand Down Expand Up @@ -677,7 +708,17 @@ impl Compiler {
match &stmt.kind {
StmtKind::Expr(e) => {
self.compile_expr(e)?;
self.emit(OpCode::PopTop, 0);
// Interactive ("single") mode: a top-level expression
// statement echoes its value via `sys.displayhook`
// instead of being discarded. Only the interactive
// top-level compiler sets this flag; nested scopes get
// fresh `Compiler` instances (always non-interactive),
// so this never fires inside functions/classes.
if self.interactive {
self.emit(OpCode::PrintExpr, 0);
} else {
self.emit(OpCode::PopTop, 0);
}
}
StmtKind::Pass => {}
StmtKind::Delete(targets) => {
Expand Down Expand Up @@ -1550,6 +1591,19 @@ impl Compiler {
}
}
inner.emit(OpCode::Resume, 0);
// CPython reserves `co_consts[0]` for the function docstring (or
// `None`). Mirror that here so `__doc__` is *only* the leading
// bare string-literal statement — never an unrelated string
// constant that merely happens to be interned first (e.g. the
// RHS of `x = "s"` as the first statement). `intern_constant`
// dedups, so a real docstring shares this slot with its own
// `LoadConst`, and a `None` slot is reused by the implicit
// `return None`.
let doc_slot = match first_stmt_docstring(body) {
Some(doc) => Constant::Str(doc.to_owned()),
None => Constant::None,
};
inner.co.intern_constant(doc_slot);
for s in body {
inner.compile_stmt(s)?;
}
Expand Down Expand Up @@ -1926,7 +1980,9 @@ impl Compiler {
handler: handlers_start,
depth: body_depth,
});
self.emit(OpCode::PushExcInfo, 0);
// Back-patched to the pc past the handler region (see the
// non-`except*` branch for the rationale).
let push_exc_site = self.emit(OpCode::PushExcInfo, 0);
// Stack on entry: [exc]. Stash the remainder in a
// synthetic local so each handler can update it.
let rem_name = format!(".eg_remaining{}", self.with_counter);
Expand Down Expand Up @@ -2014,14 +2070,21 @@ impl Compiler {
for site in handler_exit_jumps {
self.patch_jump(site, end);
}
// Record the handler-body end on PUSH_EXC_INFO (see below).
self.co.instructions[push_exc_site as usize].arg = end;
} else if has_handlers {
self.co.exception_table.push(ExcHandler {
start: body_start,
end: body_end,
handler: handlers_start,
depth: body_depth,
});
self.emit(OpCode::PushExcInfo, 0);
// The arg is back-patched below to the pc just past this
// handler region; the VM tags the active-handler entry with
// it so an exception escaping the handler to an enclosing
// `try` correctly unwinds `sys.exc_info()` (see
// `Interpreter::handle_exception`).
let push_exc_site = self.emit(OpCode::PushExcInfo, 0);
// Stack on entry: [exc] (pushed by dispatch loop).
let mut next_handler_sites: Vec<u32> = Vec::new();
let mut handler_exit_jumps: Vec<u32> = Vec::new();
Expand Down Expand Up @@ -2105,6 +2168,8 @@ impl Compiler {
for site in handler_exit_jumps {
self.patch_jump(site, end);
}
// Record the handler-body end on PUSH_EXC_INFO (see above).
self.co.instructions[push_exc_site as usize].arg = end;
} else if has_finally {
// `try/finally` without except. The dispatch loop has
// pushed the exception onto the value stack. We leave it
Expand Down Expand Up @@ -3688,6 +3753,20 @@ fn method_references_class(body: &[Stmt]) -> bool {
reads.contains("super") || reads.contains("__class__")
}

/// Extracts the docstring of `body`, following CPython's rule that a
/// docstring is a bare string-literal *expression statement* in first
/// position.
///
/// Returns `None` when `body` is empty, when its first statement is not
/// an expression statement (for example the assignment `x = "s"`), or
/// when that expression is anything other than a string constant (for
/// example an f-string).
fn first_stmt_docstring(body: &[Stmt]) -> Option<&str> {
    let first = body.first()?;
    if let StmtKind::Expr(expr) = &first.kind {
        if let ExprKind::Constant(AstConstant::Str(text)) = &expr.kind {
            return Some(text.as_str());
        }
    }
    None
}

/// `True` if any statement in `body` contains a `yield` or `yield from`
/// in the immediate scope. Does NOT recurse into nested `def` / `lambda`
/// / comprehension bodies — those have their own scopes.
Expand Down
Loading
Loading