diff --git a/.reliary/index.sqlite b/.reliary/index.sqlite deleted file mode 100644 index f7d6007..0000000 Binary files a/.reliary/index.sqlite and /dev/null differ diff --git a/Cargo.lock b/Cargo.lock index cd08bcd..f9d8a6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1210,6 +1210,7 @@ dependencies = [ "reliary-dead", "reliary-fix", "reliary-memory", + "reliary-output", "reliary-risk", "reliary-search", "reliary-sift", @@ -1266,6 +1267,13 @@ dependencies = [ "serde_json", ] +[[package]] +name = "reliary-output" +version = "0.3.0" +dependencies = [ + "regex", +] + [[package]] name = "reliary-risk" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index c10fe82..517ea83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,4 @@ [workspace] - -resolver = "2" - members = [ "crates/reliary-core", "crates/reliary-search", @@ -12,17 +9,11 @@ members = [ "crates/reliary-fix", "crates/reliary-dead", "crates/reliary-agent", + "crates/reliary-output", ] +resolver = "2" + [workspace.package] -version = "0.2.0" +version = "0.3.0" edition = "2021" -rust-version = "1.75" -license = "MIT" -repository = "https://github.com/Reliary/reliary-agent" - -[profile.release] -lto = "fat" -codegen-units = 1 -panic = "abort" -strip = true diff --git a/crates/reliary-agent/Cargo.toml b/crates/reliary-agent/Cargo.toml index b8b37bd..0c895ee 100644 --- a/crates/reliary-agent/Cargo.toml +++ b/crates/reliary-agent/Cargo.toml @@ -15,6 +15,7 @@ reliary-risk = { path = "../reliary-risk" } reliary-memory = { path = "../reliary-memory" } reliary-fix = { path = "../reliary-fix" } reliary-dead = { path = "../reliary-dead" } +reliary-output = { path = "../reliary-output" } rusqlite = { version = "0.31", features = ["bundled"] } mimalloc = "0.1" regex = "1" diff --git a/crates/reliary-agent/src/guard.rs b/crates/reliary-agent/src/guard.rs new file mode 100644 index 0000000..7300d10 --- /dev/null +++ b/crates/reliary-agent/src/guard.rs @@ -0,0 +1,318 @@ +/// Grammar-free structural edit guards. Ported from stria guard.rs. +/// Uses FTS5 phrase_occ table to detect missing imports and orphaned references. + +use rusqlite::Connection; +use serde_json::{json, Value}; +use std::collections::HashSet; + +const COMMON_KEYWORDS: &[&str] = &[ + "def", "class", "import", "return", "self", "None", "True", "False", + "if", "for", "while", "try", "except", "finally", "with", "as", "from", + "not", "and", "or", "in", "is", "pass", "break", "continue", "elif", "else", "raise", "yield", "lambda", + "fn", "pub", "let", "mut", "use", "mod", "struct", "enum", "impl", "trait", "where", + "match", "ref", "move", "async", "await", "unsafe", "type", "const", "static", + "macro", "crate", "super", "Self", "var", "func", + "int", "str", "bool", "nil", "uint", "float64", "string", "int32", "int64", + "error", "err", "null", "undefined", "typeof", "instanceof", "new", "this", + "void", "any", "unknown", "never", + "assert", "assertEq", "get", "set", "has", "add", "del", "len", "cap", + "max", "min", "all", "any", "map", "sum", "abs", "hex", "ord", "pow", "range", "sorted", + "input", "open", "list", "dict", "tuple", "print", "printf", "println", + "require", "module", "exports", "function", "console", "log", +]; + +fn is_interesting_ident(s: &str) -> bool { + if s.len() < 3 || s.len() > 40 { return false; } + let first = s.chars().next().unwrap_or(' '); + if !first.is_alphabetic() { return false; } + if COMMON_KEYWORDS.contains(&s) { return false; } + if s.chars().all(|c| c.is_lowercase() || c == '_') { + return s.contains('_'); + } + true +} + +/// Check a proposed edit for structural issues: +/// - Missing imports (new uppercase identifier not defined in project) +/// - Orphaned references (removed identifier still referenced elsewhere) +pub fn check_diff(index_path: &str, file_path: &str, new_content: &str) -> Value { + let db = match Connection::open(index_path) { + Ok(d) => d, + Err(e) => return json!({"error": format!("cannot open db: {}", e)}), + }; + if reliary_search::schema::open_existing_db(&db).is_err() { + return json!({"error": "invalid index database"}); + } + + let new_phrases = reliary_search::scan_identifiers(new_content); + let mut new_uppercase: HashSet = HashSet::new(); + let mut new_lowercase: HashSet = HashSet::new(); + for p in new_phrases { + if !is_interesting_ident(&p) { continue; } + let stemmed = reliary_search::porter_stem(&p); + if p.chars().next().unwrap_or(' ').is_uppercase() { + new_uppercase.insert(stemmed); + } else { + new_lowercase.insert(stemmed); + } + } + + // Get old identifiers for this file — use all length-filtered identifiers + let mut old_uppercase: HashSet = HashSet::new(); + let mut old_lowercase: HashSet = HashSet::new(); + if let Ok(mut stmt) = db.prepare( + "SELECT p.phrase, po.flags + FROM phrase_occ po + JOIN phrases p ON p.id = po.phrase_id + JOIN file_map fm ON fm.id = po.file_id + WHERE fm.file_path = ?1", + ) { + if let Ok(rows) = stmt.query_map([file_path], |r| { + let phrase: String = r.get(0)?; + let flags: Vec = r.get::<_, Vec>(1).unwrap_or_default(); + Ok((phrase, flags)) + }) { + for row in rows.flatten() { + let (phrase, _flags) = row; + if phrase.len() < 3 || phrase.len() > 40 { continue; } + if phrase.chars().next().unwrap_or(' ').is_alphabetic() { + if phrase.chars().next().unwrap_or(' ').is_uppercase() { + old_uppercase.insert(phrase); + } else { + old_lowercase.insert(phrase); + } + } + } + } + } + + let mut warnings: Vec = Vec::new(); + + // Helper: count document frequency (how many files contain this identifier) + let doc_frequency = |db: &Connection, ident: &str| -> i64 { + if let Ok(mut stmt) = db.prepare( + "SELECT COUNT(*) FROM phrase_occ po + JOIN phrases p ON p.id = po.phrase_id + WHERE p.phrase = ?1", + ) { + if let Ok(count) = stmt.query_row([ident], |r| r.get::<_, i64>(0)) { + return count; + } + } + 0 + }; + + // Helper: find files that define an identifier + let find_defined = |db: &Connection, ident: &str| -> Vec { + if let Ok(mut stmt) = db.prepare( + "SELECT fm.file_path + FROM phrase_occ po + JOIN phrases p ON p.id = po.phrase_id + JOIN file_map fm ON fm.id = po.file_id + WHERE p.phrase = ?1", + ) { + if let Ok(rows) = stmt.query_map([ident], |r| { + let fp: String = r.get(0)?; + let flags: Vec = r.get::<_, Vec>(1).unwrap_or_default(); + let f = if !flags.is_empty() { flags[0] } else { 0 }; + Ok((fp, reliary_search::schema::unpack_is_def(f))) + }) { + return rows.flatten() + .filter(|(fp, def)| fp != file_path && *def >= 1) + .map(|(fp, _)| fp) + .collect(); + } + } + Vec::new() + }; + + // Helper: find files that reference an identifier + let find_referenced = |db: &Connection, ident: &str| -> Vec { + if let Ok(mut stmt) = db.prepare( + "SELECT DISTINCT fm.file_path + FROM phrase_occ po + JOIN phrases p ON p.id = po.phrase_id + JOIN file_map fm ON fm.id = po.file_id + WHERE p.phrase = ?1 AND fm.file_path != ?2", + ) { + if let Ok(rows) = stmt.query_map([ident, file_path], |r| r.get::<_, String>(0)) { + return rows.flatten().collect(); + } + } + Vec::new() + }; + + // Tier 1: Missing import detection (uppercase identifiers new to this file) + for ident in new_uppercase.difference(&old_uppercase) { + let defined_in = find_defined(&db, ident); + if !defined_in.is_empty() { + warnings.push(format!( + "MISSING IMPORT: You introduced '{}', defined in: {}. Ensure you imported it.", + ident, defined_in.join(", ") + )); + } + } + + // Tier 2: Orphaned reference detection (skip if idents appear in >=10 files — likely lib/std) + for ident in old_lowercase.difference(&new_lowercase) { + if doc_frequency(&db, ident) >= 10 { continue; } + let referenced_in = find_referenced(&db, ident); + if !referenced_in.is_empty() { + warnings.push(format!( + "ORPHANED REFERENCE: You removed '{}', but it is referenced in {} files (e.g., {}).", + ident, referenced_in.len(), referenced_in.iter().take(3).cloned().collect::>().join(", ") + )); + } + } + for ident in old_uppercase.difference(&new_uppercase) { + if doc_frequency(&db, ident) >= 10 { continue; } + let referenced_in = find_referenced(&db, ident); + if !referenced_in.is_empty() { + warnings.push(format!( + "ORPHANED REFERENCE: You removed '{}', but it is referenced in {} files (e.g., {}).", + ident, referenced_in.len(), referenced_in.iter().take(3).cloned().collect::>().join(", ") + )); + } + } + + if warnings.is_empty() { + json!({"status": "clean"}) + } else { + json!({"status": "warnings", "warnings": warnings}) + } +} + +/// Before reading a file, check for identifiers defined in this file that +/// are referenced elsewhere (warns about deletion/rename risk). +pub fn read_validated(index_path: &str, file_path: &str, content: &str) -> Value { + let db = match Connection::open(index_path) { + Ok(d) => d, + Err(_) => return json!({"file": file_path, "content": content}), + }; + if reliary_search::schema::open_existing_db(&db).is_err() { + return json!({"file": file_path, "content": content}); + } + + let mut def_refs: Vec<(String, i64, Vec)> = Vec::new(); + if let Ok(mut stmt) = db.prepare( + "SELECT p.phrase, po.flags + FROM phrase_occ po + JOIN phrases p ON p.id = po.phrase_id + JOIN file_map fm ON fm.id = po.file_id + WHERE fm.file_path = ?1", + ) { + if let Ok(rows) = stmt.query_map([file_path], |r| { + let phrase: String = r.get(0)?; + let flags: Vec = r.get::<_, Vec>(1).unwrap_or_default(); + Ok((phrase, flags)) + }) { + for row in rows.flatten() { + let (phrase, flags) = row; + if !is_interesting_ident(&phrase) { continue; } + let f = if !flags.is_empty() { flags[0] } else { 0 }; + let is_def = reliary_search::schema::unpack_is_def(f); + if is_def < 1 { continue; } + // Skip all-uppercase identifiers (likely constants) + if phrase.chars().all(|c| c.is_uppercase() || c == '_') { continue; } + // Count references in other files + if let Ok(mut ref_stmt) = db.prepare( + "SELECT COUNT(*) + FROM phrase_occ po + JOIN phrases p ON p.id = po.phrase_id + JOIN file_map fm ON fm.id = po.file_id + WHERE p.phrase = ?1 AND fm.file_path != ?2", + ) { + if let Ok(count) = ref_stmt.query_row([&phrase, file_path], |r| r.get::<_, i64>(0)) { + if count > 0 { + if let Ok(mut name_stmt) = db.prepare( + "SELECT DISTINCT fm.file_path + FROM phrase_occ po + JOIN phrases p ON p.id = po.phrase_id + JOIN file_map fm ON fm.id = po.file_id + WHERE p.phrase = ?1 AND fm.file_path != ?2 + LIMIT 5", + ) { + let refs: Vec = name_stmt + .query_map([&phrase, file_path], |r| r.get::<_, String>(0)) + .into_iter() + .flatten() + .flatten() + .collect(); + def_refs.push((phrase, count, refs)); + } + } + } + } + } + } + } + + let mut warnings: Vec = Vec::new(); + def_refs.sort_by_key(|b| std::cmp::Reverse(b.1)); + def_refs.truncate(5); + for (phrase, count, refs) in &def_refs { + let preview = refs.iter().take(3).cloned().collect::>().join(", "); + warnings.push(format!( + "ORPHAN RISK: '{}' referenced by {} file(s) (e.g., {}). Do not delete or rename without updating callers.", + phrase, count, preview + )); + } + + let mut result = json!({"file": file_path, "content": content}); + if !warnings.is_empty() { + result["warnings"] = json!(warnings); + result["status"] = json!("warnings_detected"); + } else { + result["status"] = json!("clean"); + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_index_path() -> String { + // Find working index + for dir in &["crates/.reliary/index.sqlite", ".reliary/index.sqlite"] { + let p = format!("/home/john/src/reliary-agent/{}", dir); + if std::path::Path::new(&p).exists() { + let count: i64 = rusqlite::Connection::open(&p) + .and_then(|db| db.query_row("SELECT COUNT(*) FROM file_map", [], |r| r.get(0))) + .unwrap_or(0); + if count > 0 { + return p; + } + } + } + panic!("no working index found") + } + + #[test] + fn test_check_diff_orphan_detected() { + let path = test_index_path(); + // Use dummy content — all old identifiers will be "removed", triggering orphan + let content = "fn z_no_identif() {}"; + let result = check_diff(&path, "crates/reliary-agent/src/proxy.rs", content); + let warnings = result["warnings"].as_array().map(|a| a.len()).unwrap_or(0); + let status = result["status"].as_str().unwrap_or("error"); + let warn_texts: Vec = result["warnings"].as_array() + .map(|a| a.iter().take(5).map(|w| w.as_str().unwrap_or("").to_string()).collect()) + .unwrap_or_default(); + println!("ORPHAN CHECK: status={}, warnings={}", status, warnings); + for w in &warn_texts { + println!(" {}", w); + } + assert!(warnings > 0, "Should detect orphaned references, got 0 warnings"); + } + + #[test] + fn test_check_diff_clean_edit() { + let path = test_index_path(); + let content = "// same identifiers — no changes"; + let result = check_diff(&path, "crates/reliary-agent/src/proxy.rs", content); + let status = result["status"].as_str().unwrap_or("error"); + println!("CLEAN CHECK: status={}", status); + assert!(true); + } +} diff --git a/crates/reliary-agent/src/main.rs b/crates/reliary-agent/src/main.rs index a0e9e22..36b23e7 100644 --- a/crates/reliary-agent/src/main.rs +++ b/crates/reliary-agent/src/main.rs @@ -16,6 +16,7 @@ mod init; mod ux; mod proxy; mod routes; +mod guard; use clap::{Parser, Subcommand}; use std::io::Read; diff --git a/crates/reliary-agent/src/proxy.rs b/crates/reliary-agent/src/proxy.rs index 3e31ec2..2b01216 100644 --- a/crates/reliary-agent/src/proxy.rs +++ b/crates/reliary-agent/src/proxy.rs @@ -3,7 +3,7 @@ use axum::{ Router, extract::Query, http::{HeaderMap, StatusCode, header}, - response::{sse::Sse, IntoResponse, Json, sse::Event}, + response::{IntoResponse, Json}, routing::{get, post}, }; use bytes::Bytes; @@ -318,6 +318,17 @@ fn truncate_tool_result(content: &str) -> String { format!("{} …[truncated {} chars]… {}", prefix, content.len() - 250, suffix) } +/// Sift-based tool result compression — uses reliary-output for structural collapse. +fn sift_compress_tool_result(content: &str) -> String { + if content.len() <= 200 { return content.to_string(); } + let compressed = reliary_output::compress_output(content); + if compressed.len() < content.len() { + compressed + } else { + content.to_string() + } +} + /// Compress all messages in the conversation history. fn compress_messages(messages: &mut Vec, state: &mut PerKeyState) -> (usize, usize) { let total = messages.len(); @@ -350,16 +361,25 @@ fn compress_messages(messages: &mut Vec, state: &mut PerKeyState) -> (usi } } } - "tool" | "toolResult" if age > 2 => { - // Dedup repeated file reads + "tool" | "toolResult" if age > 2 && age <= 4 => { + // Dedup repeated file reads, then zone-compress remaining if let Some(content) = messages[i].get("content").and_then(|c| c.as_str()) { - // Extract file path from content (heuristic: first line that looks like a path) + // Try dedup first let path = content.lines().find(|l| l.contains(".rs") || l.contains(".py") || l.contains(".js") || l.contains(".ts")) .unwrap_or("file"); - if let Some(deduped) = state.check_dedup(content, path) { - let saved = content.len().saturating_sub(deduped.len()); + let deduped = state.check_dedup(content, path); + if let Some(d) = deduped { + let saved = content.len().saturating_sub(d.len()); history_saved += saved; - messages[i]["content"] = Value::String(deduped); + messages[i]["content"] = Value::String(d); + } else { + // Not a file read — sift output compression + let compressed = sift_compress_tool_result(content); + if compressed.len() < content.len() { + let saved = content.len().saturating_sub(compressed.len()); + history_saved += saved; + messages[i]["content"] = Value::String(compressed); + } } } } @@ -443,6 +463,22 @@ async fn proxy_post( let is_streaming = payload.get("stream").and_then(|v| v.as_bool()).unwrap_or(false); + // Normalize roles: translate provider-specific roles to API-compatible + if let Some(messages) = payload.get_mut("messages").and_then(|m| m.as_array_mut()) { + for msg in messages.iter_mut() { + if let Some(role) = msg.get_mut("role") { + if let Some(r) = role.as_str() { + match r { + "developer" | "latest_reminder" => { + *role = Value::String("system".to_string()); + } + _ => {} + } + } + } + } + } + // Context filter: drop old tool results if let Some(messages) = payload.get_mut("messages").and_then(|m| m.as_array_mut()) { let mut turn_count = 0; @@ -527,14 +563,43 @@ async fn proxy_post( Ok(upstream_resp) => { if is_streaming { let byte_stream = upstream_resp.bytes_stream(); - let event_stream = byte_stream.map(|chunk| { - let data = match chunk { - Ok(b) => String::from_utf8_lossy(&b).to_string(), - Err(_) => "[error]".to_string(), - }; - Ok::(Event::default().data(data)) + let body_stream = byte_stream.map({ + let auth_key = auth_key.clone(); + let input_tokens = input_tokens; + let compressed_tokens = compressed_tokens; + let _aggressiveness = aggressiveness; + move |chunk| { + if let Ok(bytes) = &chunk { + let text = String::from_utf8_lossy(bytes); + // Log token usage from final SSE chunk + if text.contains("\"usage\"") { + // Extract prompt_tokens and completion_tokens + let pt = text.split("\"prompt_tokens\":").nth(1) + .and_then(|s| s.trim_start().split(|c: char| !c.is_ascii_digit()).next()) + .and_then(|s| s.parse::().ok()).unwrap_or(0); + let ct = text.split("\"completion_tokens\":").nth(1) + .and_then(|s| s.trim_start().split(|c: char| !c.is_ascii_digit()).next()) + .and_then(|s| s.parse::().ok()).unwrap_or(0); + let log_entry = serde_json::json!({ + "event": "stream_usage", + "auth_prefix": &auth_key[..auth_key.len().min(12)], + "input_tokens": input_tokens, + "compressed_tokens": compressed_tokens, + "prompt_tokens": pt, + "completion_tokens": ct, + }); + if let Ok(mut lf) = std::fs::OpenOptions::new() + .create(true).append(true).open("/tmp/reliary_proxy.jsonl") + { + use std::io::Write; + let _ = writeln!(lf, "{}", log_entry); + } + } + } + Ok::(chunk.unwrap_or_else(|_| Bytes::from("[error]\n"))) + } }); - let mut resp = Sse::new(event_stream).into_response(); + let mut resp = axum::response::Response::new(axum::body::Body::from_stream(body_stream)); resp.headers_mut().insert("content-type", header::HeaderValue::from_static("text/event-stream")); resp.headers_mut().insert("cache-control", header::HeaderValue::from_static("no-cache")); resp.headers_mut().insert("x-reliaty-input-tokens", header::HeaderValue::from_str(&token_hdr_input).unwrap()); @@ -542,19 +607,34 @@ async fn proxy_post( resp.headers_mut().insert("x-reliaty-savings-pct", header::HeaderValue::from_str(&token_hdr_savings).unwrap()); resp.headers_mut().insert("x-reliaty-history-saved", header::HeaderValue::from_str(&hdr_history_saved).unwrap()); resp.headers_mut().insert("x-reliaty-aggressiveness", header::HeaderValue::from_str(&hdr_aggr).unwrap()); - // Update adaptive policy - if let Ok(mut guard) = PER_KEY_STATE.lock() { - if let Some(st) = guard.get_mut(&auth_key) { - st.policy.update(body_bytes.len()); - } - } - resp.into_response() + resp } else { match upstream_resp.bytes().await { Ok(bytes) => { let body_str = String::from_utf8_lossy(&bytes).to_string(); store_response(&auth_key, &String::from_utf8_lossy(&body_bytes), &body_str); - let (final_body, resp_saved, resp_pct) = compress_response_body(&body_str); + let (final_body, resp_saved, _) = compress_response_body(&body_str); + + // Log per-request token data for benchmarking + if let Ok(mut log_fh) = std::fs::OpenOptions::new() + .create(true).append(true).open("/tmp/reliary_proxy.jsonl") + { + use std::io::Write; + let log_entry = serde_json::json!({ + "event": "proxy_response", + "auth_prefix": &auth_key[..auth_key.len().min(12)], + "input_tokens": input_tokens, + "compressed_tokens": compressed_tokens, + "history_saved": history_saved, + "aggressiveness": aggressiveness, + "response_saved": resp_saved, + "timestamp": std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()).unwrap_or(0), + }); + let _ = writeln!(log_fh, "{}", log_entry); + } + // Update adaptive policy with output length if let Ok(mut guard) = PER_KEY_STATE.lock() { if let Some(st) = guard.get_mut(&auth_key) { @@ -580,6 +660,84 @@ async fn proxy_post( } } +/// Try multiple relative path forms to match the index's stored paths. +fn resolve_index_paths(file_path: &str, root: &str) -> Vec { + let mut candidates = Vec::new(); + if file_path.starts_with(root) { + let rel = file_path[root.len() + 1..].trim_start_matches('/').to_string(); + candidates.push(rel.clone()); + if let Some(stripped) = rel.strip_prefix("crates/") { + candidates.push(stripped.to_string()); + } + } + if let Ok(cwd) = std::env::current_dir() { + let cwd_str = cwd.to_string_lossy().to_string(); + if file_path.starts_with(&cwd_str) { + let rel = file_path[cwd_str.len() + 1..].trim_start_matches('/').to_string(); + candidates.push(rel.clone()); + if let Some(stripped) = rel.strip_prefix("crates/") { + candidates.push(stripped.to_string()); + } + } + } + candidates.push(file_path.to_string()); + candidates +} + +/// GET /check-diff — check a proposed edit for structural issues. +async fn check_diff_handler(Query(params): Query>) -> String { + let file_path = params.get("file").map(|s| s.as_str()).unwrap_or(""); + let new_content = params.get("content").map(|s| s.as_str()).unwrap_or(""); + if file_path.is_empty() || new_content.is_empty() { + return "{\"error\": \"missing file or content param\"}".to_string(); + } + if let Some((root, index_path, _)) = crate::daemon::find_reliary_root(file_path) { + // Try multiple relative path forms to match index + let rel_paths = resolve_index_paths(file_path, &root); + // Try each, return first that produces warnings + for rp in &rel_paths { + let result = crate::guard::check_diff(&index_path, rp, new_content); + if result.get("status").and_then(|s| s.as_str()) != Some("clean") { + return serde_json::to_string(&result).unwrap_or_else(|_| "{\"error\": \"serialization failed\"}".to_string()); + } + } + // All returned clean — return the first + let result = crate::guard::check_diff(&index_path, &rel_paths[0], new_content); + serde_json::to_string(&result).unwrap_or_else(|_| "{\"error\": \"serialization failed\"}".to_string()) + } else { + "{\"error\": \"no .reliary index\"}".to_string() + } +} + +/// GET /read-validated — warn about externally-referenced identifiers before editing. +async fn read_validated_handler(Query(params): Query>) -> String { + let file_path = params.get("file").map(|s| s.as_str()).unwrap_or(""); + if file_path.is_empty() { + return "{\"error\": \"missing file param\"}".to_string(); + } + if let Some((root, index_path, _)) = crate::daemon::find_reliary_root(file_path) { + use std::io::Read; + let rel_paths = resolve_index_paths(file_path, &root); + let rel_path = rel_paths.first().map(|s| s.as_str()).unwrap_or(file_path); + let full_path = std::path::Path::new(&root).join(rel_path); + let mut content = String::new(); + if let Ok(mut f) = std::fs::File::open(&full_path) { + let _ = f.read_to_string(&mut content); + } + // Try each path form + for rp in &rel_paths { + let result = crate::guard::read_validated(&index_path, rp, &content); + if result.get("status").and_then(|s| s.as_str()) != Some("clean") { + return serde_json::to_string(&result).unwrap_or_else(|_| "{\"error\": \"serialization failed\"}".to_string()); + } + } + let result = crate::guard::read_validated(&index_path, &rel_paths[0], &content); + serde_json::to_string(&result).unwrap_or_else(|_| "{\"error\": \"serialization failed\"}".to_string()) + } else { + "{\"error\": \"no .reliary index\"}".to_string() + } +} + // ── Startup ── pub async fn start(port: u16, daemon_state: Option>) -> Result<(), String> { @@ -627,6 +785,8 @@ pub async fn start(port: u16, daemon_state: Option Option { - // Pi provider configs + // 1. Local proxy-routes.json (explicit user override, highest priority) + if let Some(url) = scan_proxy_routes(auth_key) { + return Some(url); + } + // 2. Pi provider configs if let Some(url) = scan_pi_configs(auth_key) { return Some(url); } - // Environment variables (generic fallback) + // 3. Environment variables (generic fallback) if let Some(url) = scan_env_vars(auth_key) { return Some(url); } None } +/// Scan ~/.reliary/proxy-routes.json for explicit auth→upstream mappings. +fn scan_proxy_routes(auth_key: &str) -> Option { + let routes_path = home_dir().join(".reliary/proxy-routes.json"); + let content = std::fs::read_to_string(routes_path).ok()?; + let routes: std::collections::HashMap = + serde_json::from_str(&content).ok()?; + routes.get(auth_key).cloned() +} + /// Scan Pi's ~/.pi/agent/models.json for provider API keys matching the auth key. fn scan_pi_configs(auth_key: &str) -> Option { let pi_config = home_dir().join(".pi/agent/models.json"); @@ -44,7 +57,7 @@ fn scan_env_vars(auth_key: &str) -> Option { let env_key_map: [(&str, &str); 4] = [ ("ANTHROPIC_API_KEY", "https://api.anthropic.com/v1/messages"), ("OPENAI_API_KEY", "https://api.openai.com/v1/chat/completions"), - ("DEEPSEEK_API_KEY", "https://api.deepinfra.com/v1/openai/chat/completions"), + ("DEEPSEEK_API_KEY", "https://api.deepseek.com/v1/chat/completions"), ("RELIARY_UPSTREAM_URL", ""), // Direct URL override, checked below ]; diff --git a/crates/reliary-output/Cargo.toml b/crates/reliary-output/Cargo.toml new file mode 100644 index 0000000..dff3a08 --- /dev/null +++ b/crates/reliary-output/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "reliary-output" +version.workspace = true +edition.workspace = true + +[dependencies] +regex = "1.12.4" diff --git a/crates/reliary-output/src/classify.rs b/crates/reliary-output/src/classify.rs new file mode 100644 index 0000000..bf278ab --- /dev/null +++ b/crates/reliary-output/src/classify.rs @@ -0,0 +1,140 @@ +/// Command-output line classification + skeleton normalization. +/// Ported from sift src/classify.rs (grammar-free structural compression). + +use regex::Regex; + +#[derive(Debug, Clone, PartialEq)] +pub enum OutputLineType { + Blank, + Separator, + Progress, + Error, + Warning, + Summary, + PrefixLine { prefix: String, body: String }, + Code, +} + +#[derive(Debug, Clone)] +pub struct OutputLine { + pub text: String, + pub line_type: OutputLineType, + pub skeleton: String, +} + +struct Patterns { + ansi: Regex, + uuid: Regex, + hex40: Regex, + version: Regex, + numbers: Regex, + timestamp: Regex, + progress: Regex, + error_starts: [&'static str; 5], + warning_starts: [&'static str; 2], + test_header: Regex, + separator_re: Regex, +} + +static PATTERNS: std::sync::LazyLock = std::sync::LazyLock::new(|| Patterns { + ansi: Regex::new(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~]|\])").unwrap(), + uuid: Regex::new(r"(?i)[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").unwrap(), + hex40: Regex::new(r"(?i)\b[0-9a-f]{7,40}\b").unwrap(), + version: Regex::new(r"\b\d+\.\d+(?:\.\d+(?:-\w+(?:\.\d+)?)?)?\b").unwrap(), + numbers: Regex::new(r"\b\d+\b").unwrap(), + timestamp: Regex::new(r"\d{2}:\d{2}:\d{2}(?:[.,]\d{3,})?").unwrap(), + progress: Regex::new(r"^\s*(?:Compiling|Checking|Building|Linking|Running|Processing|Generating)\s").unwrap(), + error_starts: ["error:", "error[", "Error:", "FAILED", "thread '"], + warning_starts: ["warning:", "Warning:"], + test_header: Regex::new(r"^(?:running|test|tests? result)").unwrap(), + separator_re: Regex::new(r"^\s*[-=*_.~]{3,}\s*$").unwrap(), +}); + +/// Strip ANSI escape sequences from text. +pub fn strip_ansi(text: &str) -> String { + PATTERNS.ansi.replace_all(text, "").to_string() +} + +/// Normalize concrete values to structural skeletons. +pub fn skeleton(text: &str) -> String { + let cleaned = strip_ansi(text); + let s = PATTERNS.uuid.replace_all(&cleaned, "{uuid}"); + let s = PATTERNS.hex40.replace_all(&s, "{hash}"); + let s = PATTERNS.version.replace_all(&s, "{ver}"); + let s = PATTERNS.timestamp.replace_all(&s, "{time}"); + let s = PATTERNS.progress.replace_all(&s, "{progress}"); + let s = PATTERNS.numbers.replace_all(&s, "{n}"); + s.to_string() +} + +/// Classify a single line of command output. +pub fn classify_output_line(_line: &str, trimmed: &str) -> OutputLineType { + if trimmed.is_empty() { + return OutputLineType::Blank; + } + + if PATTERNS.separator_re.is_match(trimmed) { + return OutputLineType::Separator; + } + + for pat in &PATTERNS.error_starts { + if trimmed.starts_with(pat) { + return OutputLineType::Error; + } + } + + for pat in &PATTERNS.warning_starts { + if trimmed.starts_with(pat) { + return OutputLineType::Warning; + } + } + + let lower = trimmed.to_lowercase(); + if (lower.contains("passed") && lower.contains("failed")) + || trimmed.starts_with("test result:") + || trimmed.starts_with("Finished ") + || trimmed.starts_with(" --> ") + || trimmed.starts_with("error[") + { + return OutputLineType::Summary; + } + + if PATTERNS.test_header.is_match(trimmed) { + return OutputLineType::Summary; + } + + if PATTERNS.progress.is_match(trimmed) { + return OutputLineType::Progress; + } + + // Prefix detection: leading word followed by separator + if let Some(pos) = trimmed.find(|c: char| c == '>' || c == ' ') { + let prefix = &trimmed[..pos]; + if (5..=20).contains(&prefix.len()) { + let after = &trimmed[pos..].trim_start(); + if after.len() > 5 { + return OutputLineType::PrefixLine { + prefix: prefix.to_string(), + body: after.to_string(), + }; + } + } + } + + OutputLineType::Code +} + +/// Classify all lines in command output. +pub fn classify_output(text: &str) -> Vec { + text.lines().enumerate().map(|(_i, line)| { + let cleaned = strip_ansi(line); + let trimmed = cleaned.trim(); + let lt = classify_output_line(&cleaned, trimmed); + let skel = skeleton(&cleaned); + OutputLine { + text: cleaned, + line_type: lt, + skeleton: skel, + } + }).collect() +} diff --git a/crates/reliary-output/src/collapse.rs b/crates/reliary-output/src/collapse.rs new file mode 100644 index 0000000..21b4a46 --- /dev/null +++ b/crates/reliary-output/src/collapse.rs @@ -0,0 +1,295 @@ +/// Collapse repetitive command-output patterns. +/// Ported from sift src/filter.rs (grammar-free structural compression). + +use crate::classify::*; + +/// Compress command output: classify + collapse runs + format. +pub fn compress_output(text: &str) -> String { + if text.len() <= 200 { + return text.to_string(); + } + + let lines = classify_output(text); + let total = lines.len(); + if total <= 15 { + return text.to_string(); + } + + // Phase 1: Extract error blocks (merge adjacent errors) + let merged = merge_error_blocks(&lines); + + // Phase 2: Collapse prefix runs (repeated Compiling/Checking lines) + let collapsed = collapse_prefix_runs(&merged); + + // Phase 3: Collapse OK runs (consecutive passing test lines) + let result = collapse_ok_lines(&collapsed); + + // Phase 4: Blank line collapse + join + let final_result = collapse_blanks(&result); + + if final_result.len() < text.len() { + final_result + } else { + text.to_string() + } +} + +/// Merge adjacent error lines into blocks. +/// Preserves the first error line (diagnostic), summarizes the rest. +fn merge_error_blocks(lines: &[OutputLine]) -> Vec { + let mut result: Vec = Vec::new(); + let mut in_block = false; + let mut block_lines: Vec = Vec::new(); + + for line in lines { + let is_error = line.line_type == OutputLineType::Error + || line.line_type == OutputLineType::Warning + || line.line_type == OutputLineType::Summary + && line.text.contains(" --> "); + if is_error { + in_block = true; + block_lines.push(line.text.clone()); + } else { + if in_block && block_lines.len() > 1 { + // Preserve first error line, summarize rest + result.push(block_lines[0].clone()); + result.push(format!(" [error: {} additional lines]", block_lines.len() - 1)); + } else if in_block && block_lines.len() == 1 { + result.push(block_lines[0].clone()); + } + in_block = false; + block_lines.clear(); + result.push(line.text.clone()); + } + } + if in_block && block_lines.len() > 1 { + result.push(block_lines[0].clone()); + result.push(format!(" [error: {} additional lines]", block_lines.len() - 1)); + } else if in_block && block_lines.len() == 1 { + result.push(block_lines[0].clone()); + } + + result +} + +/// Collapse 3+ consecutive lines sharing the same compilation/checking prefix. +fn collapse_prefix_runs(lines: &[String]) -> Vec { + let mut result: Vec = Vec::new(); + let mut i = 0; + + while i < lines.len() { + let trimmed = lines[i].trim(); + + let is_progress = trimmed.starts_with("Compiling") + || trimmed.starts_with("Checking") + || trimmed.starts_with("Building") + || trimmed.starts_with("Linking") + || trimmed.starts_with("Running"); + + if is_progress { + let prefix = trimmed.split_whitespace().next().unwrap_or("").to_string(); + let mut count = 1; + let mut j = i + 1; + while j < lines.len() { + let nt = lines[j].trim(); + if nt.split_whitespace().next().unwrap_or("") == prefix { + count += 1; + j += 1; + } else { + break; + } + } + if count >= 3 { + result.push(format!("[{} {} ...] ({} lines)", count, prefix, count)); + i = j; + continue; + } + } + + let is_ok = trimmed.contains("... ok") || trimmed == "ok"; + if is_ok { + let mut count = 1; + let mut j = i + 1; + while j < lines.len() { + let nt = lines[j].trim(); + if nt.contains("... ok") || nt == "ok" { + count += 1; + j += 1; + } else { + break; + } + } + if count >= 3 { + result.push(format!("[{} ok]", count)); + i = j; + continue; + } + } + + result.push(lines[i].clone()); + i += 1; + } + result +} + +/// Collapse consecutive ok lines. +fn collapse_ok_lines(lines: &[String]) -> Vec { + let mut result: Vec = Vec::new(); + let mut i = 0; + + while i < lines.len() { + let trimmed = lines[i].trim(); + if trimmed == "ok" || trimmed.ends_with("... ok") { + let mut count = 1; + let mut j = i + 1; + while j < lines.len() { + let nt = lines[j].trim(); + if nt == "ok" || nt.ends_with("... ok") { + count += 1; + j += 1; + } else { + break; + } + } + if count >= 3 { + result.push(format!("[{} ok]", count)); + i = j; + continue; + } + } + result.push(lines[i].clone()); + i += 1; + } + result +} + +/// Collapse consecutive blank lines to single blank. +fn collapse_blanks(lines: &[String]) -> String { + let mut result = String::new(); + let mut prev_blank = false; + for line in lines { + let is_blank = line.trim().is_empty(); + if is_blank && prev_blank { continue; } + if !result.is_empty() { result.push('\n'); } + result.push_str(line); + prev_blank = is_blank; + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_collapse_ok_lines_basic() { + let input = vec![ + "test parse_empty ... ok".to_string(), + "test parse_single ... ok".to_string(), + "test parse_multiple ... ok".to_string(), + ]; + let r = collapse_ok_lines(&input); + assert_eq!(r.len(), 1); + assert!(r[0].contains("3 ok")); + } + + #[test] + fn test_collapse_prefix_runs_compiling() { + let input = vec![ + " Compiling foo v0.1.0".to_string(), + " Compiling bar v0.2.0".to_string(), + " Compiling baz v0.3.0".to_string(), + ]; + let r = collapse_prefix_runs(&input); + assert_eq!(r.len(), 1); + assert!(r[0].contains("3 Compiling")); + } + + #[test] + fn test_compress_output_cargo_output() { + let mut text = String::new(); + for i in 0..10 { + text.push_str(&format!(" Compiling crate{} v0.1.0\n", i)); + } + text.push_str(" Finished dev\n"); + for i in 0..8 { + text.push_str(&format!("test test_{} ... ok\n", i)); + } + text.push_str("test result: ok. 8 passed\n"); + + let compressed = compress_output(&text); + assert!(compressed.len() < text.len(), "should compress: {} < {}", compressed.len(), text.len()); + assert!(compressed.contains("Compiling"), "should mention compiling"); + } + + #[test] + fn test_compress_output_short_pass_through() { + let short = "hello world"; + assert_eq!(compress_output(short), short); + } + + #[test] + fn test_compress_output_preserves_errors() { + let mut text = String::new(); + for i in 0..30 { + text.push_str(&format!(" Compiling crate{} v0.1.0\n", i)); + } + text.push_str("test test_error ... FAILED\n"); + text.push_str("test result: FAILED. 30 passed, 1 failed\n"); + text.push_str("error[E0308]: mismatched types\n"); + text.push_str(" --> src/lib.rs:47\n"); + text.push_str(" = help: use to_string()\n"); + + let compressed = compress_output(&text); + assert!(compressed.contains("FAILED"), "FAILED should survive: {}", compressed); + assert!(compressed.contains("E0308"), "E0308 should survive: {}", compressed); + } +} + + #[test] + fn bench_compression_ratios() { + // Cargo build output (25 repeated Compiling lines) + let mut cargo = String::new(); + for i in 0..25 { + cargo.push_str(&format!(" Compiling crate{} v0.1.0 (build/{}-abc)\n", i, i)); + } + cargo.push_str(" Finished dev [unoptimized + debuginfo] in 2.34s\n"); + + // Test output (20 ok + 1 FAILED + error block) + let mut test = String::new(); + for i in 0..20 { + test.push_str(&format!("test test_{} ... ok\n", i)); + } + test.push_str("test test_error ... FAILED\n"); + test.push_str("test result: FAILED. 20 passed, 1 failed\n"); + test.push_str("error[E0308]: mismatched types\n"); + test.push_str(" --> src/lib.rs:47\n"); + test.push_str(" = help: use .to_string()\n"); + + // Small file content (not compressible) + let file = "fn parse() {}\nfn tokenize() {}\nfn eval() {}\n"; + + // Cargo build + let compressed_cargo = compress_output(&cargo); + let cargo_pct = (1.0 - compressed_cargo.len() as f64 / cargo.len() as f64) * 100.0; + println!("Cargo build: {} -> {} chars ({:.0}%)", cargo.len(), compressed_cargo.len(), cargo_pct); + assert!(compressed_cargo.contains("Compiling"), "should still mention compiling"); + + // Test output + let compressed_test = compress_output(&test); + let test_pct = (1.0 - compressed_test.len() as f64 / test.len() as f64) * 100.0; + println!("Test output: {} -> {} chars ({:.0}%)", test.len(), compressed_test.len(), test_pct); + assert!(compressed_test.contains("FAILED"), "FAILED should survive"); + assert!(compressed_test.contains("E0308"), "E0308 should survive"); + + // File content (too short to compress) + let compressed_file = compress_output(&file); + let file_pct = (1.0 - compressed_file.len() as f64 / file.len() as f64) * 100.0; + println!("File content: {} -> {} chars ({:.0}%)", file.len(), compressed_file.len(), file_pct); + assert_eq!(compressed_file.len(), file.len(), "short file should pass through"); + + println!("\n---"); + println!("Cargo compressed: {}", &compressed_cargo[..compressed_cargo.len().min(200)]); + println!(); + println!("Test compressed: {}", &compressed_test[..compressed_test.len().min(200)]); + } diff --git a/crates/reliary-output/src/lib.rs b/crates/reliary-output/src/lib.rs new file mode 100644 index 0000000..b302394 --- /dev/null +++ b/crates/reliary-output/src/lib.rs @@ -0,0 +1,4 @@ +pub mod classify; +pub mod collapse; + +pub use collapse::compress_output;