diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4b170dc..30e909c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-24.04, macos-latest] + os: [ubuntu-24.04, macos-14] runs-on: ${{ matrix.os }} timeout-minutes: 15 steps: @@ -60,7 +60,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-24.04, macos-latest] + os: [ubuntu-24.04, macos-14] runs-on: ${{ matrix.os }} timeout-minutes: 15 steps: @@ -129,6 +129,10 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.RUST_TOOLCHAIN }} + - uses: Swatinem/rust-cache@v2 - uses: EmbarkStudios/cargo-deny-action@v2 with: command: check diff --git a/Cargo.toml b/Cargo.toml index e07e696..471fc78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,14 @@ directories = "6.0" figment = { version = "0.10", features = ["toml", "env"] } # HTTP client +# Note on TLS feature: reqwest 0.13 exposes `rustls` (the pre-cooked rustls +# stack: aws-lc-rs crypto + rustls-platform-verifier for system root certs). +# A bundled-roots variant (`rustls-tls-webpki-roots`) existed in older +# reqwest releases but is not a 0.13.x feature; switching to a pure +# webpki-roots/ring stack would require `rustls-no-provider` plus manual +# rustls configuration in code, which is out of scope for the audit fix. +# The `rustls` feature is the appropriate pin for "no native TLS" while +# accepting platform-native cert verification. reqwest = { version = "0.13", default-features = false, features = ["rustls", "charset", "http2", "json", "stream"] } # URL parsing (loopback enforcement for ollama_host per SR-001) diff --git a/src/app.rs b/src/app.rs index d517989..887fd51 100644 --- a/src/app.rs +++ b/src/app.rs @@ -8,7 +8,6 @@ use std::path::PathBuf; use console::style; use dialoguer::{Confirm, Editor, Input, Select}; use globset::{Glob, GlobSetBuilder}; -use secrecy::ExposeSecret; use tokio::signal; use tokio::sync::mpsc; use tokio_util::sync::CancellationToken; @@ -84,12 +83,16 @@ impl App { } pub async fn run(&mut self) -> Result<()> { - // Setup Ctrl+C handler with CancellationToken + // Setup Ctrl+C handler with CancellationToken. + // On registration failure, log the error and still call cancel.cancel() + // so any running CancellationToken-aware task (e.g., LLM streams) gets + // an explicit shutdown signal. This matches the legacy `.ok()` behavior + // (which fell through to cancel on Err) while now surfacing the error + // via tracing instead of silently discarding it (audit F-025). let cancel = self.cancel_token.clone(); tokio::spawn(async move { if let Err(e) = signal::ctrl_c().await { warn!(error = %e, "failed to install Ctrl+C handler"); - return; } cancel.cancel(); }); @@ -625,22 +628,19 @@ impl App { None => { eprintln!("{} (no API key configured)", style("MISSING").red().bold()); } - Some(key) => { - let redacted = redact_api_key(key.expose_secret()); + Some(_key) => { // Build the provider to reach verify(). Construction itself // can fail (e.g. HTTP client build); keep that non-fatal so - // the rest of `doctor` still runs. + // the rest of `doctor` still runs. Do not echo any portion + // of the API key (even a partial suffix is correlatable + // across logs/screenshots). match llm::create_provider(&self.config) { Err(e) => { eprintln!("{}: {}", style("ERROR").red().bold(), e); } Ok(provider) => match provider.verify().await { Ok(()) => { - eprintln!( - "{} (key '{}')", - style("reachable").green().bold(), - redacted - ); + eprintln!("{}", style("reachable").green().bold()); } Err(e) => { eprintln!("{}: {}", style("ERROR").red().bold(), e); @@ -1570,23 +1570,6 @@ fi } } -/// Redact an API key for display, preserving only the last 4 characters. -/// -/// Used by `doctor` to confirm which key is in effect without leaking it into -/// logs or terminal scrollback. Keys shorter than 8 characters are fully -/// masked since partial disclosure would be a large fraction of the secret. -fn redact_api_key(key: &str) -> String { - const SUFFIX_LEN: usize = 4; - const MIN_LEN_FOR_SUFFIX: usize = 8; - - let char_count = key.chars().count(); - if char_count < MIN_LEN_FOR_SUFFIX { - return "****".to_string(); - } - let tail: String = key.chars().skip(char_count - SUFFIX_LEN).collect(); - format!("****{tail}") -} - #[cfg(test)] mod tests { use super::*; @@ -1612,25 +1595,4 @@ mod tests { assert!(result.contains("Make it shorter")); assert!(result.contains("Respond with ONLY the refined JSON object.")); } - - #[test] - fn redact_api_key_masks_full_key_when_short() { - assert_eq!(redact_api_key(""), "****"); - assert_eq!(redact_api_key("abc"), "****"); - assert_eq!(redact_api_key("abcdefg"), "****"); - } - - #[test] - fn redact_api_key_exposes_only_last_four() { - assert_eq!(redact_api_key("sk-proj-abcdef1234"), "****1234"); - assert_eq!(redact_api_key("abcdefgh"), "****efgh"); - } - - #[test] - fn redact_api_key_handles_multibyte_chars() { - // Ensure we don't slice through a UTF-8 codepoint boundary. - let key = "keyé🔑ABCD"; - let redacted = redact_api_key(key); - assert_eq!(redacted, "****ABCD"); - } } diff --git a/src/cli.rs b/src/cli.rs index dfe3cfb..91fc297 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -27,7 +27,7 @@ pub struct Cli { /// Allow proceeding after potential secrets are detected in staged changes. /// - /// ⚠ DANGER: Secrets are still scanned for. This flag does not bypass + /// DANGER: Secrets are still scanned for. This flag does not bypass /// detection; it only allows an interactive confirmation to continue after /// secrets are found. In non-interactive modes (for example --yes, /// --porcelain, or piped stdin), commitbee still fails closed instead of diff --git a/src/config.rs b/src/config.rs index 75fbf7a..b736442 100644 --- a/src/config.rs +++ b/src/config.rs @@ -767,6 +767,26 @@ fn validate_ollama_host_is_loopback(raw: &str) -> Result<()> { ))); } + // Reject paths, queries, and fragments. The Ollama provider treats this + // value as a base and appends `/api/...` via string concat; any non-empty + // path/query/fragment would either break the resulting URL or silently + // forward unintended segments to the upstream request. + if !matches!(url.path(), "" | "/") { + return Err(Error::Config(format!( + "ollama_host must not include a path component; got '{raw}'" + ))); + } + if url.query().is_some() { + return Err(Error::Config(format!( + "ollama_host must not include a query string; got '{raw}'" + ))); + } + if url.fragment().is_some() { + return Err(Error::Config(format!( + "ollama_host must not include a fragment; got '{raw}'" + ))); + } + Ok(()) } @@ -838,4 +858,22 @@ mod tests { let err = validate_ollama_host_is_loopback("http://").unwrap_err(); assert!(matches!(err, Error::Config(_))); } + + #[test] + fn loopback_rejects_path_component() { + let err = validate_ollama_host_is_loopback("http://localhost:11434/foo").unwrap_err(); + assert!(err.to_string().contains("path")); + } + + #[test] + fn loopback_rejects_query_string() { + let err = validate_ollama_host_is_loopback("http://localhost:11434?x=1").unwrap_err(); + assert!(err.to_string().contains("query")); + } + + #[test] + fn loopback_rejects_fragment() { + let err = validate_ollama_host_is_loopback("http://localhost:11434#frag").unwrap_err(); + assert!(err.to_string().contains("fragment")); + } } diff --git a/src/services/context.rs b/src/services/context.rs index 076593a..df29dcd 100644 --- a/src/services/context.rs +++ b/src/services/context.rs @@ -873,19 +873,28 @@ impl ContextBuilder { /// When false, the model is guided away from using "fix" type. fn detect_bug_evidence(changes: &StagedChanges) -> bool { changes.files.iter().any(|f| { - f.diff - .lines() - .filter(|l| l.starts_with('+') && !l.starts_with("+++")) - .any(|l| { - let lower = l[1..].to_lowercase(); - lower.contains("// fix") - || lower.contains("# fix") - || lower.contains("/* fix") - || lower.contains("// bug") - || lower.contains("# bug") - || lower.contains("fixme") - || lower.contains("hotfix") - }) + // Synthetic test diffs may omit `@@`; treat them as fully in-hunk. + let mut in_hunk = !f.diff.contains("@@"); + f.diff.lines().any(|line| { + if line.starts_with("@@") { + in_hunk = true; + return false; + } + if !in_hunk { + return false; + } + let Some(content) = line.strip_prefix('+') else { + return false; + }; + let lower = content.to_lowercase(); + lower.contains("// fix") + || lower.contains("# fix") + || lower.contains("/* fix") + || lower.contains("// bug") + || lower.contains("# bug") + || lower.contains("fixme") + || lower.contains("hotfix") + }) }) } @@ -958,10 +967,17 @@ impl ContextBuilder { } let call_pattern = format!("{}(", sym_name); + let mut in_hunk = !file.diff.contains("@@"); let has_call = file.diff.lines().any(|line| { - line.starts_with('+') - && !line.starts_with("+++") - && line.contains(&call_pattern) + if line.starts_with("@@") { + in_hunk = true; + return false; + } + in_hunk + && line.starts_with('+') + && line + .strip_prefix('+') + .is_some_and(|c| c.contains(&call_pattern)) }); if has_call { @@ -993,9 +1009,17 @@ impl ContextBuilder { let mut imports = Vec::new(); for file in &changes.files { + // Track `in_hunk` so an added/removed line whose content begins with + // `++` or `--` (rendered as `+++`/`---`) is not mistaken for a file + // header. Synthetic test diffs without `@@` are treated as + // fully in-hunk. + let mut in_hunk = !file.diff.contains("@@"); for line in file.diff.lines() { - // Skip diff headers - if line.starts_with("+++") || line.starts_with("---") { + if line.starts_with("@@") { + in_hunk = true; + continue; + } + if !in_hunk { continue; } // Detect added/removed import lines @@ -1069,8 +1093,16 @@ impl ContextBuilder { for file in &changes.files { let filename = file.path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + // Track `in_hunk` so a real added line whose content begins with + // `++` is not mistaken for the `+++ b/path` file header. Synthetic + // test diffs without `@@` are treated as fully in-hunk. + let mut in_hunk = !file.diff.contains("@@"); for line in file.diff.lines() { - if line.starts_with("+++") { + if line.starts_with("@@") { + in_hunk = true; + continue; + } + if !in_hunk { continue; } let Some(content) = line.strip_prefix('+') else { @@ -1124,10 +1156,21 @@ impl ContextBuilder { filename, "Cargo.toml" | "package.json" | "pyproject.toml" | "go.mod" ) { + let mut in_hunk_dep = !file.diff.contains("@@"); let has_version_change = file.diff.lines().any(|l| { - l.starts_with('+') - && !l.starts_with("+++") - && (l.contains("version") || l.contains("\"^") || l.contains("\"~")) + if l.starts_with("@@") { + in_hunk_dep = true; + return false; + } + if !in_hunk_dep { + return false; + } + let Some(content) = l.strip_prefix('+') else { + return false; + }; + content.contains("version") + || content.contains("\"^") + || content.contains("\"~") }); if has_version_change { dep_update = true; @@ -1211,8 +1254,16 @@ impl ContextBuilder { for file in &changes.files { let name = file.path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + // Track `in_hunk` so an added/removed line whose content begins with + // `++` or `--` is not skipped as a file-header. Synthetic test diffs + // without `@@` are treated as fully in-hunk. + let mut in_hunk = !file.diff.contains("@@"); for line in file.diff.lines() { - if line.starts_with("+++") || line.starts_with("---") { + if line.starts_with("@@") { + in_hunk = true; + continue; + } + if !in_hunk { continue; } let (is_added, content) = if let Some(rest) = line.strip_prefix('+') { diff --git a/src/services/git.rs b/src/services/git.rs index 8394c7c..eef67db 100644 --- a/src/services/git.rs +++ b/src/services/git.rs @@ -7,7 +7,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use tokio::process::Command; -use tracing::warn; +use tracing::{Instrument, warn}; use crate::domain::{ChangeStatus, DiffStats, FileCategory, FileChange, StagedChanges}; use crate::error::{Error, Result}; @@ -225,14 +225,14 @@ impl GitService { /// Concurrency ceiling for the `git show` subprocesses spawned by /// [`Self::fetch_file_contents`]. Each staged file spawns two processes - /// (staged + HEAD); capping at `cores * 2` (clamped to 16..=32) keeps - /// parallelism high on beefy machines without causing fork/FD pressure on - /// large stages. + /// (staged + HEAD). Scales as `cores * 2`, capped at 32 to keep parallelism + /// high on beefy machines without causing fork/FD pressure on large stages, + /// and floored at 2 so a single-core environment still makes progress. fn git_show_concurrency_limit() -> usize { let cores = std::thread::available_parallelism() .map(std::num::NonZeroUsize::get) .unwrap_or(4); - (cores * 2).clamp(16, 32) + (cores * 2).clamp(2, 32) } /// Fetch staged and HEAD content for multiple files concurrently, bounded @@ -252,18 +252,25 @@ impl GitService { let work_dir = Arc::clone(&work_dir); let semaphore = Arc::clone(&semaphore); let path = path.clone(); - set.spawn(async move { - // Semaphore is never closed, so acquire cannot fail. - let _permit = semaphore - .acquire_owned() - .await - .expect("git-show semaphore closed unexpectedly"); - let staged = - Self::fetch_git_show(&work_dir, &format!(":0:{}", path.display())).await; - let head = - Self::fetch_git_show(&work_dir, &format!("HEAD:{}", path.display())).await; - (path, staged, head) - }); + // Attach the path to the task's span so it propagates into the + // `warn!` log emitted by the JoinSet error arm below — without + // this, the path is unrecoverable from a `JoinError`. + let span = tracing::warn_span!("git_show", path = %path.display()); + set.spawn( + async move { + // Semaphore is never closed, so acquire cannot fail. + let _permit = semaphore + .acquire_owned() + .await + .expect("git-show semaphore closed unexpectedly"); + let staged = + Self::fetch_git_show(&work_dir, &format!(":0:{}", path.display())).await; + let head = + Self::fetch_git_show(&work_dir, &format!("HEAD:{}", path.display())).await; + (path, staged, head) + } + .instrument(span), + ); } let mut staged_map = HashMap::new(); diff --git a/src/services/safety.rs b/src/services/safety.rs index cf4f229..62c56d9 100644 --- a/src/services/safety.rs +++ b/src/services/safety.rs @@ -39,11 +39,13 @@ pub struct SecretPattern { pub fn build_patterns(custom: &[String], disabled: &[String]) -> Vec { let builtin = builtin_patterns(); - // Remove disabled patterns by name (case-insensitive match) + // Remove disabled patterns by name (case-insensitive match). Use a HashSet + // so the membership check is O(1) per pattern instead of O(N) `Vec::contains`. let mut patterns: Vec = if disabled.is_empty() { builtin.to_vec() } else { - let disabled_lower: Vec = disabled.iter().map(|s| s.to_lowercase()).collect(); + let disabled_lower: std::collections::HashSet = + disabled.iter().map(|s| s.to_lowercase()).collect(); builtin .iter() .filter(|p| !disabled_lower.contains(&p.name.to_lowercase())) @@ -334,10 +336,12 @@ pub fn scan_full_diff_with_patterns( continue; } - // Only check added lines - if !line.starts_with("+++") - && let Some(content) = line.strip_prefix('+') - { + // Only check added lines. `current_line` is only `Some` after parsing + // an `@@ ... @@` hunk header, so file-header lines (`+++ b/path`) are + // already filtered out above — no need for an extra `!starts_with("+++")` + // guard, which would also drop legitimate added lines whose content + // begins with `++`. + if let Some(content) = line.strip_prefix('+') { for pat in patterns { if pat.regex.is_match(content) { found.push(SecretMatch { diff --git a/src/services/splitter.rs b/src/services/splitter.rs index 5a7f3c6..76b5c3e 100644 --- a/src/services/splitter.rs +++ b/src/services/splitter.rs @@ -166,19 +166,33 @@ impl CommitSplitter { /// specific content. Files with similar fingerprints likely received the same /// mechanical transformation. fn diff_fingerprint(file: &FileChange) -> DiffFingerprint { - let lines: Vec<&str> = file.diff.lines().collect(); - // Count structural line categories + // Count structural line categories. Track `in_hunk` after `@@` so we don't + // mistake an added/removed line whose content begins with `++` or `--` + // (rendered as `+++` / `---`) for a file-header line. Synthetic diffs in + // tests sometimes omit the `@@` header entirely; in that case treat the + // whole input as content from the start. let mut added = 0usize; let mut removed = 0usize; let mut hunk_headers = 0usize; + let mut added_lines: Vec<&str> = Vec::new(); + let mut removed_lines: Vec<&str> = Vec::new(); + let mut in_hunk = !file.diff.contains("@@"); - for line in &lines { + for line in file.diff.lines() { if line.starts_with("@@") { hunk_headers += 1; - } else if line.starts_with('+') && !line.starts_with("+++") { + in_hunk = true; + continue; + } + if !in_hunk { + continue; + } + if let Some(rest) = line.strip_prefix('+') { added += 1; - } else if line.starts_with('-') && !line.starts_with("---") { + added_lines.push(rest); + } else if let Some(rest) = line.strip_prefix('-') { removed += 1; + removed_lines.push(rest); } } @@ -195,16 +209,6 @@ impl CommitSplitter { // Detect if changes are purely whitespace/indentation restructuring: // lines that differ only in leading whitespace after stripping +/- let mut indent_only_changes = 0usize; - let added_lines: Vec<&str> = lines - .iter() - .filter(|l| !l.starts_with("+++")) - .filter_map(|l| l.strip_prefix('+')) - .collect(); - let removed_lines: Vec<&str> = lines - .iter() - .filter(|l| !l.starts_with("---")) - .filter_map(|l| l.strip_prefix('-')) - .collect(); for added_line in &added_lines { let trimmed = added_line.trim(); diff --git a/tests/history.rs b/tests/history.rs index cfc67a3..cf0e39d 100644 --- a/tests/history.rs +++ b/tests/history.rs @@ -327,11 +327,25 @@ fn prompt_section_percentage_calculation() { /// the test's own tempdir, and pre-supplies author/committer identity /// so commits succeed even when the host has no `user.email` set or /// has `commit.gpgsign=true` globally. +/// +/// Uses an empty file inside the tempdir as `GIT_CONFIG_GLOBAL` rather +/// than `/dev/null` so the helper works on Windows as well as Unix. +/// +/// Synchronous `std::process::Command` is used here because the helper +/// only runs inside test fixtures that build a tempdir-backed git repo +/// before `HistoryService::analyze` is invoked — never inside an async +/// hot path. The narrow `#[allow]` keeps the project-wide clippy rule +/// in force for production code (see clippy.toml). +#[allow(clippy::disallowed_methods)] fn hermetic_git(dir: &Path) -> Command { + let empty_global = dir.join(".git_config_global_empty"); + // `write` truncates if the file already exists, so calling + // `hermetic_git` repeatedly with the same dir is idempotent. + let _ = std::fs::write(&empty_global, ""); let mut cmd = Command::new("git"); cmd.current_dir(dir) .env("GIT_CONFIG_NOSYSTEM", "1") - .env("GIT_CONFIG_GLOBAL", "/dev/null") + .env("GIT_CONFIG_GLOBAL", &empty_global) .env("HOME", dir) .env("GIT_AUTHOR_NAME", "test") .env("GIT_AUTHOR_EMAIL", "test@example.com")