From f160f4063b0434586486f6750424834bffe36ebb Mon Sep 17 00:00:00 2001 From: fuhan666 Date: Wed, 22 Apr 2026 00:58:54 +0800 Subject: [PATCH 1/7] Implement check command result contract --- src/cli/args.rs | 32 ++- src/cli/check.rs | 397 +++++++++++++++++++++++++++-- src/cli/dispatch.rs | 26 +- src/cli/error.rs | 4 - src/main.rs | 4 +- src/ui/handlers.rs | 108 ++++---- tests/check_contract_test.rs | 271 ++++++++++++++++++++ tests/headless_inspect_test.rs | 38 --- tests/help_and_version_cli_test.rs | 21 ++ 9 files changed, 775 insertions(+), 126 deletions(-) create mode 100644 tests/check_contract_test.rs diff --git a/src/cli/args.rs b/src/cli/args.rs index 3739596..bdbc294 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -21,14 +21,22 @@ pub enum Commands { #[command(subcommand)] subcommand: ReadCommands, }, - /// Check data quality (namespace reserved for v1.3.0) + /// Check workbook or sheet data quality Check { /// Excel file path file: PathBuf, - /// Check rule to run + /// Sheet name (exact match) + #[arg(long)] + sheet: Option, + + /// Check rules to run, comma-separated #[arg(long)] - rule: Option, + rules: Option, + + /// Minimum finding severity to return + #[arg(long, value_enum, default_value = "info")] + severity_threshold: SeverityThreshold, }, /// Open interactive TUI browser Ui { @@ -304,6 +312,24 @@ impl OutputShape { } } +#[derive(Clone, Copy, Debug, Default, clap::ValueEnum, PartialEq, Eq)] +pub enum SeverityThreshold { + #[default] + Info, + Warning, + Error, +} + +impl SeverityThreshold { + pub fn as_str(&self) -> &'static str { + match self { + SeverityThreshold::Info => "info", + SeverityThreshold::Warning => "warning", + SeverityThreshold::Error => "error", + } + } +} + /// Resolve the sheet target (by name or index) to a sheet index. pub fn resolve_sheet_target( workbook: &crate::excel::Workbook, diff --git a/src/cli/check.rs b/src/cli/check.rs index b369a85..e51e64b 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -1,28 +1,395 @@ -use serde_json::Value; +use serde::Serialize; +use serde_json::{json, Value}; +use std::cmp::Ordering; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; -use crate::cli::error::AppError; +use crate::cli::args::SeverityThreshold; +use crate::cli::envelope; +use crate::cli::error::{AppError, EXIT_CHECK_FINDINGS, EXIT_SUCCESS}; +use crate::excel::{open_workbook, Workbook}; -pub fn handle(file: std::path::PathBuf, rule: Option) -> Result { - let _format_str = file_format(&file); - let _path_str = file.to_string_lossy().to_string(); +const RULES: [CheckRuleId; 8] = [ + CheckRuleId::BlankHeaders, + CheckRuleId::DuplicateHeaders, + CheckRuleId::BlankRows, + CheckRuleId::BlankColumns, + CheckRuleId::NullRatio, + CheckRuleId::DuplicateValues, + CheckRuleId::TypeDrift, + CheckRuleId::FormulaPresence, +]; - // v1.0.0: check is namespace-only. Any attempt to run a rule is rejected. - let message = if let Some(r) = rule { - format!( - "Check rule '{}' is not implemented in v1.0.0. Quality checks are planned for v1.3.0.", - r - ) +pub fn handle( + file: PathBuf, + sheet: Option, + rules: Option, + severity_threshold: SeverityThreshold, +) -> Result<(Value, i32), AppError> { + let format_str = file_format(&file); + let path_str = file.to_string_lossy().to_string(); + + let mut workbook = + open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?; + let selected_rules = parse_rules(rules.as_deref())?; + let threshold = Severity::from_threshold(severity_threshold); + let checked_sheet_indices = resolve_checked_sheets(&workbook, sheet.as_deref())?; + + for index in &checked_sheet_indices { + let sheet_name = workbook.get_sheet_names()[*index].clone(); + workbook + .ensure_sheet_loaded(*index, &sheet_name) + .map_err(crate::cli::error::anyhow_to_app_error)?; + } + + let sheet_names = workbook.get_sheet_names(); + let mut findings = run_rules(&selected_rules, &checked_sheet_indices); + let finding_count_before_threshold = findings.len(); + findings.retain(|finding| finding.severity >= threshold); + sort_findings(&mut findings, &sheet_names); + + let data = json!({ + "summary": summarize_findings(&findings), + "stats": build_stats( + &workbook, + &checked_sheet_indices, + &selected_rules, + severity_threshold, + finding_count_before_threshold, + )?, + "findings": findings, + }); + + let target = if let Some(sheet_name) = sheet { + let sheet_index = checked_sheet_indices[0]; + envelope::target_sheet(&sheet_name, sheet_index) } else { - "Check command requires a --rule argument. Quality checks are planned for v1.3.0." - .to_string() + envelope::target_workbook() }; - Err(AppError::CheckNotImplemented { message }) + let exit_code = exit_code_for_findings( + data["summary"]["finding_count"] + .as_u64() + .unwrap_or_default() as usize, + ); + + Ok(( + envelope::success_envelope( + "check", + &path_str, + &format_str, + target, + json!({}), + data, + vec![], + ), + exit_code, + )) } -fn file_format(path: &std::path::Path) -> String { +fn file_format(path: &Path) -> String { path.extension() .and_then(|e| e.to_str()) .map(|e| e.to_lowercase()) .unwrap_or_else(|| "unknown".to_string()) } + +fn parse_rules(value: Option<&str>) -> Result, AppError> { + let Some(value) = value else { + return Ok(RULES.to_vec()); + }; + + let mut requested = Vec::new(); + for raw in value.split(',') { + let id = raw.trim(); + if id.is_empty() { + continue; + } + let rule = CheckRuleId::parse(id).ok_or_else(|| AppError::InvalidQuery { + message: format!( + "Unknown check rule '{}'. Supported rules: {}", + id, + RULES + .iter() + .map(CheckRuleId::as_str) + .collect::>() + .join(", ") + ), + })?; + if !requested.contains(&rule) { + requested.push(rule); + } + } + + if requested.is_empty() { + return Err(AppError::InvalidQuery { + message: "--rules must include at least one rule id".to_string(), + }); + } + + Ok(RULES + .iter() + .copied() + .filter(|rule| requested.contains(rule)) + .collect()) +} + +fn resolve_checked_sheets( + workbook: &Workbook, + sheet: Option<&str>, +) -> Result, AppError> { + if let Some(name) = sheet { + workbook + .resolve_sheet_by_name(name) + .map(|index| vec![index]) + .map_err(|e| AppError::TargetNotFound { + message: e.to_string(), + }) + } else { + Ok((0..workbook.get_sheet_names().len()).collect()) + } +} + +fn run_rules(_rules: &[CheckRuleId], _sheet_indices: &[usize]) -> Vec { + Vec::new() +} + +fn summarize_findings(findings: &[CheckFinding]) -> Value { + let error_count = findings + .iter() + .filter(|finding| finding.severity == Severity::Error) + .count(); + let warning_count = findings + .iter() + .filter(|finding| finding.severity == Severity::Warning) + .count(); + let info_count = findings + .iter() + .filter(|finding| finding.severity == Severity::Info) + .count(); + let finding_count = findings.len(); + + json!({ + "status": if finding_count == 0 { "pass" } else { "fail" }, + "finding_count": finding_count, + "error_count": error_count, + "warning_count": warning_count, + "info_count": info_count, + }) +} + +fn build_stats( + workbook: &Workbook, + checked_sheet_indices: &[usize], + rules: &[CheckRuleId], + severity_threshold: SeverityThreshold, + finding_count_before_threshold: usize, +) -> Result { + let checked_sheets: Result, AppError> = checked_sheet_indices + .iter() + .map(|index| { + let sheet = + workbook + .get_sheet_by_index(*index) + .ok_or_else(|| AppError::TargetNotFound { + message: format!("Sheet index {} not found", index), + })?; + let used_range = workbook + .get_used_range(*index) + .map_err(crate::cli::error::anyhow_to_app_error)?; + + Ok(json!({ + "name": sheet.name, + "index": index, + "used_range": used_range, + "max_rows": sheet.max_rows, + "max_cols": sheet.max_cols, + })) + }) + .collect(); + + Ok(json!({ + "sheet_count": workbook.get_sheet_names().len(), + "checked_sheet_count": checked_sheet_indices.len(), + "checked_sheets": checked_sheets?, + "rules_run": rules.iter().map(CheckRuleId::as_str).collect::>(), + "severity_threshold": severity_threshold.as_str(), + "finding_count_before_threshold": finding_count_before_threshold, + })) +} + +fn exit_code_for_findings(finding_count: usize) -> i32 { + if finding_count == 0 { + EXIT_SUCCESS + } else { + EXIT_CHECK_FINDINGS + } +} + +fn sort_findings(findings: &mut [CheckFinding], sheet_names: &[String]) { + let sheet_order: HashMap<&str, usize> = sheet_names + .iter() + .enumerate() + .map(|(index, name)| (name.as_str(), index)) + .collect(); + + findings.sort_by(|left, right| { + compare_usize( + sheet_order.get(left.sheet.as_str()).copied(), + sheet_order.get(right.sheet.as_str()).copied(), + ) + .then_with(|| left.rule_id.order().cmp(&right.rule_id.order())) + .then_with(|| compare_location(left.row, right.row)) + .then_with(|| compare_location(left.column, right.column)) + .then_with(|| left.range.cmp(&right.range)) + .then_with(|| left.message.cmp(&right.message)) + .then_with(|| left.details.to_string().cmp(&right.details.to_string())) + }); +} + +fn compare_location(left: Option, right: Option) -> Ordering { + match (left, right) { + (Some(left), Some(right)) => left.cmp(&right), + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => Ordering::Equal, + } +} + +fn compare_usize(left: Option, right: Option) -> Ordering { + match (left, right) { + (Some(left), Some(right)) => left.cmp(&right), + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => Ordering::Equal, + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +enum CheckRuleId { + BlankHeaders, + DuplicateHeaders, + BlankRows, + BlankColumns, + NullRatio, + DuplicateValues, + TypeDrift, + FormulaPresence, +} + +impl CheckRuleId { + fn parse(value: &str) -> Option { + RULES.iter().copied().find(|rule| rule.as_str() == value) + } + + fn as_str(&self) -> &'static str { + match self { + CheckRuleId::BlankHeaders => "blank_headers", + CheckRuleId::DuplicateHeaders => "duplicate_headers", + CheckRuleId::BlankRows => "blank_rows", + CheckRuleId::BlankColumns => "blank_columns", + CheckRuleId::NullRatio => "null_ratio", + CheckRuleId::DuplicateValues => "duplicate_values", + CheckRuleId::TypeDrift => "type_drift", + CheckRuleId::FormulaPresence => "formula_presence", + } + } + + fn order(&self) -> usize { + RULES + .iter() + .position(|rule| rule == self) + .unwrap_or(usize::MAX) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)] +#[serde(rename_all = "lowercase")] +enum Severity { + Info, + Warning, + Error, +} + +impl Severity { + fn from_threshold(threshold: SeverityThreshold) -> Self { + match threshold { + SeverityThreshold::Info => Severity::Info, + SeverityThreshold::Warning => Severity::Warning, + SeverityThreshold::Error => Severity::Error, + } + } +} + +#[derive(Clone, Debug, Serialize)] +struct CheckFinding { + rule_id: CheckRuleId, + severity: Severity, + sheet: String, + row: Option, + column: Option, + range: Option, + message: String, + details: Value, +} + +#[cfg(test)] +mod tests { + use serde_json::json; + + use super::*; + use crate::cli::error::{EXIT_CHECK_FINDINGS, EXIT_SUCCESS}; + + #[test] + fn exit_code_uses_one_for_successful_reports_with_findings() { + assert_eq!(exit_code_for_findings(0), EXIT_SUCCESS); + assert_eq!(exit_code_for_findings(2), EXIT_CHECK_FINDINGS); + } + + #[test] + fn findings_sort_by_sheet_rule_position_then_location() { + let mut findings = vec![ + CheckFinding { + rule_id: CheckRuleId::DuplicateHeaders, + severity: Severity::Warning, + sheet: "Orders".to_string(), + row: Some(3), + column: Some(2), + range: Some("B3".to_string()), + message: "later".to_string(), + details: json!({"field": "customer"}), + }, + CheckFinding { + rule_id: CheckRuleId::BlankHeaders, + severity: Severity::Warning, + sheet: "Summary".to_string(), + row: None, + column: None, + range: None, + message: "workbook-level".to_string(), + details: json!({}), + }, + CheckFinding { + rule_id: CheckRuleId::BlankHeaders, + severity: Severity::Warning, + sheet: "Orders".to_string(), + row: Some(2), + column: Some(1), + range: Some("A2".to_string()), + message: "earlier".to_string(), + details: json!({}), + }, + ]; + + sort_findings( + &mut findings, + &["Summary".to_string(), "Orders".to_string()], + ); + + assert_eq!(findings[0].sheet, "Summary"); + assert_eq!(findings[1].rule_id, CheckRuleId::BlankHeaders); + assert_eq!(findings[1].row, Some(2)); + assert_eq!(findings[2].rule_id, CheckRuleId::DuplicateHeaders); + } +} diff --git a/src/cli/dispatch.rs b/src/cli/dispatch.rs index 7681345..55e1482 100644 --- a/src/cli/dispatch.rs +++ b/src/cli/dispatch.rs @@ -1,9 +1,9 @@ use serde_json::Value; -use crate::cli::args::{Cli, Commands}; -use crate::cli::error::AppError; +use crate::cli::args::{Cli, Commands, OutputFormat}; +use crate::cli::error::{AppError, EXIT_SUCCESS}; -pub fn dispatch(cli: Cli) -> Result<(Value, crate::cli::args::OutputFormat), AppError> { +pub fn dispatch(cli: Cli) -> Result<(Value, OutputFormat, i32), AppError> { match cli.command { Commands::Inspect { subcommand } => { let format = match &subcommand { @@ -14,7 +14,7 @@ pub fn dispatch(cli: Cli) -> Result<(Value, crate::cli::args::OutputFormat), App crate::cli::args::InspectCommands::Tables { format, .. } => format.clone(), }; let value = crate::cli::inspect::handle(subcommand)?; - Ok((value, format)) + Ok((value, format, EXIT_SUCCESS)) } Commands::Read { subcommand } => { let format = match &subcommand { @@ -24,12 +24,17 @@ pub fn dispatch(cli: Cli) -> Result<(Value, crate::cli::args::OutputFormat), App crate::cli::args::ReadCommands::Records { format, .. } => format.clone(), }; let value = crate::cli::read::handle(subcommand)?; - Ok((value, format)) + Ok((value, format, EXIT_SUCCESS)) } - Commands::Check { file, rule } => { - let value = crate::cli::check::handle(file, rule)?; - // check returns error always in v1.0.0, but we still need a format - Ok((value, crate::cli::args::OutputFormat::Json)) + Commands::Check { + file, + sheet, + rules, + severity_threshold, + } => { + let (value, exit_code) = + crate::cli::check::handle(file, sheet, rules, severity_threshold)?; + Ok((value, OutputFormat::Json, exit_code)) } Commands::Ui { file } => { let workbook = crate::excel::open_workbook(&file, false) @@ -47,7 +52,8 @@ pub fn dispatch(cli: Cli) -> Result<(Value, crate::cli::args::OutputFormat), App serde_json::json!({"status": "interactive"}), vec![], ), - crate::cli::args::OutputFormat::Json, + OutputFormat::Json, + EXIT_SUCCESS, )) } } diff --git a/src/cli/error.rs b/src/cli/error.rs index 0e9963a..12d59a3 100644 --- a/src/cli/error.rs +++ b/src/cli/error.rs @@ -20,7 +20,6 @@ pub enum AppError { TargetNotFound { message: String }, InvalidQuery { message: String }, InternalError { message: String }, - CheckNotImplemented { message: String }, } impl AppError { @@ -32,7 +31,6 @@ impl AppError { AppError::TargetNotFound { .. } => "target_not_found", AppError::InvalidQuery { .. } => "invalid_query", AppError::InternalError { .. } => "internal_error", - AppError::CheckNotImplemented { .. } => "check_not_implemented", } } @@ -44,7 +42,6 @@ impl AppError { AppError::TargetNotFound { .. } => EXIT_TARGET_NOT_FOUND, AppError::InvalidQuery { .. } => EXIT_INVALID_QUERY, AppError::InternalError { .. } => EXIT_INTERNAL_ERROR, - AppError::CheckNotImplemented { .. } => EXIT_INVALID_QUERY, } } @@ -56,7 +53,6 @@ impl AppError { AppError::TargetNotFound { message } => message.clone(), AppError::InvalidQuery { message } => message.clone(), AppError::InternalError { message } => message.clone(), - AppError::CheckNotImplemented { message } => message.clone(), } } diff --git a/src/main.rs b/src/main.rs index db269f6..0bb37bc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,13 +27,13 @@ fn main() { let result = dispatch::dispatch(cli); match result { - Ok((value, format)) => { + Ok((value, format, exit_code)) => { if let Err(e) = output::write_success(&value, &format) { let envelope = e.to_envelope("", "", "unknown"); output::write_error(&envelope); std::process::exit(e.exit_code()); } - std::process::exit(0); + std::process::exit(exit_code); } Err(e) => { // Try to extract file/command info for the error envelope diff --git a/src/ui/handlers.rs b/src/ui/handlers.rs index 3fb6ac1..7d971f3 100644 --- a/src/ui/handlers.rs +++ b/src/ui/handlers.rs @@ -273,60 +273,6 @@ fn handle_editing_mode(app_state: &mut AppState, key: KeyEvent) { } } -#[cfg(test)] -mod tests { - use crossterm::event::{KeyCode, KeyEvent, KeyModifiers}; - use std::path::PathBuf; - - use super::handle_key_event; - use crate::app::{AppState, InputMode}; - use crate::excel::{Cell, Sheet, Workbook}; - - fn app_with_preview() -> AppState<'static> { - let mut data = vec![vec![Cell::empty(); 2]; 2]; - data[1][1] = Cell::new("Ada".to_string(), false); - let sheet = Sheet { - name: "Data".to_string(), - data, - max_rows: 1, - max_cols: 1, - is_loaded: true, - }; - let mut app = AppState::new( - Workbook::from_sheets_for_test(vec![sheet]), - PathBuf::from("test.xlsx"), - ) - .unwrap(); - app.show_query_preview(); - app - } - - #[test] - fn escape_closes_preview_without_quitting() { - let mut app = app_with_preview(); - - handle_key_event(&mut app, KeyEvent::new(KeyCode::Esc, KeyModifiers::empty())); - - assert!(matches!(app.input_mode, InputMode::Normal)); - assert!(app.query_preview.is_none()); - assert!(!app.should_quit); - } - - #[test] - fn q_closes_preview_without_quitting() { - let mut app = app_with_preview(); - - handle_key_event( - &mut app, - KeyEvent::new(KeyCode::Char('q'), KeyModifiers::empty()), - ); - - assert!(matches!(app.input_mode, InputMode::Normal)); - assert!(app.query_preview.is_none()); - assert!(!app.should_quit); - } -} - fn handle_search_mode(app_state: &mut AppState, key_code: KeyCode) { match key_code { KeyCode::Enter => app_state.execute_search(), @@ -453,3 +399,57 @@ fn handle_help_mode(app_state: &mut AppState, key_code: KeyCode) { _ => {} } } + +#[cfg(test)] +mod tests { + use crossterm::event::{KeyCode, KeyEvent, KeyModifiers}; + use std::path::PathBuf; + + use super::handle_key_event; + use crate::app::{AppState, InputMode}; + use crate::excel::{Cell, Sheet, Workbook}; + + fn app_with_preview() -> AppState<'static> { + let mut data = vec![vec![Cell::empty(); 2]; 2]; + data[1][1] = Cell::new("Ada".to_string(), false); + let sheet = Sheet { + name: "Data".to_string(), + data, + max_rows: 1, + max_cols: 1, + is_loaded: true, + }; + let mut app = AppState::new( + Workbook::from_sheets_for_test(vec![sheet]), + PathBuf::from("test.xlsx"), + ) + .unwrap(); + app.show_query_preview(); + app + } + + #[test] + fn escape_closes_preview_without_quitting() { + let mut app = app_with_preview(); + + handle_key_event(&mut app, KeyEvent::new(KeyCode::Esc, KeyModifiers::empty())); + + assert!(matches!(app.input_mode, InputMode::Normal)); + assert!(app.query_preview.is_none()); + assert!(!app.should_quit); + } + + #[test] + fn q_closes_preview_without_quitting() { + let mut app = app_with_preview(); + + handle_key_event( + &mut app, + KeyEvent::new(KeyCode::Char('q'), KeyModifiers::empty()), + ); + + assert!(matches!(app.input_mode, InputMode::Normal)); + assert!(app.query_preview.is_none()); + assert!(!app.should_quit); + } +} diff --git a/tests/check_contract_test.rs b/tests/check_contract_test.rs new file mode 100644 index 0000000..610a65a --- /dev/null +++ b/tests/check_contract_test.rs @@ -0,0 +1,271 @@ +use std::path::{Path, PathBuf}; +use std::process::{Command, Output}; + +use serde_json::{json, Value}; + +fn excel_cli_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_excel-cli")) +} + +fn create_check_workbook(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + + let summary = workbook.add_worksheet(); + summary.set_name("Summary").unwrap(); + summary.write_string(0, 0, "Total").unwrap(); + summary.write_number(0, 1, 1234.5).unwrap(); + + let orders = workbook.add_worksheet(); + orders.set_name("Orders").unwrap(); + orders.write_string(0, 0, "order_id").unwrap(); + orders.write_string(0, 1, "customer").unwrap(); + orders.write_string(1, 0, "1001").unwrap(); + orders.write_string(1, 1, "Alice").unwrap(); + + let customers = workbook.add_worksheet(); + customers.set_name("客户").unwrap(); + customers.write_string(0, 0, "姓名").unwrap(); + customers.write_string(1, 0, "张三").unwrap(); + + let empty = workbook.add_worksheet(); + empty.set_name("EmptySheet").unwrap(); + + workbook.save(path).unwrap(); +} + +fn temp_workbook(name: &str) -> PathBuf { + let path = std::env::temp_dir().join(name); + create_check_workbook(&path); + path +} + +fn run_check(args: &[&str]) -> Output { + Command::new(excel_cli_bin()) + .args(args) + .output() + .unwrap_or_else(|_| panic!("Failed to execute excel-cli {}", args.join(" "))) +} + +fn parse_stdout(output: &Output) -> Value { + serde_json::from_slice(&output.stdout).expect("stdout should be valid JSON") +} + +fn parse_stderr(output: &Output) -> Value { + serde_json::from_slice(&output.stderr).expect("stderr should be valid JSON") +} + +fn assert_success(output: &Output, code: i32) { + assert_eq!(output.status.code(), Some(code)); + assert!( + output.stderr.is_empty(), + "stderr should be empty: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +fn assert_json_error(output: &Output, code: i32, error_code: &str) -> Value { + assert_eq!(output.status.code(), Some(code)); + assert!( + output.stdout.is_empty(), + "stdout should be empty: {}", + String::from_utf8_lossy(&output.stdout) + ); + let err = parse_stderr(output); + assert_eq!(err["error"]["code"], error_code); + err +} + +#[test] +fn check_workbook_returns_stable_empty_report_contract() { + let file_path = temp_workbook("excel_cli_check_contract_workbook.xlsx"); + let output = Command::new(excel_cli_bin()) + .arg("check") + .arg(&file_path) + .output() + .expect("Failed to execute excel-cli check"); + + assert_success(&output, 0); + let json = parse_stdout(&output); + + assert_eq!(json["schema_version"], "1.0"); + assert_eq!(json["command"], "check"); + assert_eq!(json["file"]["path"], file_path.to_string_lossy().as_ref()); + assert_eq!(json["file"]["format"], "xlsx"); + assert_eq!(json["target"], json!({})); + assert_eq!(json["warnings"], json!([])); + + assert_eq!( + json["data"]["summary"], + json!({ + "status": "pass", + "finding_count": 0, + "error_count": 0, + "warning_count": 0, + "info_count": 0 + }) + ); + assert_eq!(json["data"]["findings"], json!([])); + assert_eq!(json["data"]["stats"]["sheet_count"], 4); + assert_eq!(json["data"]["stats"]["checked_sheet_count"], 4); + assert_eq!(json["data"]["stats"]["severity_threshold"], "info"); + assert_eq!(json["data"]["stats"]["finding_count_before_threshold"], 0); + assert_eq!( + json["data"]["stats"]["rules_run"], + json!([ + "blank_headers", + "duplicate_headers", + "blank_rows", + "blank_columns", + "null_ratio", + "duplicate_values", + "type_drift", + "formula_presence" + ]) + ); + assert_eq!( + json["data"]["stats"]["checked_sheets"], + json!([ + { + "name": "Summary", + "index": 0, + "used_range": "A1:B1", + "max_rows": 1, + "max_cols": 2 + }, + { + "name": "Orders", + "index": 1, + "used_range": "A1:B2", + "max_rows": 2, + "max_cols": 2 + }, + { + "name": "客户", + "index": 2, + "used_range": "A1:A2", + "max_rows": 2, + "max_cols": 1 + }, + { + "name": "EmptySheet", + "index": 3, + "used_range": "", + "max_rows": 0, + "max_cols": 0 + } + ]) + ); +} + +#[test] +fn check_sheet_accepts_rules_and_threshold_with_registry_order() { + let file_path = temp_workbook("excel_cli_check_contract_sheet.xlsx"); + let output = Command::new(excel_cli_bin()) + .arg("check") + .arg(&file_path) + .arg("--sheet") + .arg("客户") + .arg("--rules") + .arg("duplicate_headers,blank_headers") + .arg("--severity-threshold") + .arg("warning") + .output() + .expect("Failed to execute excel-cli check"); + + assert_success(&output, 0); + let json = parse_stdout(&output); + + assert_eq!(json["target"], json!({"sheet": "客户", "sheet_index": 2})); + assert_eq!(json["data"]["summary"]["status"], "pass"); + assert_eq!(json["data"]["findings"], json!([])); + assert_eq!(json["data"]["stats"]["sheet_count"], 4); + assert_eq!(json["data"]["stats"]["checked_sheet_count"], 1); + assert_eq!(json["data"]["stats"]["severity_threshold"], "warning"); + assert_eq!( + json["data"]["stats"]["rules_run"], + json!(["blank_headers", "duplicate_headers"]) + ); + assert_eq!( + json["data"]["stats"]["checked_sheets"], + json!([{ + "name": "客户", + "index": 2, + "used_range": "A1:A2", + "max_rows": 2, + "max_cols": 1 + }]) + ); +} + +#[test] +fn check_rejects_unknown_and_empty_rule_lists_as_query_errors() { + let file_path = temp_workbook("excel_cli_check_contract_invalid_rules.xlsx"); + let file_arg = file_path.to_string_lossy(); + + let unknown = run_check(&[ + "check", + file_arg.as_ref(), + "--rules", + "blank_headers,unknown_rule", + ]); + let err = assert_json_error(&unknown, 6, "invalid_query"); + assert!( + err["error"]["message"] + .as_str() + .unwrap() + .contains("unknown_rule"), + "unexpected error: {err}" + ); + + let empty = run_check(&["check", file_arg.as_ref(), "--rules", " , "]); + let err = assert_json_error(&empty, 6, "invalid_query"); + assert!( + err["error"]["message"] + .as_str() + .unwrap() + .contains("--rules"), + "unexpected error: {err}" + ); +} + +#[test] +fn check_rejects_missing_sheet_as_target_not_found() { + let file_path = temp_workbook("excel_cli_check_contract_missing_sheet.xlsx"); + let output = Command::new(excel_cli_bin()) + .arg("check") + .arg(&file_path) + .arg("--sheet") + .arg("Missing") + .output() + .expect("Failed to execute excel-cli check"); + + let err = assert_json_error(&output, 5, "target_not_found"); + assert!( + err["error"]["message"] + .as_str() + .unwrap() + .contains("Missing"), + "unexpected error: {err}" + ); +} + +#[test] +fn check_rejects_legacy_rule_flag_at_parser_level() { + let file_path = temp_workbook("excel_cli_check_contract_legacy_rule.xlsx"); + let output = Command::new(excel_cli_bin()) + .arg("check") + .arg(&file_path) + .arg("--rule") + .arg("missing_values") + .output() + .expect("Failed to execute excel-cli check"); + + let err = assert_json_error(&output, 2, "invalid_args"); + assert_ne!(err["error"]["code"], "check_not_implemented"); + assert!( + err["error"]["message"].as_str().unwrap().contains("--rule"), + "unexpected error: {err}" + ); +} diff --git a/tests/headless_inspect_test.rs b/tests/headless_inspect_test.rs index 1e24847..110ea80 100644 --- a/tests/headless_inspect_test.rs +++ b/tests/headless_inspect_test.rs @@ -1561,44 +1561,6 @@ fn test_inspect_tables_text() { assert!(stdout.contains("confidence=")); } -#[test] -fn test_check_namespace_only() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join("excel_cli_test_check.xlsx"); - create_test_workbook(&file_path); - - let output = Command::new(excel_cli_bin()) - .arg("check") - .arg(&file_path) - .arg("--rule") - .arg("missing_values") - .output() - .expect("Failed to execute excel-cli"); - - assert_json_error(&output, 6); // EXIT_INVALID_QUERY - let err_json: serde_json::Value = - serde_json::from_slice(&output.stderr).expect("Valid JSON error"); - assert_eq!(err_json["error"]["code"], "check_not_implemented"); -} - -#[test] -fn test_check_help_without_rule() { - let temp_dir = std::env::temp_dir(); - let file_path = temp_dir.join("excel_cli_test_check_no_rule.xlsx"); - create_test_workbook(&file_path); - - let output = Command::new(excel_cli_bin()) - .arg("check") - .arg(&file_path) - .output() - .expect("Failed to execute excel-cli"); - - assert_json_error(&output, 6); // EXIT_INVALID_QUERY - let err_json: serde_json::Value = - serde_json::from_slice(&output.stderr).expect("Valid JSON error"); - assert_eq!(err_json["error"]["code"], "check_not_implemented"); -} - #[test] fn test_bare_file_path_is_error() { let temp_dir = std::env::temp_dir(); diff --git a/tests/help_and_version_cli_test.rs b/tests/help_and_version_cli_test.rs index c6fac7c..5352b08 100644 --- a/tests/help_and_version_cli_test.rs +++ b/tests/help_and_version_cli_test.rs @@ -97,6 +97,27 @@ fn read_records_help_documents_default_record_shape() { } } +#[test] +fn check_help_documents_v13_contract_flags() { + let stdout = assert_successful_help(&["check", "--help"]); + + for expected in [ + "Usage: excel-cli check [OPTIONS] ", + "--sheet ", + "--rules ", + "--severity-threshold ", + ] { + assert!( + stdout.contains(expected), + "expected {expected:?} in stdout: {stdout}" + ); + } + assert!( + !stdout.contains("--rule "), + "legacy --rule flag should not be documented: {stdout}" + ); +} + #[test] fn version_prints_to_stdout_and_exits_zero() { let output = Command::new(excel_cli_bin()) From 4a1f897decaeac9f75843343415837e926ba5543 Mon Sep 17 00:00:00 2001 From: fuhan666 Date: Wed, 22 Apr 2026 01:20:17 +0800 Subject: [PATCH 2/7] feat: add structural check rules --- src/cli/check.rs | 216 ++++++++++++++++++++- tests/check_structural_rules_test.rs | 272 +++++++++++++++++++++++++++ 2 files changed, 484 insertions(+), 4 deletions(-) create mode 100644 tests/check_structural_rules_test.rs diff --git a/src/cli/check.rs b/src/cli/check.rs index e51e64b..51ccbf6 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -7,7 +7,8 @@ use std::path::{Path, PathBuf}; use crate::cli::args::SeverityThreshold; use crate::cli::envelope; use crate::cli::error::{AppError, EXIT_CHECK_FINDINGS, EXIT_SUCCESS}; -use crate::excel::{open_workbook, Workbook}; +use crate::excel::{open_workbook, Cell, Sheet, Workbook}; +use crate::utils::{cell_reference, index_to_col_name}; const RULES: [CheckRuleId; 8] = [ CheckRuleId::BlankHeaders, @@ -43,7 +44,7 @@ pub fn handle( } let sheet_names = workbook.get_sheet_names(); - let mut findings = run_rules(&selected_rules, &checked_sheet_indices); + let mut findings = run_rules(&workbook, &selected_rules, &checked_sheet_indices)?; let finding_count_before_threshold = findings.len(); findings.retain(|finding| finding.severity >= threshold); sort_findings(&mut findings, &sheet_names); @@ -150,8 +151,215 @@ fn resolve_checked_sheets( } } -fn run_rules(_rules: &[CheckRuleId], _sheet_indices: &[usize]) -> Vec { - Vec::new() +fn run_rules( + workbook: &Workbook, + rules: &[CheckRuleId], + sheet_indices: &[usize], +) -> Result, AppError> { + let mut findings = Vec::new(); + + for sheet_index in sheet_indices { + let sheet = + workbook + .get_sheet_by_index(*sheet_index) + .ok_or_else(|| AppError::TargetNotFound { + message: format!("Sheet index {} not found", sheet_index), + })?; + let used_range = workbook + .get_used_range(*sheet_index) + .map_err(crate::cli::error::anyhow_to_app_error)?; + let (_, header_row) = workbook + .find_header_candidates(*sheet_index) + .map_err(crate::cli::error::anyhow_to_app_error)?; + + for rule in rules { + match rule { + CheckRuleId::BlankHeaders => { + findings.extend(find_blank_headers(sheet, header_row)); + } + CheckRuleId::DuplicateHeaders => { + findings.extend(find_duplicate_headers(sheet, header_row)); + } + CheckRuleId::BlankRows => { + findings.extend(find_blank_rows(sheet, &used_range)); + } + CheckRuleId::BlankColumns => { + findings.extend(find_blank_columns(sheet, &used_range)); + } + CheckRuleId::NullRatio + | CheckRuleId::DuplicateValues + | CheckRuleId::TypeDrift + | CheckRuleId::FormulaPresence => {} + } + } + } + + Ok(findings) +} + +fn find_blank_headers(sheet: &Sheet, header_row: Option) -> Vec { + let Some(header_row) = header_row else { + return Vec::new(); + }; + + (1..=sheet.max_cols) + .filter(|col| is_blank_cell(cell_at(sheet, header_row, *col))) + .map(|col| { + let column_label = index_to_col_name(col); + let range = cell_reference((header_row, col)); + CheckFinding { + rule_id: CheckRuleId::BlankHeaders, + severity: Severity::Warning, + sheet: sheet.name.clone(), + row: Some(header_row), + column: Some(col), + range: Some(range.clone()), + message: format!("Blank header at {range}."), + details: json!({ + "header_row": header_row, + "column_label": column_label, + "reason": "blank_header", + }), + } + }) + .collect() +} + +fn find_duplicate_headers(sheet: &Sheet, header_row: Option) -> Vec { + let Some(header_row) = header_row else { + return Vec::new(); + }; + + let mut counts: HashMap = HashMap::new(); + let mut first_locations: HashMap = HashMap::new(); + let headers: Vec<_> = (1..=sheet.max_cols) + .map(|col| { + let header = header_value(sheet, header_row, col); + if !header.is_empty() { + *counts.entry(header.clone()).or_insert(0) += 1; + first_locations + .entry(header.clone()) + .or_insert_with(|| (col, cell_reference((header_row, col)))); + } + header + }) + .collect(); + + let mut seen: HashMap = HashMap::new(); + let mut findings = Vec::new(); + for (offset, header) in headers.into_iter().enumerate() { + if header.is_empty() { + continue; + } + + let occurrence = seen.entry(header.clone()).or_insert(0); + *occurrence += 1; + if *occurrence == 1 { + continue; + } + + let col = offset + 1; + let range = cell_reference((header_row, col)); + let (first_column, first_range) = first_locations + .get(&header) + .cloned() + .unwrap_or_else(|| (col, range.clone())); + findings.push(CheckFinding { + rule_id: CheckRuleId::DuplicateHeaders, + severity: Severity::Warning, + sheet: sheet.name.clone(), + row: Some(header_row), + column: Some(col), + range: Some(range.clone()), + message: format!("Duplicate header '{header}' at {range}."), + details: json!({ + "header": header, + "normalized_header": header, + "first_column": first_column, + "first_range": first_range, + "duplicate_count": counts.get(&header).copied().unwrap_or(0), + }), + }); + } + + findings +} + +fn find_blank_rows(sheet: &Sheet, used_range: &str) -> Vec { + if used_range.is_empty() || sheet.max_rows == 0 || sheet.max_cols == 0 { + return Vec::new(); + } + + (1..=sheet.max_rows) + .filter(|row| (1..=sheet.max_cols).all(|col| is_blank_cell(cell_at(sheet, *row, col)))) + .map(|row| { + let end_col = index_to_col_name(sheet.max_cols); + let range = format!("A{row}:{end_col}{row}"); + CheckFinding { + rule_id: CheckRuleId::BlankRows, + severity: Severity::Warning, + sheet: sheet.name.clone(), + row: Some(row), + column: None, + range: Some(range), + message: format!("Blank row {row} in used range {used_range}."), + details: json!({ + "used_range": used_range, + "max_columns": sheet.max_cols, + "reason": "blank_row", + }), + } + }) + .collect() +} + +fn find_blank_columns(sheet: &Sheet, used_range: &str) -> Vec { + if used_range.is_empty() || sheet.max_rows == 0 || sheet.max_cols == 0 { + return Vec::new(); + } + + (1..=sheet.max_cols) + .filter(|col| (1..=sheet.max_rows).all(|row| is_blank_cell(cell_at(sheet, row, *col)))) + .map(|col| { + let column_label = index_to_col_name(col); + let range = format!("{column_label}1:{column_label}{}", sheet.max_rows); + CheckFinding { + rule_id: CheckRuleId::BlankColumns, + severity: Severity::Warning, + sheet: sheet.name.clone(), + row: None, + column: Some(col), + range: Some(range), + message: format!("Blank column {column_label} in used range {used_range}."), + details: json!({ + "used_range": used_range, + "column_label": column_label, + "max_rows": sheet.max_rows, + "reason": "blank_column", + }), + } + }) + .collect() +} + +fn header_value(sheet: &Sheet, row: usize, col: usize) -> String { + cell_at(sheet, row, col) + .filter(|cell| !cell_has_formula(cell)) + .map(|cell| cell.value.trim().to_string()) + .unwrap_or_default() +} + +fn cell_at(sheet: &Sheet, row: usize, col: usize) -> Option<&Cell> { + sheet.data.get(row).and_then(|row_data| row_data.get(col)) +} + +fn is_blank_cell(cell: Option<&Cell>) -> bool { + cell.map(|cell| !cell_has_formula(cell) && cell.value.trim().is_empty()) + .unwrap_or(true) +} + +fn cell_has_formula(cell: &Cell) -> bool { + cell.is_formula || cell.formula.is_some() } fn summarize_findings(findings: &[CheckFinding]) -> Value { diff --git a/tests/check_structural_rules_test.rs b/tests/check_structural_rules_test.rs new file mode 100644 index 0000000..6c53087 --- /dev/null +++ b/tests/check_structural_rules_test.rs @@ -0,0 +1,272 @@ +use std::path::{Path, PathBuf}; +use std::process::{Command, Output}; + +use serde_json::{json, Value}; + +fn excel_cli_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_excel-cli")) +} + +fn create_structural_workbook(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + + let clean = workbook.add_worksheet(); + clean.set_name("Clean").unwrap(); + clean.write_string(0, 0, "客户").unwrap(); + clean.write_string(0, 1, "订单").unwrap(); + clean.write_string(0, 2, "金额").unwrap(); + clean.write_string(1, 0, "张三").unwrap(); + clean.write_string(1, 1, "A-100").unwrap(); + clean.write_number(1, 2, 10).unwrap(); + clean.write_string(2, 0, "李四").unwrap(); + clean.write_string(2, 1, "A-101").unwrap(); + clean.write_number(2, 2, 20).unwrap(); + + let structural = workbook.add_worksheet(); + structural.set_name("结构").unwrap(); + structural.write_string(0, 0, "客户").unwrap(); + structural.write_string(0, 2, "客户").unwrap(); + structural.write_string(0, 4, "金额").unwrap(); + structural.write_string(1, 0, "张三").unwrap(); + structural.write_string(1, 1, "A-100").unwrap(); + structural.write_string(1, 2, "张三").unwrap(); + structural.write_number(1, 4, 10).unwrap(); + structural.write_string(3, 0, "李四").unwrap(); + structural.write_string(3, 1, "A-101").unwrap(); + structural.write_string(3, 2, "李四").unwrap(); + structural.write_number(3, 4, 20).unwrap(); + + workbook.save(path).unwrap(); +} + +fn temp_workbook(name: &str) -> PathBuf { + let path = std::env::temp_dir().join(name); + create_structural_workbook(&path); + path +} + +fn run_check(args: &[&str]) -> Output { + Command::new(excel_cli_bin()) + .args(args) + .output() + .unwrap_or_else(|_| panic!("Failed to execute excel-cli {}", args.join(" "))) +} + +fn parse_stdout(output: &Output) -> Value { + serde_json::from_slice(&output.stdout).expect("stdout should be valid JSON") +} + +fn assert_success(output: &Output, code: i32) { + assert_eq!(output.status.code(), Some(code)); + assert!( + output.stderr.is_empty(), + "stderr should be empty: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn structural_rules_report_stable_locations_and_details() { + let file_path = temp_workbook("excel_cli_check_structural_positive.xlsx"); + let file_arg = file_path.to_string_lossy(); + let output = run_check(&[ + "check", + file_arg.as_ref(), + "--sheet", + "结构", + "--rules", + "blank_headers,duplicate_headers,blank_rows,blank_columns", + ]); + + assert_success(&output, 1); + let json = parse_stdout(&output); + + assert_eq!(json["target"], json!({"sheet": "结构", "sheet_index": 1})); + assert_eq!(json["data"]["summary"]["status"], "fail"); + assert_eq!(json["data"]["summary"]["finding_count"], 5); + assert_eq!(json["data"]["summary"]["warning_count"], 5); + assert_eq!(json["data"]["stats"]["finding_count_before_threshold"], 5); + assert_eq!( + json["data"]["stats"]["rules_run"], + json!([ + "blank_headers", + "duplicate_headers", + "blank_rows", + "blank_columns" + ]) + ); + + assert_eq!( + json["data"]["findings"], + json!([ + { + "rule_id": "blank_headers", + "severity": "warning", + "sheet": "结构", + "row": 1, + "column": 2, + "range": "B1", + "message": "Blank header at B1.", + "details": { + "header_row": 1, + "column_label": "B", + "reason": "blank_header" + } + }, + { + "rule_id": "blank_headers", + "severity": "warning", + "sheet": "结构", + "row": 1, + "column": 4, + "range": "D1", + "message": "Blank header at D1.", + "details": { + "header_row": 1, + "column_label": "D", + "reason": "blank_header" + } + }, + { + "rule_id": "duplicate_headers", + "severity": "warning", + "sheet": "结构", + "row": 1, + "column": 3, + "range": "C1", + "message": "Duplicate header '客户' at C1.", + "details": { + "header": "客户", + "normalized_header": "客户", + "first_column": 1, + "first_range": "A1", + "duplicate_count": 2 + } + }, + { + "rule_id": "blank_rows", + "severity": "warning", + "sheet": "结构", + "row": 3, + "column": null, + "range": "A3:E3", + "message": "Blank row 3 in used range A1:E4.", + "details": { + "used_range": "A1:E4", + "max_columns": 5, + "reason": "blank_row" + } + }, + { + "rule_id": "blank_columns", + "severity": "warning", + "sheet": "结构", + "row": null, + "column": 4, + "range": "D1:D4", + "message": "Blank column D in used range A1:E4.", + "details": { + "used_range": "A1:E4", + "column_label": "D", + "max_rows": 4, + "reason": "blank_column" + } + } + ]) + ); +} + +#[test] +fn structural_rules_have_clean_negative_cases() { + let file_path = temp_workbook("excel_cli_check_structural_negative.xlsx"); + let file_arg = file_path.to_string_lossy(); + + for rule in [ + "blank_headers", + "duplicate_headers", + "blank_rows", + "blank_columns", + ] { + let output = run_check(&[ + "check", + file_arg.as_ref(), + "--sheet", + "Clean", + "--rules", + rule, + ]); + + assert_success(&output, 0); + let json = parse_stdout(&output); + assert_eq!(json["data"]["summary"]["status"], "pass", "rule: {rule}"); + assert_eq!(json["data"]["findings"], json!([]), "rule: {rule}"); + assert_eq!( + json["data"]["stats"]["finding_count_before_threshold"], 0, + "rule: {rule}" + ); + } +} + +#[test] +fn structural_rules_aggregate_workbook_and_filter_by_sheet() { + let file_path = temp_workbook("excel_cli_check_structural_targets.xlsx"); + let file_arg = file_path.to_string_lossy(); + + let workbook = run_check(&[ + "check", + file_arg.as_ref(), + "--rules", + "blank_headers,duplicate_headers,blank_rows,blank_columns", + ]); + assert_success(&workbook, 1); + let json = parse_stdout(&workbook); + assert_eq!(json["target"], json!({})); + assert_eq!(json["data"]["stats"]["sheet_count"], 2); + assert_eq!(json["data"]["stats"]["checked_sheet_count"], 2); + assert_eq!(json["data"]["summary"]["finding_count"], 5); + assert!(json["data"]["findings"] + .as_array() + .unwrap() + .iter() + .all(|finding| finding["sheet"] == "结构")); + + let sheet = run_check(&[ + "check", + file_arg.as_ref(), + "--sheet", + "Clean", + "--rules", + "blank_headers,duplicate_headers,blank_rows,blank_columns", + ]); + assert_success(&sheet, 0); + let json = parse_stdout(&sheet); + assert_eq!(json["target"], json!({"sheet": "Clean", "sheet_index": 0})); + assert_eq!(json["data"]["stats"]["checked_sheet_count"], 1); + assert_eq!(json["data"]["summary"]["finding_count"], 0); + assert_eq!(json["data"]["findings"], json!([])); +} + +#[test] +fn severity_threshold_filters_warning_structural_findings() { + let file_path = temp_workbook("excel_cli_check_structural_threshold.xlsx"); + let file_arg = file_path.to_string_lossy(); + let output = run_check(&[ + "check", + file_arg.as_ref(), + "--sheet", + "结构", + "--rules", + "blank_headers,duplicate_headers,blank_rows,blank_columns", + "--severity-threshold", + "error", + ]); + + assert_success(&output, 0); + let json = parse_stdout(&output); + assert_eq!(json["data"]["summary"]["status"], "pass"); + assert_eq!(json["data"]["summary"]["finding_count"], 0); + assert_eq!(json["data"]["stats"]["finding_count_before_threshold"], 5); + assert_eq!(json["data"]["findings"], json!([])); +} From 9b6b513af42ad0f48f9b79d70682cc6706f761ad Mon Sep 17 00:00:00 2001 From: fuhan666 Date: Wed, 22 Apr 2026 01:23:30 +0800 Subject: [PATCH 3/7] Implement analytical check rules --- src/cli/check.rs | 499 +++++++++++++++++++++++---- tests/check_analytical_rules_test.rs | 390 +++++++++++++++++++++ tests/check_rule_integration_test.rs | 117 +++++++ 3 files changed, 948 insertions(+), 58 deletions(-) create mode 100644 tests/check_analytical_rules_test.rs create mode 100644 tests/check_rule_integration_test.rs diff --git a/src/cli/check.rs b/src/cli/check.rs index 51ccbf6..4347f12 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -1,13 +1,13 @@ use serde::Serialize; use serde_json::{json, Value}; use std::cmp::Ordering; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::path::{Path, PathBuf}; use crate::cli::args::SeverityThreshold; use crate::cli::envelope; use crate::cli::error::{AppError, EXIT_CHECK_FINDINGS, EXIT_SUCCESS}; -use crate::excel::{open_workbook, Cell, Sheet, Workbook}; +use crate::excel::{open_workbook, Cell, CellType, Sheet, Workbook}; use crate::utils::{cell_reference, index_to_col_name}; const RULES: [CheckRuleId; 8] = [ @@ -159,58 +159,100 @@ fn run_rules( let mut findings = Vec::new(); for sheet_index in sheet_indices { + let context = SheetCheckContext::new(workbook, *sheet_index)?; + for rule in rules { + match rule { + CheckRuleId::BlankHeaders => findings.extend(find_blank_headers(&context)), + CheckRuleId::DuplicateHeaders => findings.extend(find_duplicate_headers(&context)), + CheckRuleId::BlankRows => findings.extend(find_blank_rows(&context)), + CheckRuleId::BlankColumns => findings.extend(find_blank_columns(&context)), + CheckRuleId::NullRatio => findings.extend(check_null_ratio(&context)), + CheckRuleId::DuplicateValues => findings.extend(check_duplicate_values(&context)), + CheckRuleId::TypeDrift => findings.extend(check_type_drift(&context)), + CheckRuleId::FormulaPresence => findings.extend(check_formula_presence(&context)), + } + } + } + + Ok(findings) +} + +struct SheetCheckContext<'a> { + sheet: &'a Sheet, + header_row: Option, + used_range: String, + data_start_row: usize, + data_row_count: usize, +} + +impl<'a> SheetCheckContext<'a> { + fn new(workbook: &'a Workbook, sheet_index: usize) -> Result { let sheet = workbook - .get_sheet_by_index(*sheet_index) + .get_sheet_by_index(sheet_index) .ok_or_else(|| AppError::TargetNotFound { message: format!("Sheet index {} not found", sheet_index), })?; let used_range = workbook - .get_used_range(*sheet_index) + .get_used_range(sheet_index) .map_err(crate::cli::error::anyhow_to_app_error)?; let (_, header_row) = workbook - .find_header_candidates(*sheet_index) + .find_header_candidates(sheet_index) .map_err(crate::cli::error::anyhow_to_app_error)?; + let data_start_row = header_row.map_or(1, |row| row.saturating_add(1)); + let data_row_count = if sheet.max_rows >= data_start_row { + sheet.max_rows - data_start_row + 1 + } else { + 0 + }; + + Ok(Self { + sheet, + header_row, + used_range, + data_start_row, + data_row_count, + }) + } - for rule in rules { - match rule { - CheckRuleId::BlankHeaders => { - findings.extend(find_blank_headers(sheet, header_row)); - } - CheckRuleId::DuplicateHeaders => { - findings.extend(find_duplicate_headers(sheet, header_row)); - } - CheckRuleId::BlankRows => { - findings.extend(find_blank_rows(sheet, &used_range)); - } - CheckRuleId::BlankColumns => { - findings.extend(find_blank_columns(sheet, &used_range)); - } - CheckRuleId::NullRatio - | CheckRuleId::DuplicateValues - | CheckRuleId::TypeDrift - | CheckRuleId::FormulaPresence => {} - } - } + fn column_name(&self, col: usize) -> String { + self.header_row + .and_then(|row| cell_at(self.sheet, row, col)) + .map(|cell| cell.value.trim()) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| format!("col_{}", index_to_col_name(col))) } - Ok(findings) + fn data_column_range(&self, col: usize) -> Option { + if self.data_row_count == 0 { + None + } else { + Some(format!( + "{}{}:{}{}", + index_to_col_name(col), + self.data_start_row, + index_to_col_name(col), + self.sheet.max_rows + )) + } + } } -fn find_blank_headers(sheet: &Sheet, header_row: Option) -> Vec { - let Some(header_row) = header_row else { +fn find_blank_headers(context: &SheetCheckContext<'_>) -> Vec { + let Some(header_row) = context.header_row else { return Vec::new(); }; - (1..=sheet.max_cols) - .filter(|col| is_blank_cell(cell_at(sheet, header_row, *col))) + (1..=context.sheet.max_cols) + .filter(|col| is_blank_cell(cell_at(context.sheet, header_row, *col))) .map(|col| { let column_label = index_to_col_name(col); let range = cell_reference((header_row, col)); CheckFinding { rule_id: CheckRuleId::BlankHeaders, severity: Severity::Warning, - sheet: sheet.name.clone(), + sheet: context.sheet.name.clone(), row: Some(header_row), column: Some(col), range: Some(range.clone()), @@ -225,16 +267,16 @@ fn find_blank_headers(sheet: &Sheet, header_row: Option) -> Vec) -> Vec { - let Some(header_row) = header_row else { +fn find_duplicate_headers(context: &SheetCheckContext<'_>) -> Vec { + let Some(header_row) = context.header_row else { return Vec::new(); }; let mut counts: HashMap = HashMap::new(); let mut first_locations: HashMap = HashMap::new(); - let headers: Vec<_> = (1..=sheet.max_cols) + let headers: Vec<_> = (1..=context.sheet.max_cols) .map(|col| { - let header = header_value(sheet, header_row, col); + let header = header_value(context.sheet, header_row, col); if !header.is_empty() { *counts.entry(header.clone()).or_insert(0) += 1; first_locations @@ -267,7 +309,7 @@ fn find_duplicate_headers(sheet: &Sheet, header_row: Option) -> Vec) -> Vec Vec { - if used_range.is_empty() || sheet.max_rows == 0 || sheet.max_cols == 0 { +fn find_blank_rows(context: &SheetCheckContext<'_>) -> Vec { + if context.used_range.is_empty() || context.sheet.max_rows == 0 || context.sheet.max_cols == 0 { return Vec::new(); } - (1..=sheet.max_rows) - .filter(|row| (1..=sheet.max_cols).all(|col| is_blank_cell(cell_at(sheet, *row, col)))) + (1..=context.sheet.max_rows) + .filter(|row| { + (1..=context.sheet.max_cols) + .all(|col| is_blank_cell(cell_at(context.sheet, *row, col))) + }) .map(|row| { - let end_col = index_to_col_name(sheet.max_cols); + let end_col = index_to_col_name(context.sheet.max_cols); let range = format!("A{row}:{end_col}{row}"); CheckFinding { rule_id: CheckRuleId::BlankRows, severity: Severity::Warning, - sheet: sheet.name.clone(), + sheet: context.sheet.name.clone(), row: Some(row), column: None, range: Some(range), - message: format!("Blank row {row} in used range {used_range}."), + message: format!("Blank row {row} in used range {}.", context.used_range), details: json!({ - "used_range": used_range, - "max_columns": sheet.max_cols, + "used_range": context.used_range, + "max_columns": context.sheet.max_cols, "reason": "blank_row", }), } @@ -313,28 +358,31 @@ fn find_blank_rows(sheet: &Sheet, used_range: &str) -> Vec { .collect() } -fn find_blank_columns(sheet: &Sheet, used_range: &str) -> Vec { - if used_range.is_empty() || sheet.max_rows == 0 || sheet.max_cols == 0 { +fn find_blank_columns(context: &SheetCheckContext<'_>) -> Vec { + if context.used_range.is_empty() || context.sheet.max_rows == 0 || context.sheet.max_cols == 0 { return Vec::new(); } - (1..=sheet.max_cols) - .filter(|col| (1..=sheet.max_rows).all(|row| is_blank_cell(cell_at(sheet, row, *col)))) + (1..=context.sheet.max_cols) + .filter(|col| { + (1..=context.sheet.max_rows) + .all(|row| is_blank_cell(cell_at(context.sheet, row, *col))) + }) .map(|col| { let column_label = index_to_col_name(col); - let range = format!("{column_label}1:{column_label}{}", sheet.max_rows); + let range = format!("{column_label}1:{column_label}{}", context.sheet.max_rows); CheckFinding { rule_id: CheckRuleId::BlankColumns, severity: Severity::Warning, - sheet: sheet.name.clone(), + sheet: context.sheet.name.clone(), row: None, column: Some(col), range: Some(range), - message: format!("Blank column {column_label} in used range {used_range}."), + message: format!("Blank column {column_label} in used range {}.", context.used_range), details: json!({ - "used_range": used_range, + "used_range": context.used_range, "column_label": column_label, - "max_rows": sheet.max_rows, + "max_rows": context.sheet.max_rows, "reason": "blank_column", }), } @@ -342,6 +390,282 @@ fn find_blank_columns(sheet: &Sheet, used_range: &str) -> Vec { .collect() } +fn check_null_ratio(context: &SheetCheckContext<'_>) -> Vec { + if context.data_row_count == 0 { + return Vec::new(); + } + + let mut findings = Vec::new(); + for col in 1..=context.sheet.max_cols { + let null_rows: Vec = (context.data_start_row..=context.sheet.max_rows) + .filter(|row| !cell_is_present(cell_at(context.sheet, *row, col))) + .collect(); + + if null_rows.is_empty() { + continue; + } + + let null_count = null_rows.len(); + let null_ratio = rounded_ratio(null_count, context.data_row_count); + let severity = if null_count == context.data_row_count { + Severity::Error + } else if null_ratio >= 0.5 { + Severity::Warning + } else { + Severity::Info + }; + let column_name = context.column_name(col); + let first_null_row = null_rows[0]; + let first_null_cell = cell_reference((first_null_row, col)); + + findings.push(CheckFinding { + rule_id: CheckRuleId::NullRatio, + severity, + sheet: context.sheet.name.clone(), + row: Some(first_null_row), + column: Some(col), + range: context.data_column_range(col), + message: format!( + "Column '{}' has blank values in {} of {} data rows.", + column_name, null_count, context.data_row_count + ), + details: json!({ + "column_name": column_name, + "data_row_count": context.data_row_count, + "first_null_cell": first_null_cell, + "null_count": null_count, + "null_ratio": null_ratio, + "severity_threshold": { + "info": "> 0 and < 0.5", + "warning": ">= 0.5 and < 1.0", + "error": "1.0" + } + }), + }); + } + + findings +} + +fn check_duplicate_values(context: &SheetCheckContext<'_>) -> Vec { + let Some((candidate_col, selection)) = default_duplicate_candidate(context) else { + return Vec::new(); + }; + + let mut values: BTreeMap> = BTreeMap::new(); + for row in context.data_start_row..=context.sheet.max_rows { + if let Some(cell) = cell_at(context.sheet, row, candidate_col) { + let value = cell.value.trim(); + if !value.is_empty() { + values.entry(value.to_string()).or_default().push(row); + } + } + } + + let column_name = context.column_name(candidate_col); + values + .into_iter() + .filter(|(_, rows)| rows.len() > 1) + .map(|(duplicate_value, rows)| { + let cells: Vec = rows + .iter() + .map(|row| cell_reference((*row, candidate_col))) + .collect(); + + CheckFinding { + rule_id: CheckRuleId::DuplicateValues, + severity: Severity::Warning, + sheet: context.sheet.name.clone(), + row: rows.first().copied(), + column: Some(candidate_col), + range: context.data_column_range(candidate_col), + message: format!( + "Column '{}' has duplicate value '{}' in {} rows.", + column_name, + duplicate_value, + rows.len() + ), + details: json!({ + "candidate_column": { + "column": candidate_col, + "column_name": column_name, + "selection": selection + }, + "duplicate_value": duplicate_value, + "occurrence_count": rows.len(), + "rows": rows, + "cells": cells + }), + } + }) + .collect() +} + +fn default_duplicate_candidate(context: &SheetCheckContext<'_>) -> Option<(usize, &'static str)> { + if context.data_row_count == 0 { + return None; + } + + if let Some(header_row) = context.header_row { + for col in 1..=context.sheet.max_cols { + let has_header = cell_at(context.sheet, header_row, col) + .map(|cell| !cell.value.trim().is_empty()) + .unwrap_or(false); + if has_header && data_column_has_value(context, col) { + return Some((col, "first non-empty header data column")); + } + } + } + + (1..=context.sheet.max_cols) + .find(|col| data_column_has_value(context, *col)) + .map(|col| (col, "first data column with values")) +} + +fn check_type_drift(context: &SheetCheckContext<'_>) -> Vec { + if context.data_row_count == 0 { + return Vec::new(); + } + + let mut findings = Vec::new(); + for col in 1..=context.sheet.max_cols { + let mut type_counts: BTreeMap<&'static str, usize> = BTreeMap::new(); + let mut cells_by_type: BTreeMap<&'static str, Vec> = BTreeMap::new(); + + for row in context.data_start_row..=context.sheet.max_rows { + let Some(cell) = cell_at(context.sheet, row, col) else { + continue; + }; + let Some(kind) = cell_kind(cell) else { + continue; + }; + + *type_counts.entry(kind).or_default() += 1; + cells_by_type + .entry(kind) + .or_default() + .push(cell_reference((row, col))); + } + + if type_counts.len() < 2 { + continue; + } + + let dominant_type = dominant_type(&type_counts); + let Some((drift_type, drift_count)) = first_drift_type(&type_counts, dominant_type) else { + continue; + }; + let Some(first_drift_cell) = cells_by_type + .get(drift_type) + .and_then(|cells| cells.first()) + .cloned() + else { + continue; + }; + let Some((first_drift_row, _)) = parse_cell_for_row(&first_drift_cell) else { + continue; + }; + let column_name = context.column_name(col); + let sample_drift_cells: Vec = cells_by_type + .get(drift_type) + .into_iter() + .flat_map(|cells| cells.iter().take(5).cloned()) + .collect(); + + findings.push(CheckFinding { + rule_id: CheckRuleId::TypeDrift, + severity: Severity::Warning, + sheet: context.sheet.name.clone(), + row: Some(first_drift_row), + column: Some(col), + range: context.data_column_range(col), + message: format!( + "Column '{}' mixes {} values with dominant {} values.", + column_name, drift_type, dominant_type + ), + details: json!({ + "column_name": column_name, + "dominant_type": dominant_type, + "drift_type": drift_type, + "drift_count": drift_count, + "type_counts": type_counts, + "sample_drift_cells": sample_drift_cells + }), + }); + } + + findings +} + +fn check_formula_presence(context: &SheetCheckContext<'_>) -> Vec { + if context.data_row_count == 0 { + return Vec::new(); + } + + let mut formulas = Vec::new(); + let mut min_row = usize::MAX; + let mut min_col = usize::MAX; + let mut max_row = 0; + let mut max_col = 0; + + for row in context.data_start_row..=context.sheet.max_rows { + for col in 1..=context.sheet.max_cols { + let Some(cell) = cell_at(context.sheet, row, col) else { + continue; + }; + if !cell_has_formula(cell) { + continue; + } + + min_row = min_row.min(row); + min_col = min_col.min(col); + max_row = max_row.max(row); + max_col = max_col.max(col); + formulas.push(json!({ + "cell": cell_reference((row, col)), + "formula": cell.formula.clone().unwrap_or_else(|| cell.value.clone()) + })); + } + } + + if formulas.is_empty() { + return Vec::new(); + } + + let formula_count = formulas.len(); + let formula_ratio = rounded_ratio(formula_count, context.data_row_count); + formulas.truncate(5); + + vec![CheckFinding { + rule_id: CheckRuleId::FormulaPresence, + severity: Severity::Info, + sheet: context.sheet.name.clone(), + row: Some(min_row), + column: Some(min_col), + range: Some(format!( + "{}{}:{}{}", + index_to_col_name(min_col), + min_row, + index_to_col_name(max_col), + max_row + )), + message: format!( + "Sheet '{}' contains {} formula cells.", + context.sheet.name, formula_count + ), + details: json!({ + "data_row_count": context.data_row_count, + "formula_count": formula_count, + "formula_ratio": formula_ratio, + "sample_formula_cells": formulas + }), + }] +} + +fn cell_at(sheet: &Sheet, row: usize, col: usize) -> Option<&Cell> { + sheet.data.get(row).and_then(|row_data| row_data.get(col)) +} + fn header_value(sheet: &Sheet, row: usize, col: usize) -> String { cell_at(sheet, row, col) .filter(|cell| !cell_has_formula(cell)) @@ -349,10 +673,6 @@ fn header_value(sheet: &Sheet, row: usize, col: usize) -> String { .unwrap_or_default() } -fn cell_at(sheet: &Sheet, row: usize, col: usize) -> Option<&Cell> { - sheet.data.get(row).and_then(|row_data| row_data.get(col)) -} - fn is_blank_cell(cell: Option<&Cell>) -> bool { cell.map(|cell| !cell_has_formula(cell) && cell.value.trim().is_empty()) .unwrap_or(true) @@ -362,6 +682,69 @@ fn cell_has_formula(cell: &Cell) -> bool { cell.is_formula || cell.formula.is_some() } +fn cell_is_present(cell: Option<&Cell>) -> bool { + cell.map(|cell| !cell.value.trim().is_empty() || cell_has_formula(cell)) + .unwrap_or(false) +} + +fn data_column_has_value(context: &SheetCheckContext<'_>, col: usize) -> bool { + (context.data_start_row..=context.sheet.max_rows) + .any(|row| cell_is_present(cell_at(context.sheet, row, col))) +} + +fn cell_kind(cell: &Cell) -> Option<&'static str> { + if !cell_is_present(Some(cell)) { + return None; + } + + match cell.cell_type { + CellType::Text => Some("string"), + CellType::Number => Some("number"), + CellType::Date => Some("date"), + CellType::Boolean => Some("boolean"), + CellType::Empty => None, + } +} + +fn dominant_type(type_counts: &BTreeMap<&'static str, usize>) -> &'static str { + type_counts + .iter() + .max_by(|(left_type, left_count), (right_type, right_count)| { + left_count + .cmp(right_count) + .then_with(|| right_type.cmp(left_type)) + }) + .map(|(kind, _)| *kind) + .unwrap_or("string") +} + +fn first_drift_type( + type_counts: &BTreeMap<&'static str, usize>, + dominant_type: &'static str, +) -> Option<(&'static str, usize)> { + type_counts + .iter() + .filter(|(kind, _)| **kind != dominant_type) + .min_by(|(left_type, left_count), (right_type, right_count)| { + left_count + .cmp(right_count) + .then_with(|| left_type.cmp(right_type)) + }) + .map(|(kind, count)| (*kind, *count)) +} + +fn parse_cell_for_row(cell: &str) -> Option<(usize, usize)> { + crate::utils::parse_cell_reference(cell) +} + +fn rounded_ratio(numerator: usize, denominator: usize) -> f64 { + if denominator == 0 { + 0.0 + } else { + ((numerator as f64 / denominator as f64) * 10_000.0).round() / 10_000.0 + } +} + fn summarize_findings(findings: &[CheckFinding]) -> Value { let error_count = findings .iter() diff --git a/tests/check_analytical_rules_test.rs b/tests/check_analytical_rules_test.rs new file mode 100644 index 0000000..5d5c348 --- /dev/null +++ b/tests/check_analytical_rules_test.rs @@ -0,0 +1,390 @@ +use std::path::{Path, PathBuf}; +use std::process::{Command, Output}; + +use serde_json::{json, Value}; + +fn excel_cli_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_excel-cli")) +} + +fn run_check(args: &[&str]) -> Output { + Command::new(excel_cli_bin()) + .args(args) + .output() + .unwrap_or_else(|_| panic!("Failed to execute excel-cli {}", args.join(" "))) +} + +fn parse_stdout(output: &Output) -> Value { + serde_json::from_slice(&output.stdout).expect("stdout should be valid JSON") +} + +fn assert_success(output: &Output, code: i32) { + assert_eq!(output.status.code(), Some(code)); + assert!( + output.stderr.is_empty(), + "stderr should be empty: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +fn temp_path(name: &str) -> PathBuf { + std::env::temp_dir().join(name) +} + +fn create_null_ratio_positive(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + let sheet = workbook.add_worksheet(); + sheet.set_name("Orders").unwrap(); + sheet.write_string(0, 0, "order_id").unwrap(); + sheet.write_string(0, 1, "customer").unwrap(); + sheet.write_string(0, 2, "email").unwrap(); + sheet.write_string(1, 0, "1001").unwrap(); + sheet.write_string(1, 1, "Alice").unwrap(); + sheet.write_string(1, 2, "alice@example.test").unwrap(); + sheet.write_string(2, 0, "1002").unwrap(); + sheet.write_string(2, 1, "Bob").unwrap(); + sheet.write_string(3, 0, "1003").unwrap(); + sheet.write_string(3, 1, "Cara").unwrap(); + workbook.save(path).unwrap(); +} + +fn create_clean_values(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + let sheet = workbook.add_worksheet(); + sheet.set_name("Orders").unwrap(); + sheet.write_string(0, 0, "order_id").unwrap(); + sheet.write_string(0, 1, "customer").unwrap(); + sheet.write_string(0, 2, "amount").unwrap(); + sheet.write_string(1, 0, "1001").unwrap(); + sheet.write_string(1, 1, "Alice").unwrap(); + sheet.write_number(1, 2, 12.5).unwrap(); + sheet.write_string(2, 0, "1002").unwrap(); + sheet.write_string(2, 1, "Bob").unwrap(); + sheet.write_number(2, 2, 18.0).unwrap(); + sheet.write_string(3, 0, "1003").unwrap(); + sheet.write_string(3, 1, "Cara").unwrap(); + sheet.write_number(3, 2, 21.0).unwrap(); + workbook.save(path).unwrap(); +} + +fn create_duplicate_values_positive(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + let sheet = workbook.add_worksheet(); + sheet.set_name("Orders").unwrap(); + sheet.write_string(0, 0, "order_id").unwrap(); + sheet.write_string(0, 1, "customer").unwrap(); + sheet.write_string(1, 0, "1001").unwrap(); + sheet.write_string(1, 1, "Alice").unwrap(); + sheet.write_string(2, 0, "1002").unwrap(); + sheet.write_string(2, 1, "Bob").unwrap(); + sheet.write_string(3, 0, "1001").unwrap(); + sheet.write_string(3, 1, "Cara").unwrap(); + workbook.save(path).unwrap(); +} + +fn create_type_drift_positive(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + let sheet = workbook.add_worksheet(); + sheet.set_name("Orders").unwrap(); + sheet.write_string(0, 0, "order_id").unwrap(); + sheet.write_string(0, 1, "amount").unwrap(); + sheet.write_string(1, 0, "1001").unwrap(); + sheet.write_number(1, 1, 12.5).unwrap(); + sheet.write_string(2, 0, "1002").unwrap(); + sheet.write_number(2, 1, 18.0).unwrap(); + sheet.write_string(3, 0, "1003").unwrap(); + sheet.write_string(3, 1, "unknown").unwrap(); + workbook.save(path).unwrap(); +} + +fn create_formula_presence_positive(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + let sheet = workbook.add_worksheet(); + sheet.set_name("Orders").unwrap(); + sheet.write_string(0, 0, "order_id").unwrap(); + sheet.write_string(0, 1, "amount").unwrap(); + sheet.write_string(0, 2, "total").unwrap(); + sheet.write_string(1, 0, "1001").unwrap(); + sheet.write_number(1, 1, 12.5).unwrap(); + sheet.write_formula(1, 2, "=B2*2").unwrap(); + sheet.write_string(2, 0, "1002").unwrap(); + sheet.write_number(2, 1, 18.0).unwrap(); + sheet.write_formula(2, 2, "=B3*2").unwrap(); + workbook.save(path).unwrap(); +} + +fn create_combined_positive(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + let sheet = workbook.add_worksheet(); + sheet.set_name("Orders").unwrap(); + sheet.write_string(0, 0, "order_id").unwrap(); + sheet.write_string(0, 1, "amount").unwrap(); + sheet.write_string(0, 2, "reviewer").unwrap(); + sheet.write_string(0, 3, "total").unwrap(); + sheet.write_string(1, 0, "1001").unwrap(); + sheet.write_number(1, 1, 12.5).unwrap(); + sheet.write_string(1, 2, "Alice").unwrap(); + sheet.write_formula(1, 3, "=B2*2").unwrap(); + sheet.write_string(2, 0, "1002").unwrap(); + sheet.write_number(2, 1, 18.0).unwrap(); + sheet.write_formula(2, 3, "=B3*2").unwrap(); + sheet.write_string(3, 0, "1001").unwrap(); + sheet.write_string(3, 1, "unknown").unwrap(); + sheet.write_formula(3, 3, "=B4*2").unwrap(); + workbook.save(path).unwrap(); +} + +#[test] +fn check_analytical_null_ratio_reports_blank_data_cells() { + let file_path = temp_path("excel_cli_check_analytical_null_ratio.xlsx"); + create_null_ratio_positive(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&["check", file_arg.as_ref(), "--rules", "null_ratio"]); + + assert_success(&output, 1); + let json = parse_stdout(&output); + assert_eq!(json["data"]["summary"]["warning_count"], 1); + assert_eq!(json["data"]["stats"]["finding_count_before_threshold"], 1); + assert_eq!( + json["data"]["findings"][0], + json!({ + "rule_id": "null_ratio", + "severity": "warning", + "sheet": "Orders", + "row": 3, + "column": 3, + "range": "C2:C4", + "message": "Column 'email' has blank values in 2 of 3 data rows.", + "details": { + "column_name": "email", + "data_row_count": 3, + "first_null_cell": "C3", + "null_count": 2, + "null_ratio": 0.6667, + "severity_threshold": { + "info": "> 0 and < 0.5", + "warning": ">= 0.5 and < 1.0", + "error": "1.0" + } + } + }) + ); +} + +#[test] +fn check_analytical_null_ratio_ignores_complete_columns() { + let file_path = temp_path("excel_cli_check_analytical_null_ratio_clean.xlsx"); + create_clean_values(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&["check", file_arg.as_ref(), "--rules", "null_ratio"]); + + assert_success(&output, 0); + let json = parse_stdout(&output); + assert_eq!(json["data"]["summary"]["status"], "pass"); + assert_eq!(json["data"]["findings"], json!([])); +} + +#[test] +fn check_analytical_duplicate_values_checks_default_candidate_column() { + let file_path = temp_path("excel_cli_check_analytical_duplicate_values.xlsx"); + create_duplicate_values_positive(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&["check", file_arg.as_ref(), "--rules", "duplicate_values"]); + + assert_success(&output, 1); + let json = parse_stdout(&output); + assert_eq!( + json["data"]["findings"][0], + json!({ + "rule_id": "duplicate_values", + "severity": "warning", + "sheet": "Orders", + "row": 2, + "column": 1, + "range": "A2:A4", + "message": "Column 'order_id' has duplicate value '1001' in 2 rows.", + "details": { + "candidate_column": { + "column": 1, + "column_name": "order_id", + "selection": "first non-empty header data column" + }, + "duplicate_value": "1001", + "occurrence_count": 2, + "rows": [2, 4], + "cells": ["A2", "A4"] + } + }) + ); +} + +#[test] +fn check_analytical_duplicate_values_ignores_unique_candidate_values() { + let file_path = temp_path("excel_cli_check_analytical_duplicate_values_clean.xlsx"); + create_clean_values(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&["check", file_arg.as_ref(), "--rules", "duplicate_values"]); + + assert_success(&output, 0); + let json = parse_stdout(&output); + assert_eq!(json["data"]["findings"], json!([])); +} + +#[test] +fn check_analytical_type_drift_reports_mixed_column_types() { + let file_path = temp_path("excel_cli_check_analytical_type_drift.xlsx"); + create_type_drift_positive(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&["check", file_arg.as_ref(), "--rules", "type_drift"]); + + assert_success(&output, 1); + let json = parse_stdout(&output); + assert_eq!( + json["data"]["findings"][0], + json!({ + "rule_id": "type_drift", + "severity": "warning", + "sheet": "Orders", + "row": 4, + "column": 2, + "range": "B2:B4", + "message": "Column 'amount' mixes string values with dominant number values.", + "details": { + "column_name": "amount", + "dominant_type": "number", + "drift_type": "string", + "drift_count": 1, + "type_counts": { + "number": 2, + "string": 1 + }, + "sample_drift_cells": ["B4"] + } + }) + ); +} + +#[test] +fn check_analytical_type_drift_ignores_consistent_columns() { + let file_path = temp_path("excel_cli_check_analytical_type_drift_clean.xlsx"); + create_clean_values(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&["check", file_arg.as_ref(), "--rules", "type_drift"]); + + assert_success(&output, 0); + let json = parse_stdout(&output); + assert_eq!(json["data"]["findings"], json!([])); +} + +#[test] +fn check_analytical_formula_presence_reports_formula_cells() { + let file_path = temp_path("excel_cli_check_analytical_formula_presence.xlsx"); + create_formula_presence_positive(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&["check", file_arg.as_ref(), "--rules", "formula_presence"]); + + assert_success(&output, 1); + let json = parse_stdout(&output); + assert_eq!( + json["data"]["findings"][0], + json!({ + "rule_id": "formula_presence", + "severity": "info", + "sheet": "Orders", + "row": 2, + "column": 3, + "range": "C2:C3", + "message": "Sheet 'Orders' contains 2 formula cells.", + "details": { + "data_row_count": 2, + "formula_count": 2, + "formula_ratio": 1.0, + "sample_formula_cells": [ + {"cell": "C2", "formula": "=B2*2"}, + {"cell": "C3", "formula": "=B3*2"} + ] + } + }) + ); +} + +#[test] +fn check_analytical_formula_presence_ignores_non_formula_sheets() { + let file_path = temp_path("excel_cli_check_analytical_formula_presence_clean.xlsx"); + create_clean_values(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&["check", file_arg.as_ref(), "--rules", "formula_presence"]); + + assert_success(&output, 0); + let json = parse_stdout(&output); + assert_eq!(json["data"]["findings"], json!([])); +} + +#[test] +fn check_analytical_rule_combinations_and_thresholds_are_stable() { + let file_path = temp_path("excel_cli_check_analytical_combined.xlsx"); + create_combined_positive(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&[ + "check", + file_arg.as_ref(), + "--rules", + "formula_presence,null_ratio,duplicate_values,type_drift", + "--severity-threshold", + "warning", + ]); + + assert_success(&output, 1); + let json = parse_stdout(&output); + assert_eq!( + json["data"]["stats"]["rules_run"], + json!([ + "null_ratio", + "duplicate_values", + "type_drift", + "formula_presence" + ]) + ); + assert_eq!(json["data"]["stats"]["severity_threshold"], "warning"); + assert_eq!(json["data"]["stats"]["finding_count_before_threshold"], 4); + assert_eq!( + json["data"]["summary"], + json!({ + "status": "fail", + "finding_count": 3, + "error_count": 0, + "warning_count": 3, + "info_count": 0 + }) + ); + assert_eq!( + json["data"]["findings"] + .as_array() + .unwrap() + .iter() + .map(|finding| finding["rule_id"].as_str().unwrap()) + .collect::>(), + vec!["null_ratio", "duplicate_values", "type_drift"] + ); +} diff --git a/tests/check_rule_integration_test.rs b/tests/check_rule_integration_test.rs new file mode 100644 index 0000000..75b73d5 --- /dev/null +++ b/tests/check_rule_integration_test.rs @@ -0,0 +1,117 @@ +use std::path::{Path, PathBuf}; +use std::process::{Command, Output}; + +use serde_json::{json, Value}; + +fn excel_cli_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_excel-cli")) +} + +fn temp_path(name: &str) -> PathBuf { + std::env::temp_dir().join(name) +} + +fn create_workbook(path: &Path) { + use rust_xlsxwriter::Workbook as XlsxWorkbook; + + let mut workbook = XlsxWorkbook::new(); + + let structural = workbook.add_worksheet(); + structural.set_name("Structural").unwrap(); + structural.write_string(0, 0, "customer").unwrap(); + structural.write_string(0, 2, "customer").unwrap(); + structural.write_string(1, 0, "Alice").unwrap(); + structural.write_string(1, 1, "A-100").unwrap(); + structural.write_string(1, 2, "Alice").unwrap(); + + let analytical = workbook.add_worksheet(); + analytical.set_name("Analytical").unwrap(); + analytical.write_string(0, 0, "order_id").unwrap(); + analytical.write_string(0, 1, "amount").unwrap(); + analytical.write_string(0, 2, "total").unwrap(); + analytical.write_string(1, 0, "1001").unwrap(); + analytical.write_number(1, 1, 12.5).unwrap(); + analytical.write_formula(1, 2, "=B2*2").unwrap(); + analytical.write_string(2, 0, "1001").unwrap(); + analytical.write_number(2, 1, 18.0).unwrap(); + analytical.write_formula(2, 2, "=B3*2").unwrap(); + + workbook.save(path).unwrap(); +} + +fn run_check(args: &[&str]) -> Output { + Command::new(excel_cli_bin()) + .args(args) + .output() + .unwrap_or_else(|_| panic!("Failed to execute excel-cli {}", args.join(" "))) +} + +fn parse_stdout(output: &Output) -> Value { + serde_json::from_slice(&output.stdout).expect("stdout should be valid JSON") +} + +fn assert_success(output: &Output, code: i32) { + assert_eq!(output.status.code(), Some(code)); + assert!( + output.stderr.is_empty(), + "stderr should be empty: {}", + String::from_utf8_lossy(&output.stderr) + ); +} + +#[test] +fn workbook_check_combines_structural_and_analytical_findings() { + let file_path = temp_path("excel_cli_check_rule_integration.xlsx"); + create_workbook(&file_path); + let file_arg = file_path.to_string_lossy(); + + let output = run_check(&[ + "check", + file_arg.as_ref(), + "--rules", + "blank_headers,duplicate_headers,duplicate_values,formula_presence", + ]); + + assert_success(&output, 1); + let json = parse_stdout(&output); + + assert_eq!(json["target"], json!({})); + assert_eq!( + json["data"]["summary"], + json!({ + "status": "fail", + "finding_count": 4, + "error_count": 0, + "warning_count": 3, + "info_count": 1 + }) + ); + assert_eq!( + json["data"]["stats"]["rules_run"], + json!([ + "blank_headers", + "duplicate_headers", + "duplicate_values", + "formula_presence" + ]) + ); + assert_eq!(json["data"]["stats"]["checked_sheet_count"], 2); + assert_eq!(json["data"]["stats"]["finding_count_before_threshold"], 4); + assert_eq!( + json["data"]["findings"] + .as_array() + .unwrap() + .iter() + .map(|finding| ( + finding["sheet"].as_str().unwrap(), + finding["rule_id"].as_str().unwrap() + )) + .collect::>(), + vec![ + ("Structural", "blank_headers"), + ("Structural", "duplicate_headers"), + ("Analytical", "duplicate_values"), + ("Analytical", "formula_presence"), + ] + ); +} From 0cd6dc9c6956a414a0ac42ab8bfcdc2fd6b1725d Mon Sep 17 00:00:00 2001 From: fuhan666 Date: Wed, 22 Apr 2026 02:15:42 +0800 Subject: [PATCH 4/7] fix: format merged check rules --- src/cli/check.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cli/check.rs b/src/cli/check.rs index 4347f12..3d27753 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -334,8 +334,7 @@ fn find_blank_rows(context: &SheetCheckContext<'_>) -> Vec { (1..=context.sheet.max_rows) .filter(|row| { - (1..=context.sheet.max_cols) - .all(|col| is_blank_cell(cell_at(context.sheet, *row, col))) + (1..=context.sheet.max_cols).all(|col| is_blank_cell(cell_at(context.sheet, *row, col))) }) .map(|row| { let end_col = index_to_col_name(context.sheet.max_cols); @@ -365,8 +364,7 @@ fn find_blank_columns(context: &SheetCheckContext<'_>) -> Vec { (1..=context.sheet.max_cols) .filter(|col| { - (1..=context.sheet.max_rows) - .all(|row| is_blank_cell(cell_at(context.sheet, row, *col))) + (1..=context.sheet.max_rows).all(|row| is_blank_cell(cell_at(context.sheet, row, *col))) }) .map(|col| { let column_label = index_to_col_name(col); @@ -378,7 +376,10 @@ fn find_blank_columns(context: &SheetCheckContext<'_>) -> Vec { row: None, column: Some(col), range: Some(range), - message: format!("Blank column {column_label} in used range {}.", context.used_range), + message: format!( + "Blank column {column_label} in used range {}.", + context.used_range + ), details: json!({ "used_range": context.used_range, "column_label": column_label, From 4120a4aae7f1c91ed32301ba939f39f753470781 Mon Sep 17 00:00:00 2001 From: fuhan666 Date: Wed, 22 Apr 2026 03:29:42 +0800 Subject: [PATCH 5/7] feat: add findings navigation to the tui --- src/app/findings.rs | 142 +++++++++++++++++++++++++++++++++++ src/app/mod.rs | 1 + src/app/state.rs | 129 +++++++++++++++++++++++++++----- src/app/ui.rs | 8 ++ src/cli/check.rs | 155 +++++++++++++++++++++++++++++---------- src/commands/executor.rs | 25 ++++++- src/ui/handlers.rs | 57 +++++++++++++- src/ui/render.rs | 102 +++++++++++++++++++++++++- 8 files changed, 557 insertions(+), 62 deletions(-) create mode 100644 src/app/findings.rs diff --git a/src/app/findings.rs b/src/app/findings.rs new file mode 100644 index 0000000..576e02c --- /dev/null +++ b/src/app/findings.rs @@ -0,0 +1,142 @@ +use crate::app::{AppState, InputMode}; +use crate::cli::args::SeverityThreshold; +use crate::cli::check::{run_check_report, CheckFinding}; +use crate::utils::{cell_reference, parse_range}; + +#[derive(Clone, Debug, Default)] +pub(crate) struct FindingsState { + pub(crate) items: Vec, + pub(crate) selected: usize, + pub(crate) last_refresh_error: Option, +} + +impl FindingsState { + fn clamp_selected(&mut self) { + if self.items.is_empty() { + self.selected = 0; + } else { + self.selected = self.selected.min(self.items.len() - 1); + } + } +} + +impl AppState<'_> { + pub fn show_findings(&mut self) { + self.input_mode = InputMode::Findings; + self.refresh_findings(); + } + + pub fn close_findings(&mut self) { + self.input_mode = InputMode::Normal; + } + + pub fn refresh_findings(&mut self) { + let was_modified = self.workbook.is_modified(); + let result = self.ensure_findings_workbook_ready().and_then(|_| { + run_check_report(&mut self.workbook, None, None, SeverityThreshold::Info) + }); + self.workbook.set_modified(was_modified); + + match result { + Ok(report) => { + let finding_count = report.findings.len(); + self.findings.items = report.findings; + self.findings.last_refresh_error = None; + self.findings.clamp_selected(); + + if finding_count == 0 { + self.add_notification("No findings in current workbook".to_string()); + } else { + self.add_notification(format!("Loaded {finding_count} findings")); + } + } + Err(err) => { + self.findings.items.clear(); + self.findings.selected = 0; + self.findings.last_refresh_error = Some(err.to_string()); + self.add_notification(format!("Findings refresh failed: {err}")); + } + } + } + + pub fn select_next_finding(&mut self) { + if self.findings.selected + 1 < self.findings.items.len() { + self.findings.selected += 1; + } + } + + pub fn select_prev_finding(&mut self) { + self.findings.selected = self.findings.selected.saturating_sub(1); + } + + pub fn activate_selected_finding(&mut self) { + let Some(finding) = self.findings.items.get(self.findings.selected).cloned() else { + self.add_notification("No finding selected".to_string()); + return; + }; + + let target_index = match self.workbook.resolve_sheet_by_name(&finding.sheet) { + Ok(index) => index, + Err(err) => { + self.add_notification(format!( + "Finding sheet '{}' not found: {err}", + finding.sheet + )); + return; + } + }; + + if self.workbook.get_current_sheet_index() != target_index { + if let Err(err) = self.switch_sheet_by_index(target_index) { + self.add_notification(format!("Failed to switch to finding sheet: {err}")); + return; + } + } + + let Some(target_cell) = finding_target_cell(&finding) else { + self.add_notification(format!("Jumped to finding on sheet '{}'", finding.sheet)); + return; + }; + + let sheet = self.workbook.get_current_sheet(); + let max_row = sheet.max_rows.max(1); + let max_col = sheet.max_cols.max(1); + let clamped = (target_cell.0.min(max_row), target_cell.1.min(max_col)); + + self.selected_cell = clamped; + self.handle_scrolling(); + + if clamped == target_cell { + self.add_notification(format!("Jumped to finding at {}", cell_reference(clamped))); + } else { + self.add_notification(format!( + "Finding target {} was out of range; jumped to {}", + cell_reference(target_cell), + cell_reference(clamped) + )); + } + } + + fn ensure_findings_workbook_ready(&mut self) -> Result<(), crate::cli::error::AppError> { + let sheet_names = self.workbook.get_sheet_names(); + for (index, sheet_name) in sheet_names.iter().enumerate() { + self.workbook + .ensure_sheet_loaded(index, sheet_name) + .map_err(crate::cli::error::anyhow_to_app_error)?; + } + Ok(()) + } +} + +fn finding_target_cell(finding: &CheckFinding) -> Option<(usize, usize)> { + match (finding.row, finding.column) { + (Some(row), Some(col)) => Some((row, col)), + _ => finding + .range + .as_deref() + .and_then(parse_range) + .map(|(start, _)| start) + .or_else(|| finding.row.map(|row| (row, 1))) + .or_else(|| finding.column.map(|col| (1, col))), + } +} diff --git a/src/app/mod.rs b/src/app/mod.rs index 97de0b2..9079b85 100644 --- a/src/app/mod.rs +++ b/src/app/mod.rs @@ -1,4 +1,5 @@ mod edit; +mod findings; mod navigation; mod query_preview; mod search; diff --git a/src/app/state.rs b/src/app/state.rs index 484e2e1..07f69ff 100644 --- a/src/app/state.rs +++ b/src/app/state.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; use tui_textarea::TextArea; use crate::actions::UndoHistory; +use crate::app::findings::FindingsState; use crate::app::QueryPreview; use crate::app::VimState; use crate::excel::Workbook; @@ -25,6 +26,7 @@ pub enum InputMode { SearchBackward, Help, Preview, + Findings, LazyLoading, CommandInLazyLoading, } @@ -59,6 +61,7 @@ pub struct AppState<'a> { pub help_scroll: usize, pub help_visible_lines: usize, pub query_preview: Option, + pub(crate) findings: FindingsState, pub undo_history: UndoHistory, pub vim_state: Option, } @@ -157,6 +160,7 @@ impl AppState<'_> { help_scroll: 0, help_visible_lines: 20, query_preview: None, + findings: FindingsState::default(), undo_history: UndoHistory::new(), vim_state: None, }) @@ -223,23 +227,26 @@ impl AppState<'_> { } pub fn cancel_input(&mut self) { - if let InputMode::Preview = self.input_mode { - self.close_query_preview(); - return; - } - - // If in help mode, just close the help window - if let InputMode::Help = self.input_mode { - self.input_mode = InputMode::Normal; - return; - } - - // If in CommandInLazyLoading mode, return to LazyLoading mode - if let InputMode::CommandInLazyLoading = self.input_mode { - self.input_mode = InputMode::LazyLoading; - self.input_buffer = String::new(); - self.text_area = TextArea::default(); - return; + match self.input_mode { + InputMode::Preview => { + self.close_query_preview(); + return; + } + InputMode::Findings => { + self.close_findings(); + return; + } + InputMode::Help => { + self.input_mode = InputMode::Normal; + return; + } + InputMode::CommandInLazyLoading => { + self.input_mode = InputMode::LazyLoading; + self.input_buffer = String::new(); + self.text_area = TextArea::default(); + return; + } + _ => {} } // Otherwise, cancel the current input @@ -266,3 +273,91 @@ impl AppState<'_> { self.input_buffer = String::new(); } } + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use super::{AppState, InputMode}; + use crate::cli::check::CheckRuleId; + use crate::excel::{Cell, Sheet, Workbook}; + + fn sheet_with_values(name: &str, values: &[&[&str]]) -> Sheet { + let max_rows = values.len(); + let max_cols = values.iter().map(|row| row.len()).max().unwrap_or(0); + let mut data = vec![vec![Cell::empty(); max_cols + 1]; max_rows + 1]; + + for (row_idx, row) in values.iter().enumerate() { + for (col_idx, value) in row.iter().enumerate() { + data[row_idx + 1][col_idx + 1] = Cell::new((*value).to_string(), false); + } + } + + Sheet { + name: name.to_string(), + data, + max_rows, + max_cols, + is_loaded: true, + } + } + + #[test] + fn show_findings_refreshes_report_without_marking_workbook_modified() { + let workbook = Workbook::from_sheets_for_test(vec![sheet_with_values( + "Data", + &[&["Name", "Name"], &["Ada", ""], &["", ""]], + )]); + let mut app = AppState::new(workbook, PathBuf::from("quality.xlsx")).unwrap(); + + app.show_findings(); + + assert!(matches!(app.input_mode, InputMode::Findings)); + assert!(!app.workbook.is_modified()); + assert!(!app.findings.items.is_empty()); + assert_eq!(app.findings.selected, 0); + } + + #[test] + fn activate_selected_finding_switches_sheet_and_uses_range_fallback() { + let workbook = Workbook::from_sheets_for_test(vec![ + sheet_with_values("Summary", &[&["Status"], &["ok"]]), + sheet_with_values("报告", &[&["Name", ""], &["Ada", ""]]), + ]); + let mut app = AppState::new(workbook, PathBuf::from("quality.xlsx")).unwrap(); + + app.show_findings(); + app.findings.selected = app + .findings + .items + .iter() + .position(|finding| finding.rule_id == CheckRuleId::BlankColumns) + .expect("blank column finding should exist"); + app.activate_selected_finding(); + + assert!(matches!(app.input_mode, InputMode::Findings)); + assert_eq!(app.workbook.get_current_sheet_name(), "报告"); + assert_eq!(app.selected_cell, (1, 2)); + } + + #[test] + fn activate_selected_finding_prefers_exact_row_and_column() { + let workbook = Workbook::from_sheets_for_test(vec![sheet_with_values( + "Data", + &[&["Name", "Score"], &["Ada", "10"], &["Ada", "11"]], + )]); + let mut app = AppState::new(workbook, PathBuf::from("quality.xlsx")).unwrap(); + + app.show_findings(); + app.findings.selected = app + .findings + .items + .iter() + .position(|finding| finding.rule_id == CheckRuleId::DuplicateValues) + .expect("duplicate values finding should exist"); + app.activate_selected_finding(); + + assert_eq!(app.workbook.get_current_sheet_name(), "Data"); + assert_eq!(app.selected_cell, (2, 1)); + } +} diff --git a/src/app/ui.rs b/src/app/ui.rs index ea7c0b3..afca0a3 100644 --- a/src/app/ui.rs +++ b/src/app/ui.rs @@ -13,7 +13,9 @@ impl AppState<'_> { NAVIGATION:\n\ :[cell] - Jump to cell (e.g., :B10)\n\ :preview, :pv - Show read-only preview of current sheet data\n\ + :findings, :issues - Open workbook findings panel\n\ hjkl - Move cursor (left, down, up, right)\n\ + f - Open or refresh findings panel\n\ 0 - Jump to first column\n\ ^ - Jump to first non-empty column\n\ $ - Jump to last column\n\ @@ -36,6 +38,12 @@ impl AppState<'_> { n - Jump to next search result\n\ N - Jump to previous search result\n\ :nohlsearch, :noh - Disable search highlighting\n\n\ + FINDINGS PANEL:\n\ + j/k, ↑/↓ - Move findings selection\n\ + Enter - Jump to selected finding location\n\ + r or f - Refresh findings\n\ + Esc or q - Close findings panel\n\n\ +\ COLUMN OPERATIONS:\n\ :cw fit - Adjust width of current column to fit its content\n\ :cw fit all - Adjust width of all columns to fit their content\n\ diff --git a/src/cli/check.rs b/src/cli/check.rs index 3d27753..cffa1b1 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -32,37 +32,26 @@ pub fn handle( let mut workbook = open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?; - let selected_rules = parse_rules(rules.as_deref())?; - let threshold = Severity::from_threshold(severity_threshold); - let checked_sheet_indices = resolve_checked_sheets(&workbook, sheet.as_deref())?; - - for index in &checked_sheet_indices { - let sheet_name = workbook.get_sheet_names()[*index].clone(); - workbook - .ensure_sheet_loaded(*index, &sheet_name) - .map_err(crate::cli::error::anyhow_to_app_error)?; - } - - let sheet_names = workbook.get_sheet_names(); - let mut findings = run_rules(&workbook, &selected_rules, &checked_sheet_indices)?; - let finding_count_before_threshold = findings.len(); - findings.retain(|finding| finding.severity >= threshold); - sort_findings(&mut findings, &sheet_names); + let report = run_check_report( + &mut workbook, + sheet.as_deref(), + rules.as_deref(), + severity_threshold, + )?; let data = json!({ - "summary": summarize_findings(&findings), - "stats": build_stats( - &workbook, - &checked_sheet_indices, - &selected_rules, - severity_threshold, - finding_count_before_threshold, - )?, - "findings": findings, + "summary": report.summary, + "stats": report.stats, + "findings": report.findings, }); let target = if let Some(sheet_name) = sheet { - let sheet_index = checked_sheet_indices[0]; + let sheet_index = + workbook + .resolve_sheet_by_name(&sheet_name) + .map_err(|e| AppError::TargetNotFound { + message: e.to_string(), + })?; envelope::target_sheet(&sheet_name, sheet_index) } else { envelope::target_workbook() @@ -88,6 +77,42 @@ pub fn handle( )) } +pub(crate) fn run_check_report( + workbook: &mut Workbook, + sheet: Option<&str>, + rules: Option<&str>, + severity_threshold: SeverityThreshold, +) -> Result { + let selected_rules = parse_rules(rules)?; + let threshold = Severity::from_threshold(severity_threshold); + let checked_sheet_indices = resolve_checked_sheets(workbook, sheet)?; + + for index in &checked_sheet_indices { + let sheet_name = workbook.get_sheet_names()[*index].clone(); + workbook + .ensure_sheet_loaded(*index, &sheet_name) + .map_err(crate::cli::error::anyhow_to_app_error)?; + } + + let sheet_names = workbook.get_sheet_names(); + let mut findings = run_rules(workbook, &selected_rules, &checked_sheet_indices)?; + let finding_count_before_threshold = findings.len(); + findings.retain(|finding| finding.severity >= threshold); + sort_findings(&mut findings, &sheet_names); + + Ok(CheckReport { + summary: summarize_findings(&findings), + stats: build_stats( + workbook, + &checked_sheet_indices, + &selected_rules, + severity_threshold, + finding_count_before_threshold, + )?, + findings, + }) +} + fn file_format(path: &Path) -> String { path.extension() .and_then(|e| e.to_str()) @@ -857,9 +882,16 @@ fn compare_usize(left: Option, right: Option) -> Ordering { } } +#[derive(Clone, Debug)] +pub(crate) struct CheckReport { + pub(crate) summary: Value, + pub(crate) stats: Value, + pub(crate) findings: Vec, +} + #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)] #[serde(rename_all = "snake_case")] -enum CheckRuleId { +pub(crate) enum CheckRuleId { BlankHeaders, DuplicateHeaders, BlankRows, @@ -875,7 +907,7 @@ impl CheckRuleId { RULES.iter().copied().find(|rule| rule.as_str() == value) } - fn as_str(&self) -> &'static str { + pub(crate) fn as_str(&self) -> &'static str { match self { CheckRuleId::BlankHeaders => "blank_headers", CheckRuleId::DuplicateHeaders => "duplicate_headers", @@ -898,7 +930,7 @@ impl CheckRuleId { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)] #[serde(rename_all = "lowercase")] -enum Severity { +pub(crate) enum Severity { Info, Warning, Error, @@ -912,18 +944,26 @@ impl Severity { SeverityThreshold::Error => Severity::Error, } } + + pub(crate) fn as_str(&self) -> &'static str { + match self { + Severity::Info => "info", + Severity::Warning => "warning", + Severity::Error => "error", + } + } } #[derive(Clone, Debug, Serialize)] -struct CheckFinding { - rule_id: CheckRuleId, - severity: Severity, - sheet: String, - row: Option, - column: Option, - range: Option, - message: String, - details: Value, +pub(crate) struct CheckFinding { + pub(crate) rule_id: CheckRuleId, + pub(crate) severity: Severity, + pub(crate) sheet: String, + pub(crate) row: Option, + pub(crate) column: Option, + pub(crate) range: Option, + pub(crate) message: String, + pub(crate) details: Value, } #[cfg(test)] @@ -932,6 +972,7 @@ mod tests { use super::*; use crate::cli::error::{EXIT_CHECK_FINDINGS, EXIT_SUCCESS}; + use crate::excel::{Cell, Sheet, Workbook}; #[test] fn exit_code_uses_one_for_successful_reports_with_findings() { @@ -984,4 +1025,42 @@ mod tests { assert_eq!(findings[1].row, Some(2)); assert_eq!(findings[2].rule_id, CheckRuleId::DuplicateHeaders); } + + fn sheet_with_values(name: &str, values: &[&[&str]]) -> Sheet { + let max_rows = values.len(); + let max_cols = values.iter().map(|row| row.len()).max().unwrap_or(0); + let mut data = vec![vec![Cell::empty(); max_cols + 1]; max_rows + 1]; + + for (row_idx, row) in values.iter().enumerate() { + for (col_idx, value) in row.iter().enumerate() { + data[row_idx + 1][col_idx + 1] = Cell::new((*value).to_string(), false); + } + } + + Sheet { + name: name.to_string(), + data, + max_rows, + max_cols, + is_loaded: true, + } + } + + #[test] + fn run_check_report_reuses_rule_pipeline_for_structured_findings() { + let mut workbook = Workbook::from_sheets_for_test(vec![sheet_with_values( + "Data", + &[&["Name", "Name"], &["Ada", ""], &["", ""]], + )]); + + let report = run_check_report(&mut workbook, None, None, SeverityThreshold::Info).unwrap(); + + assert_eq!(report.summary["status"], "fail"); + assert_eq!(report.stats["checked_sheet_count"], 1); + assert!(!report.findings.is_empty()); + assert!(report + .findings + .iter() + .any(|finding| finding.rule_id == CheckRuleId::DuplicateHeaders)); + } } diff --git a/src/commands/executor.rs b/src/commands/executor.rs index 617d9d2..96a95ca 100644 --- a/src/commands/executor.rs +++ b/src/commands/executor.rs @@ -53,6 +53,7 @@ impl AppState<'_> { "nohlsearch" | "noh" => self.disable_search_highlight(), "help" => self.show_help(), "preview" | "pv" => self.show_query_preview(), + "findings" | "issues" => self.show_findings(), "delsheet" => self.delete_current_sheet(), "addsheet" => self.add_notification("Usage: :addsheet ".to_string()), _ => { @@ -415,7 +416,7 @@ mod tests { fn app_with_sheet() -> AppState<'static> { let mut data = vec![vec![Cell::empty(); 3]; 3]; data[1][1] = Cell::new("Name".to_string(), false); - data[1][2] = Cell::new("Score".to_string(), false); + data[1][2] = Cell::new("Name".to_string(), false); data[2][1] = Cell::new("Ada".to_string(), false); data[2][2] = Cell::new("10".to_string(), false); @@ -460,4 +461,26 @@ mod tests { assert!(matches!(app.input_mode, InputMode::Preview)); assert!(app.query_preview.is_some()); } + + #[test] + fn findings_command_opens_findings_panel() { + let mut app = app_with_sheet(); + app.input_buffer = "findings".to_string(); + + app.execute_command(); + + assert!(matches!(app.input_mode, InputMode::Findings)); + assert!(!app.findings.items.is_empty()); + } + + #[test] + fn issues_alias_opens_findings_panel() { + let mut app = app_with_sheet(); + app.input_buffer = "issues".to_string(); + + app.execute_command(); + + assert!(matches!(app.input_mode, InputMode::Findings)); + assert!(!app.findings.items.is_empty()); + } } diff --git a/src/ui/handlers.rs b/src/ui/handlers.rs index 7d971f3..97a64c5 100644 --- a/src/ui/handlers.rs +++ b/src/ui/handlers.rs @@ -21,6 +21,7 @@ pub fn handle_key_event(app_state: &mut AppState, key: KeyEvent) { InputMode::SearchBackward => handle_search_mode(app_state, key.code), InputMode::Help => handle_help_mode(app_state, key.code), InputMode::Preview => handle_preview_mode(app_state, key.code), + InputMode::Findings => handle_findings_mode(app_state, key.code), InputMode::LazyLoading => handle_lazy_loading_mode(app_state, key.code), } } @@ -203,6 +204,10 @@ fn handle_normal_mode(app_state: &mut AppState, key_code: KeyCode) { app_state.g_pressed = false; app_state.start_command_mode(); } + KeyCode::Char('f') => { + app_state.g_pressed = false; + app_state.show_findings(); + } KeyCode::Char('/') => { app_state.g_pressed = false; app_state.start_search_forward(); @@ -259,6 +264,17 @@ fn handle_normal_mode(app_state: &mut AppState, key_code: KeyCode) { } } +fn handle_findings_mode(app_state: &mut AppState, key_code: KeyCode) { + match key_code { + KeyCode::Char('j') | KeyCode::Down => app_state.select_next_finding(), + KeyCode::Char('k') | KeyCode::Up => app_state.select_prev_finding(), + KeyCode::Enter => app_state.activate_selected_finding(), + KeyCode::Char('r') | KeyCode::Char('f') => app_state.refresh_findings(), + KeyCode::Esc | KeyCode::Char('q') => app_state.close_findings(), + _ => {} + } +} + fn handle_editing_mode(app_state: &mut AppState, key: KeyEvent) { // Convert KeyEvent to Input for tui-textarea let input = Input { @@ -410,13 +426,16 @@ mod tests { use crate::excel::{Cell, Sheet, Workbook}; fn app_with_preview() -> AppState<'static> { - let mut data = vec![vec![Cell::empty(); 2]; 2]; - data[1][1] = Cell::new("Ada".to_string(), false); + let mut data = vec![vec![Cell::empty(); 3]; 3]; + data[1][1] = Cell::new("Name".to_string(), false); + data[1][2] = Cell::new("Name".to_string(), false); + data[2][1] = Cell::new("Ada".to_string(), false); + data[2][2] = Cell::new("10".to_string(), false); let sheet = Sheet { name: "Data".to_string(), data, - max_rows: 1, - max_cols: 1, + max_rows: 2, + max_cols: 2, is_loaded: true, }; let mut app = AppState::new( @@ -452,4 +471,34 @@ mod tests { assert!(app.query_preview.is_none()); assert!(!app.should_quit); } + + #[test] + fn f_opens_findings_panel_from_normal_mode() { + let mut app = app_with_preview(); + app.close_query_preview(); + + handle_key_event( + &mut app, + KeyEvent::new(KeyCode::Char('f'), KeyModifiers::empty()), + ); + + assert!(matches!(app.input_mode, InputMode::Findings)); + assert!(!app.findings.items.is_empty()); + } + + #[test] + fn j_moves_selected_finding_in_findings_mode() { + let mut app = app_with_preview(); + app.close_query_preview(); + app.show_findings(); + + let initial = app.findings.selected; + + handle_key_event( + &mut app, + KeyEvent::new(KeyCode::Char('j'), KeyModifiers::empty()), + ); + + assert!(app.findings.selected >= initial); + } } diff --git a/src/ui/render.rs b/src/ui/render.rs index c3f181c..daa5281 100644 --- a/src/ui/render.rs +++ b/src/ui/render.rs @@ -324,6 +324,8 @@ fn parse_command(input: &str) -> Vec> { "help", "preview", "pv", + "findings", + "issues", "addsheet", "delsheet", ]; @@ -458,6 +460,17 @@ fn draw_info_panel(f: &mut Frame, app_state: &mut AppState, area: Rect) { f.render_widget(edit_block, chunks[0]); f.render_widget(app_state.text_area.widget(), padded_area); + } else if let InputMode::Findings = app_state.input_mode { + let title = format!(" Findings ({}) ", app_state.findings.items.len()); + let findings_block = Block::default() + .borders(Borders::ALL) + .border_style(Style::default().fg(Color::LightCyan)) + .title(title); + let findings_paragraph = Paragraph::new(format_findings_lines(app_state)) + .block(findings_block) + .wrap(ratatui::widgets::Wrap { trim: false }); + + f.render_widget(findings_paragraph, chunks[0]); } else { // Get cell content let content = app_state.get_cell_content(row, col); @@ -541,10 +554,62 @@ fn format_query_preview(preview: &crate::app::QueryPreview) -> String { lines.join("\n") } +fn format_findings_lines(app_state: &AppState) -> Vec> { + if let Some(error) = &app_state.findings.last_refresh_error { + return vec![Line::from(format!("Refresh failed: {error}"))]; + } + + if app_state.findings.items.is_empty() { + return vec![Line::from("No findings. Press r to refresh.")]; + } + + app_state + .findings + .items + .iter() + .enumerate() + .map(|(index, finding)| { + let location = finding + .range + .clone() + .or_else(|| match (finding.row, finding.column) { + (Some(row), Some(col)) => Some(cell_reference((row, col))), + (Some(row), None) => Some(format!("row {row}")), + (None, Some(col)) => Some(index_to_col_name(col)), + (None, None) => None, + }) + .unwrap_or_else(|| "sheet".to_string()); + + let marker = if index == app_state.findings.selected { + ">" + } else { + " " + }; + let summary = format!( + "{marker} {} {} {} {}", + finding.severity.as_str(), + finding.rule_id.as_str(), + finding.sheet, + location + ); + let style = if index == app_state.findings.selected { + Style::default().add_modifier(Modifier::REVERSED) + } else { + Style::default() + }; + + Line::from(vec![ + Span::styled(summary, style), + Span::raw(format!(" {}", finding.message)), + ]) + }) + .collect() +} + fn draw_status_bar(f: &mut Frame, app_state: &AppState, area: Rect) { match app_state.input_mode { InputMode::Normal => { - let status = "Input :help for operating instructions | hjkl=move [ ]=prev/next-sheet Enter=edit y=copy d=cut p=paste /=search N/n=prev/next-search-result :=command "; + let status = "Input :help for operating instructions | hjkl=move f=findings [ ]=prev/next-sheet Enter=edit y=copy d=cut p=paste /=search N/n=prev/next-search-result :=command "; let status_widget = Paragraph::new(status) .style(Style::default()) @@ -619,6 +684,15 @@ fn draw_status_bar(f: &mut Frame, app_state: &AppState, area: Rect) { f.render_widget(status_widget, area); } + InputMode::Findings => { + let status_widget = + Paragraph::new("Findings: j/k or arrows=move Enter=jump r/f=refresh Esc/q=close") + .style(Style::default().fg(Color::LightCyan)) + .alignment(ratatui::layout::Alignment::Left); + + f.render_widget(status_widget, area); + } + InputMode::LazyLoading => { // Show a status message for lazy loading mode let status_widget = Paragraph::new( @@ -875,7 +949,7 @@ mod tests { fn app_with_preview() -> AppState<'static> { let mut data = vec![vec![Cell::empty(); 3]; 3]; data[1][1] = Cell::new("Name".to_string(), false); - data[1][2] = Cell::new("Score".to_string(), false); + data[1][2] = Cell::new("Name".to_string(), false); data[2][1] = Cell::new("Ada".to_string(), false); data[2][2] = Cell::new("10".to_string(), false); @@ -919,4 +993,28 @@ mod tests { assert!(rendered.contains("Filters: none")); assert!(rendered.contains("Ada")); } + + #[test] + fn renders_findings_panel_with_selected_entry() { + let backend = TestBackend::new(100, 30); + let mut terminal = Terminal::new(backend).unwrap(); + let mut app = app_with_preview(); + app.close_query_preview(); + app.show_findings(); + + terminal.draw(|frame| ui(frame, &mut app)).unwrap(); + + let rendered = terminal + .backend() + .buffer() + .content + .iter() + .map(|cell| cell.symbol.as_str()) + .collect::(); + + assert!(matches!(app.input_mode, InputMode::Findings)); + assert!(rendered.contains("Findings")); + assert!(rendered.contains("duplicate_headers")); + assert!(rendered.contains("Data")); + } } From 42cc984f75ceee6921f0a0a6a3df1c763ca85478 Mon Sep 17 00:00:00 2001 From: fuhan666 Date: Wed, 22 Apr 2026 04:30:24 +0800 Subject: [PATCH 6/7] docs: prepare the v1.3.0 release surface --- CHANGELOG.md | 17 ++++++++++++++- Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 33 ++++++++++++++++++++++++++++++ README_zh.md | 33 ++++++++++++++++++++++++++++++ src/cli/args.rs | 15 +++++++++++++- tests/help_and_version_cli_test.rs | 11 +++++++++- 7 files changed, 108 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c7813b..aa9ab8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.3.0] - 2026-04-22 + +### Added + +- `check ` workbook and sheet quality scans with a stable JSON summary, stats, and findings contract. +- Fixed v1.3.0 rule registry covering `blank_headers`, `duplicate_headers`, `blank_rows`, `blank_columns`, `null_ratio`, `duplicate_values`, `type_drift`, and `formula_presence`. +- TUI findings panel support for browsing quality-check results without leaving the terminal. +- Regression coverage for rule positives and negatives, workbook and sheet targeting, threshold filtering, finding order, and exit-code behavior. + +### Changed + +- `check --help`, `README.md`, and `README_zh.md` now document the v1.3.0 quality-check surface and rule IDs. +- Package version updated to `1.3.0`. + ## [1.2.0] - 2026-04-21 ### Added @@ -195,7 +209,8 @@ This is the initial release of excel-cli, a lightweight terminal-based Excel vie - Copy, cut, and paste functionality with `y`, `d`, and `p` keys - Support for pipe operator when exporting to JSON -[Unreleased]: https://github.com/fuhan666/excel-cli/compare/v1.2.0...HEAD +[Unreleased]: https://github.com/fuhan666/excel-cli/compare/v1.3.0...HEAD +[1.3.0]: https://github.com/fuhan666/excel-cli/compare/v1.2.0...v1.3.0 [1.2.0]: https://github.com/fuhan666/excel-cli/compare/v1.1.0...v1.2.0 [1.1.0]: https://github.com/fuhan666/excel-cli/compare/v1.0.0...v1.1.0 [1.0.0]: https://github.com/fuhan666/excel-cli/releases/tag/v1.0.0 diff --git a/Cargo.lock b/Cargo.lock index 56de56f..25dcd1e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -437,7 +437,7 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "excel-cli" -version = "1.2.0" +version = "1.3.0" dependencies = [ "anyhow", "calamine", diff --git a/Cargo.toml b/Cargo.toml index 0a68187..b60ad72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "excel-cli" -version = "1.2.0" +version = "1.3.0" edition = "2021" description = "Excel CLI for AI, scripting, and terminal users. Headless JSON API for automation, plus a Vim-like TUI for interactive browsing and editing." license = "MIT" diff --git a/README.md b/README.md index 7aa7143..7799dc2 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,11 @@ An Excel CLI for AI, scripting, and terminal users. Inspect and read headlessly - Edit cell contents directly in the terminal - Export data to JSON format - Select, filter, paginate, and stream read results for automation +- Run workbook and sheet quality checks with stable JSON findings - Delete rows and columns - Search functionality with highlighting - Read-only query preview in the TUI +- Review quality-check findings inside the TUI - Command mode for advanced operations ## Installation & Uninstallation @@ -95,6 +97,15 @@ excel-cli read records path/to/your/file.xlsx --sheet Orders \ --limit 50 \ --output-shape jsonl +# Check workbook data quality with the full v1.3.0 rule set +excel-cli check path/to/your/file.xlsx + +# Check one sheet with selected rules +excel-cli check path/to/your/file.xlsx --sheet Orders --rules duplicate_values,type_drift + +# Return only warning and error findings +excel-cli check path/to/your/file.xlsx --severity-threshold warning + # Open interactive TUI browser excel-cli ui path/to/your/file.xlsx ``` @@ -154,6 +165,28 @@ excel-cli read records report.xlsx --sheet Orders --output-shape jsonl Invalid selected columns, unknown filter columns, unsupported operators, malformed filters, invalid numeric comparisons, and invalid regular expressions return structured `invalid_query` errors with exit code `6`. +### Quality Checks + +`check` runs the fixed v1.3.0 quality-rule registry against a whole workbook or a single sheet and emits the same stable JSON envelope as the other headless commands. By default it scans every sheet, returns `info`, `warning`, and `error` findings, exits `1` when the filtered result set is non-empty, and exits `0` when no findings remain after filtering. + +Supported rules: +- `blank_headers`: flag blank header cells inside the detected header row +- `duplicate_headers`: flag duplicate normalized header names +- `blank_rows`: flag fully blank rows inside the used range +- `blank_columns`: flag fully blank columns inside the used range +- `null_ratio`: flag columns with blank-value ratios above the built-in thresholds +- `duplicate_values`: flag repeated values in the candidate identifier column +- `type_drift`: flag mixed dominant and drift data types in a column +- `formula_presence`: report sheets that still contain formulas in the checked data region + +Use `--sheet ` to limit the scan to one sheet, `--rules ` to run a subset in registry order, and `--severity-threshold ` to filter returned findings. `data.summary` counts only the returned findings, while `data.stats.finding_count_before_threshold` preserves the total before threshold filtering and `data.stats.rules_run` records the normalized rule order. + +```bash +excel-cli check report.xlsx +excel-cli check report.xlsx --sheet 客户 --rules blank_headers,duplicate_headers +excel-cli check report.xlsx --rules null_ratio,duplicate_values,type_drift --severity-threshold warning +``` + ### Exit Codes | Code | Meaning | diff --git a/README_zh.md b/README_zh.md index 278fbb3..d6410e4 100644 --- a/README_zh.md +++ b/README_zh.md @@ -9,9 +9,11 @@ - 直接在终端中编辑单元格内容 - 将数据导出为 JSON 格式 - 选择、筛选、分页和流式读取结果,支持自动化场景 +- 对整个工作簿或单个工作表运行质量检查,并输出稳定的 JSON findings - 删除行和列 - 搜索功能并支持高亮显示 - TUI 中只读查询预览 +- 在 TUI 中查看质量检查 findings - 命令模式支持高级操作 ## 安装与卸载 @@ -95,6 +97,15 @@ excel-cli read records path/to/your/file.xlsx --sheet Orders \ --limit 50 \ --output-shape jsonl +# 使用完整 v1.3.0 规则集检查工作簿质量 +excel-cli check path/to/your/file.xlsx + +# 仅检查一个工作表,并选择指定规则 +excel-cli check path/to/your/file.xlsx --sheet Orders --rules duplicate_values,type_drift + +# 只返回 warning 和 error 级别的 findings +excel-cli check path/to/your/file.xlsx --severity-threshold warning + # 打开交互式 TUI 浏览器 excel-cli ui path/to/your/file.xlsx ``` @@ -154,6 +165,28 @@ excel-cli read records report.xlsx --sheet Orders --output-shape jsonl 无效的选择列、未知的筛选列、不支持的操作符、格式错误的筛选条件、无效的数值比较和无效的正则表达式会返回结构化的 `invalid_query` 错误,退出码为 `6`。 +### 质量检查 + +`check` 会对整个工作簿或单个工作表运行固定的 v1.3.0 质量规则集,并返回与其他无界面命令一致的稳定 JSON 信封结构。默认会扫描所有工作表,返回 `info`、`warning`、`error` 三种级别的 findings;当过滤后的 findings 非空时退出码为 `1`,过滤后为空时退出码为 `0`。 + +支持的规则: +- `blank_headers`:标记检测到的表头行中的空白表头单元格 +- `duplicate_headers`:标记标准化后重复的表头名称 +- `blank_rows`:标记已使用区域中的整空白行 +- `blank_columns`:标记已使用区域中的整空白列 +- `null_ratio`:根据内置阈值标记空值比例过高的列 +- `duplicate_values`:标记候选标识列中的重复值 +- `type_drift`:标记同一列中偏离主类型的数据 +- `formula_presence`:报告检查区域中仍然包含公式的工作表 + +使用 `--sheet ` 可以只检查单个工作表,`--rules <以逗号分隔的规则 ID>` 可以按注册表顺序运行子集规则,`--severity-threshold ` 可以过滤返回的 findings。`data.summary` 只统计最终返回的 findings,而 `data.stats.finding_count_before_threshold` 会保留阈值过滤前的总数,`data.stats.rules_run` 则记录规范化后的规则顺序。 + +```bash +excel-cli check report.xlsx +excel-cli check report.xlsx --sheet 客户 --rules blank_headers,duplicate_headers +excel-cli check report.xlsx --rules null_ratio,duplicate_values,type_drift --severity-threshold warning +``` + ### 退出码 | 代码 | 含义 | diff --git a/src/cli/args.rs b/src/cli/args.rs index bdbc294..c6b05d6 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -3,7 +3,12 @@ use std::path::PathBuf; #[derive(Parser)] #[command(name = "excel-cli")] -#[command(author, version, about = "Excel CLI - single-file read-only inspector", long_about = None)] +#[command( + author, + version, + about = "Excel CLI for AI, scripting, and terminal users", + long_about = None +)] pub struct Cli { #[command(subcommand)] pub command: Commands, @@ -31,10 +36,18 @@ pub enum Commands { sheet: Option, /// Check rules to run, comma-separated + /// + /// Supported rules: blank_headers, duplicate_headers, blank_rows, + /// blank_columns, null_ratio, duplicate_values, type_drift, + /// formula_presence #[arg(long)] rules: Option, /// Minimum finding severity to return + /// + /// Findings below this threshold stay out of the response, while + /// data.stats.finding_count_before_threshold preserves the pre-filter + /// total. #[arg(long, value_enum, default_value = "info")] severity_threshold: SeverityThreshold, }, diff --git a/tests/help_and_version_cli_test.rs b/tests/help_and_version_cli_test.rs index 5352b08..1027a5e 100644 --- a/tests/help_and_version_cli_test.rs +++ b/tests/help_and_version_cli_test.rs @@ -30,6 +30,7 @@ fn top_level_help_prints_to_stdout_and_exits_zero() { ); assert!(stdout.contains("inspect"), "unexpected stdout: {stdout}"); assert!(stdout.contains("read"), "unexpected stdout: {stdout}"); + assert!(stdout.contains("check"), "unexpected stdout: {stdout}"); assert!(stdout.contains("ui"), "unexpected stdout: {stdout}"); } @@ -106,6 +107,14 @@ fn check_help_documents_v13_contract_flags() { "--sheet ", "--rules ", "--severity-threshold ", + "blank_headers", + "duplicate_headers", + "blank_rows", + "blank_columns", + "null_ratio", + "duplicate_values", + "type_drift", + "formula_presence", ] { assert!( stdout.contains(expected), @@ -133,5 +142,5 @@ fn version_prints_to_stdout_and_exits_zero() { ); let stdout = String::from_utf8_lossy(&output.stdout); - assert_eq!(stdout.trim(), "excel-cli 1.2.0"); + assert_eq!(stdout.trim(), "excel-cli 1.3.0"); } From d207c834240409c94fdaefa0bb82cffa13c5cd4c Mon Sep 17 00:00:00 2001 From: fuhan666 Date: Wed, 22 Apr 2026 11:11:34 +0800 Subject: [PATCH 7/7] docs: update changelog and README for v1.3.0 release --- CHANGELOG.md | 8 -- README.md | 4 +- README_zh.md | 238 +++++++++++++++++++++++++-------------------------- 3 files changed, 121 insertions(+), 129 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa9ab8d..c160324 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,11 +16,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - TUI findings panel support for browsing quality-check results without leaving the terminal. - Regression coverage for rule positives and negatives, workbook and sheet targeting, threshold filtering, finding order, and exit-code behavior. -### Changed - -- `check --help`, `README.md`, and `README_zh.md` now document the v1.3.0 quality-check surface and rule IDs. -- Package version updated to `1.3.0`. - ## [1.2.0] - 2026-04-21 ### Added @@ -35,12 +30,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - JSON Lines output for stream-friendly record processing. - Read-only TUI query preview with `:preview` and `:pv`. - Regression coverage for filtering, output shapes, invalid query errors, no-match results, help text, and query preview behavior. -- CI formatting enforcement with `cargo fmt --check`. ### Changed - Enriched read metadata now reports applied filters, selected columns, returned row count, truncation, and output shape. -- Package version updated to `1.2.0`. ## [1.1.0] - 2026-04-21 @@ -49,7 +42,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `inspect columns --sheet ` to profile column headers, generated safe names, duplicate headers, inferred types, non-null ratios, formula ratios, and sample values. - `inspect tables --sheet ` to detect table-like regions with ranges, header rows, dimensions, and confidence scores. - Regression coverage for structure inspection cases including duplicate headers, blank headers, preamble sections, late headers, multi-table sheets, mixed-type columns, formula columns, and non-ASCII column names. -- Release branch CI triggers for `release/**` branches. ## [1.0.0] - 2026-04-20 diff --git a/README.md b/README.md index 7799dc2..45b637b 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ excel-cli read records path/to/your/file.xlsx --sheet Orders \ --limit 50 \ --output-shape jsonl -# Check workbook data quality with the full v1.3.0 rule set +# Check workbook data quality with the full rule set excel-cli check path/to/your/file.xlsx # Check one sheet with selected rules @@ -167,7 +167,7 @@ Invalid selected columns, unknown filter columns, unsupported operators, malform ### Quality Checks -`check` runs the fixed v1.3.0 quality-rule registry against a whole workbook or a single sheet and emits the same stable JSON envelope as the other headless commands. By default it scans every sheet, returns `info`, `warning`, and `error` findings, exits `1` when the filtered result set is non-empty, and exits `0` when no findings remain after filtering. +`check` runs the fixed quality-rule registry against a whole workbook or a single sheet and emits the same stable JSON envelope as the other headless commands. By default it scans every sheet, returns `info`, `warning`, and `error` findings, exits `1` when the filtered result set is non-empty, and exits `0` when no findings remain after filtering. Supported rules: - `blank_headers`: flag blank header cells inside the detected header row diff --git a/README_zh.md b/README_zh.md index d6410e4..b9f2c64 100644 --- a/README_zh.md +++ b/README_zh.md @@ -1,19 +1,19 @@ # Excel-CLI -面向 AI、脚本和终端用户的 Excel 命令行工具。提供 JSON API 供自动化场景调用,同时内置交互式 TUI,支持类 Vim 快捷键浏览和编辑表格。 +面向 AI、脚本和终端用户的 Excel 命令行工具。既提供 JSON API 供自动化调用,也内置了交互式 TUI,支持用类 Vim 快捷键浏览和编辑表格。 ## 功能特性 -- 使用类 Vim 快捷键浏览和导航 Excel 工作表 -- 在多工作表工作簿中创建、切换和删除工作表 -- 直接在终端中编辑单元格内容 -- 将数据导出为 JSON 格式 -- 选择、筛选、分页和流式读取结果,支持自动化场景 -- 对整个工作簿或单个工作表运行质量检查,并输出稳定的 JSON findings +- 用类 Vim 快捷键浏览、导航 Excel 工作表 +- 在多表工作簿里创建、切换和删除工作表 +- 直接在终端里编辑单元格内容 +- 将数据导出为 JSON +- 支持列选择、筛选、分页和流式读取,方便自动化场景 +- 对工作簿或单个工作表做质量检查,并以稳定的 JSON 格式输出结果 - 删除行和列 -- 搜索功能并支持高亮显示 -- TUI 中只读查询预览 -- 在 TUI 中查看质量检查 findings +- 支持搜索并高亮匹配项 +- 在 TUI 内以只读方式预览查询结果 +- 在 TUI 内查看质量检查结果 - 命令模式支持高级操作 ## 安装与卸载 @@ -22,7 +22,7 @@ #### 方式一:通过 Cargo 安装(推荐) -本包已发布到 crates.io,可直接使用以下命令安装: +本包已发布到 crates.io,直接执行: ```bash cargo install excel-cli --locked @@ -31,14 +31,14 @@ cargo install excel-cli --locked #### 方式二:从 GitHub Release 下载 1. 访问 [GitHub Releases](https://github.com/fuhan666/excel-cli/releases) -2. 下载适合您操作系统的预编译二进制文件 -3. 将可执行文件放入系统路径,或直接从下载位置运行 +2. 下载适合你系统的预编译二进制文件 +3. 把可执行文件放到系统 PATH 里,或直接在下载目录运行 -Linux 和 macOS 用户可能需要先添加执行权限 +Linux 和 macOS 用户可能需要先添加执行权限。 #### 方式三:从源码编译 -需要 Rust 和 Cargo。使用以下命令安装: +需要 Rust 和 Cargo。执行以下命令: ```bash # 克隆仓库 @@ -59,20 +59,20 @@ cargo uninstall excel-cli ## 使用方法 ```bash -# 检查工作簿元数据 +# 查看工作簿元数据 excel-cli inspect workbook path/to/your/file.xlsx -# 检查单个工作表 +# 查看单个工作表 excel-cli inspect sheet path/to/your/file.xlsx --sheet Orders excel-cli inspect sheet path/to/your/file.xlsx --sheet-index 0 -# 从工作表中采样数据 +# 从工作表采样数据 excel-cli inspect sample path/to/your/file.xlsx --sheet Orders --rows 10 -# 检查列信息(自动检测表头) +# 查看列信息(自动检测表头) excel-cli inspect columns path/to/your/file.xlsx --sheet Orders --header-row auto -# 检查表格区域 +# 检测表格区域 excel-cli inspect tables path/to/your/file.xlsx --sheet Orders # 读取单个单元格 @@ -97,13 +97,13 @@ excel-cli read records path/to/your/file.xlsx --sheet Orders \ --limit 50 \ --output-shape jsonl -# 使用完整 v1.3.0 规则集检查工作簿质量 +# 用完整规则集检查工作簿质量 excel-cli check path/to/your/file.xlsx -# 仅检查一个工作表,并选择指定规则 +# 只检查一个工作表,并指定规则 excel-cli check path/to/your/file.xlsx --sheet Orders --rules duplicate_values,type_drift -# 只返回 warning 和 error 级别的 findings +# 只返回 warning 和 error 级别的结果 excel-cli check path/to/your/file.xlsx --severity-threshold warning # 打开交互式 TUI 浏览器 @@ -112,18 +112,18 @@ excel-cli ui path/to/your/file.xlsx ### 命令行选项 -所有无界面命令(`inspect`、`read`、`check`)默认输出 JSON 格式。使用 `--format text` 获取人类可读的输出。 +所有非交互式命令(`inspect`、`read`、`check`)默认输出 JSON。加 `--format text` 可得到人类可读的文本输出。 -**全局输出规则:** -- `stdout` 仅包含结果 -- `stderr` 仅包含错误 -- 成功返回退出码 `0` -- 失败返回非零退出码(见下方退出码说明) +**全局输出约定:** +- `stdout` 只输出结果 +- `stderr` 只输出错误 +- 成功时返回退出码 `0` +- 失败时返回非零退出码(见下方说明) - 空单元格在 JSON 模式下输出 `null`,在文本模式下输出空字符串 ### 读取行与记录 -`read rows` 默认返回位置数组。使用 `--output-shape records` 可以返回以解析后的表头为键的对象,或者直接使用 `read records`,此时记录形式输出为默认。 +`read rows` 默认返回位置数组。加 `--output-shape records` 可返回以解析后的表头为键的对象;也可以直接用 `read records`,此时默认就是记录格式。 ```bash excel-cli read rows report.xlsx --sheet Orders --output-shape rows @@ -131,13 +131,13 @@ excel-cli read rows report.xlsx --sheet Orders --output-shape records excel-cli read records report.xlsx --sheet Orders ``` -`--select` 保留指定的列,列名来自解析后的表头行,重复或空白的表头会按与 `inspect columns` 相同的方式处理为稳定名称。 +`--select` 用于保留指定列。列名来自解析后的表头行,重复或空白的表头会按与 `inspect columns` 相同的方式处理成稳定名称。 ```bash excel-cli read records report.xlsx --sheet Orders --select order_id,customer,total ``` -`--filter 字段:操作符:值` 按列名筛选行。重复 `--filter` 会以 AND 逻辑组合条件。支持的操作符有 `eq`、`ne`、`gt`、`gte`、`lt`、`lte`、`contains`、`regex`、`isnull` 和 `notnull`。 +`--filter 字段:操作符:值` 按列名筛选行。多次使用 `--filter` 会以 AND 逻辑组合条件。支持的操作符有 `eq`、`ne`、`gt`、`gte`、`lt`、`lte`、`contains`、`regex`、`isnull` 和 `notnull`。 ```bash excel-cli read records report.xlsx --sheet Orders --filter status:eq:open @@ -147,7 +147,7 @@ excel-cli read records report.xlsx --sheet Orders --filter order_id:regex:^INV-[ excel-cli read records report.xlsx --sheet Orders --filter optional_note:isnull: ``` -`--limit` 和 `--offset` 在筛选后生效。`--non-empty` 会移除所有单元格为空的行。未匹配的筛选仍是成功的查询:返回空的 `rows` 或 `records` 数组,退出码为 `0`。 +`--limit` 和 `--offset` 在筛选之后生效。`--non-empty` 会去掉所有单元格都为空的行。即使筛选后没有匹配结果,也属于成功查询,返回空的 `rows` 或 `records` 数组,退出码为 `0`。 ```bash excel-cli read records report.xlsx --sheet Orders \ @@ -157,29 +157,29 @@ excel-cli read records report.xlsx --sheet Orders \ --non-empty ``` -`--output-shape jsonl` 将换行分隔的 JSON 记录直接输出到 stdout,而不是标准的信封格式。它使用与记录输出相同的选择、筛选、分页和表头解析规则。 +`--output-shape jsonl` 将换行分隔的 JSON 记录直接输出到 stdout,而非标准包装格式。它沿用记录输出时的列选择、筛选、分页和表头解析规则。 ```bash excel-cli read records report.xlsx --sheet Orders --output-shape jsonl ``` -无效的选择列、未知的筛选列、不支持的操作符、格式错误的筛选条件、无效的数值比较和无效的正则表达式会返回结构化的 `invalid_query` 错误,退出码为 `6`。 +如果列名不存在、筛选列未知、操作符不支持、筛选条件格式错误、数值比较无效或正则表达式无效,会返回结构化的 `invalid_query` 错误,退出码为 `6`。 ### 质量检查 -`check` 会对整个工作簿或单个工作表运行固定的 v1.3.0 质量规则集,并返回与其他无界面命令一致的稳定 JSON 信封结构。默认会扫描所有工作表,返回 `info`、`warning`、`error` 三种级别的 findings;当过滤后的 findings 非空时退出码为 `1`,过滤后为空时退出码为 `0`。 +`check` 会对整个工作簿或单个工作表运行固定的质量规则集,输出格式与其他非交互式命令一致,采用稳定的 JSON 包装结构。默认扫描所有工作表,返回 `info`、`warning`、`error` 三级结果;过滤后仍有结果则退出码为 `1`,过滤后为空则退出码为 `0`。 支持的规则: -- `blank_headers`:标记检测到的表头行中的空白表头单元格 +- `blank_headers`:标记检测到的表头行中的空白单元格 - `duplicate_headers`:标记标准化后重复的表头名称 -- `blank_rows`:标记已使用区域中的整空白行 -- `blank_columns`:标记已使用区域中的整空白列 -- `null_ratio`:根据内置阈值标记空值比例过高的列 +- `blank_rows`:标记已用区域内的整行空白 +- `blank_columns`:标记已用区域内的整列空白 +- `null_ratio`:标记空值比例超过内置阈值的列 - `duplicate_values`:标记候选标识列中的重复值 - `type_drift`:标记同一列中偏离主类型的数据 -- `formula_presence`:报告检查区域中仍然包含公式的工作表 +- `formula_presence`:报告检查区域内仍包含公式的工作表 -使用 `--sheet ` 可以只检查单个工作表,`--rules <以逗号分隔的规则 ID>` 可以按注册表顺序运行子集规则,`--severity-threshold ` 可以过滤返回的 findings。`data.summary` 只统计最终返回的 findings,而 `data.stats.finding_count_before_threshold` 会保留阈值过滤前的总数,`data.stats.rules_run` 则记录规范化后的规则顺序。 +用 `--sheet ` 可限定只检查单个工作表,`--rules <逗号分隔的规则 ID>` 可按注册表顺序运行部分规则,`--severity-threshold ` 可过滤返回的结果级别。`data.summary` 只统计最终返回的结果数,`data.stats.finding_count_before_threshold` 保留阈值过滤前的总数,`data.stats.rules_run` 记录规范化后的规则顺序。 ```bash excel-cli check report.xlsx @@ -202,7 +202,7 @@ excel-cli check report.xlsx --rules null_ratio,duplicate_values,type_drift --sev ### 输出格式 -无界面成功响应遵循稳定的信封结构: +非交互式命令的成功响应采用统一的 JSON 包装格式: ```json { @@ -218,21 +218,21 @@ excel-cli check report.xlsx --rules null_ratio,duplicate_values,type_drift --sev ### 结构检查 -`inspect columns` 分析工作表中的每一列,帮助你为后续命令选择稳定的字段名。响应数据包含 `columns`,每列包含 `index`、原始 `name`、生成的 `safe_name`、`is_duplicate`、尽力推断的 `inferred_type`、`non_null_ratio`、`formula_ratio` 和 `sample_values`。响应元数据包含 `header_row_mode`、`resolved_header_row`、`column_count` 和 `data_row_count`。 +`inspect columns` 分析工作表中的每一列,帮你为后续命令选择稳定的字段名。响应数据的 `columns` 数组中,每列包含 `index`、原始 `name`、生成的 `safe_name`、`is_duplicate`、推断的 `inferred_type`、`non_null_ratio`、`formula_ratio` 和 `sample_values`。响应元数据包含 `header_row_mode`、`resolved_header_row`、`column_count` 和 `data_row_count`。 ```bash excel-cli inspect columns path/to/your/file.xlsx --sheet Orders --header-row auto excel-cli inspect columns path/to/your/file.xlsx --sheet Orders --header-row 2 --format text ``` -`inspect tables` 检测工作表中的连续表格区域。响应数据包含 `data.candidates`;每个候选区域包含 `range`、`header_row`、`column_count`、`row_count` 和 `confidence`。响应元数据包含 `candidate_count`。 +`inspect tables` 检测工作表中的连续表格区域。响应数据的 `data.candidates` 中,每个候选区域包含 `range`、`header_row`、`column_count`、`row_count` 和 `confidence`。响应元数据包含 `candidate_count`。 ```bash excel-cli inspect tables path/to/your/file.xlsx --sheet Orders excel-cli inspect tables path/to/your/file.xlsx --sheet Orders --format text ``` -无界面错误响应遵循稳定的信封结构: +非交互式命令的错误响应也采用统一的 JSON 包装格式: ```json { @@ -249,10 +249,10 @@ excel-cli inspect tables path/to/your/file.xlsx --sheet Orders --format text ## 用户界面 -应用具有简洁直观的界面: +界面简洁直观: - **标题栏与工作表标签**:显示当前文件名和所有可用工作表,当前工作表高亮显示 -- **电子表格区域**:显示 Excel 数据的主区域 +- **电子表格区域**:主数据展示区域 - **内容面板**:显示当前选中单元格的完整内容 - **通知面板**:显示操作反馈和系统通知 - **状态栏**:显示操作提示和当前输入的命令 @@ -262,49 +262,49 @@ excel-cli inspect tables path/to/your/file.xlsx --sheet Orders --format text - `h`、`j`、`k`、`l` 或方向键:在单元格之间移动(每次 1 格) - `[`:切换到上一个工作表(停在第一个工作表) - `]`:切换到下一个工作表(停在最后一个工作表) -- `0`:跳转到当前行的第一列 -- `^`:跳转到当前行的第一个非空列 -- `$`:跳转到当前行的最后一列 -- `gg`:跳转到当前列的第一行 -- `G`:跳转到当前列的最后一行 -- `Ctrl+←`(Mac 上为 `Command+←`):如果当前单元格为空,跳到左侧第一个非空单元格;如果当前单元格非空,跳到左侧最后一个非空单元格 -- `Ctrl+→`(Mac 上为 `Command+→`):如果当前单元格为空,跳到右侧第一个非空单元格;如果当前单元格非空,跳到右侧最后一个非空单元格 -- `Ctrl+↑`(Mac 上为 `Command+↑`):如果当前单元格为空,跳到上方第一个非空单元格;如果当前单元格非空,跳到上方最后一个非空单元格 -- `Ctrl+↓`(Mac 上为 `Command+↓`):如果当前单元格为空,跳到下方第一个非空单元格;如果当前单元格非空,跳到下方最后一个非空单元格 +- `0`:跳到当前行的第一列 +- `^`:跳到当前行的第一个非空列 +- `$`:跳到当前行的最后一列 +- `gg`:跳到当前列的第一行 +- `G`:跳到当前列的最后一行 +- `Ctrl+←`(Mac 上为 `Command+←`):当前单元格为空时跳到左侧第一个非空单元格;非空时跳到左侧最后一个非空单元格 +- `Ctrl+→`(Mac 上为 `Command+→`):当前单元格为空时跳到右侧第一个非空单元格;非空时跳到右侧最后一个非空单元格 +- `Ctrl+↑`(Mac 上为 `Command+↑`):当前单元格为空时跳到上方第一个非空单元格;非空时跳到上方最后一个非空单元格 +- `Ctrl+↓`(Mac 上为 `Command+↓`):当前单元格为空时跳到下方第一个非空单元格;非空时跳到下方最后一个非空单元格 - `Enter`:编辑当前单元格 - `y`:复制当前单元格内容 - `d`:剪切当前单元格内容 - `p`:将剪贴板内容粘贴到当前单元格 -- `u`:撤销上一次操作(编辑、行/列变更、工作表创建/删除) +- `u`:撤销上一次操作(编辑、行列变更、工作表创建/删除) - `Ctrl+r`:重做上一次撤销的操作 - `/`:开始向前搜索 - `?`:开始向后搜索 -- `n`:跳转到下一个搜索结果 -- `N`:跳转到上一个搜索结果 +- `n`:跳到下一个搜索结果 +- `N`:跳到上一个搜索结果 - `:`:进入命令模式(类 Vim 命令) ## Vim 编辑模式 编辑单元格内容时(按 `Enter` 进入编辑模式): -- **模式切换**: +- **模式切换** - - `Esc`:退出 Vim 模式并保存更改 + - `Esc`:退出 Vim 模式并保存修改 - `i`:进入插入模式 - `v`:进入可视模式 -- **导航(普通模式下)**: +- **导航(普通模式)** - `h`、`j`、`k`、`l`:左、下、上、右移动光标 - - `w`:移动到下一个单词 - - `b`:移动到单词开头 - - `e`:移动到单词末尾 - - `$`:移动到行尾 - - `^`:移动到行首第一个非空白字符 - - `gg`:移动到第一行 - - `G`:移动到最后一行 + - `w`:跳到下一个单词 + - `b`:跳到单词开头 + - `e`:跳到单词末尾 + - `$`:跳到行尾 + - `^`:跳到行首第一个非空白字符 + - `gg`:跳到第一行 + - `G`:跳到最后一行 -- **编辑操作**: +- **编辑操作** - `x`:删除光标下的字符 - `D`:删除到行尾 @@ -314,108 +314,108 @@ excel-cli inspect tables path/to/your/file.xlsx --sheet Orders --format text - `A`:在行尾追加 - `I`:在行首插入 -- **可视模式操作**: +- **可视模式操作** - `y`:复制(yank)选中的文本 - `d`:删除选中的文本 - `c`:修改选中的文本(删除并进入插入模式) -- **操作符命令**: +- **操作符命令** - `y{motion}`:复制 motion 指定的文本 - `d{motion}`:删除 motion 指定的文本 - `c{motion}`:修改 motion 指定的文本 -- **剪贴板操作**: +- **剪贴板操作** - `p`:粘贴复制或删除的文本 - - `u`:撤销上一次更改 - - `Ctrl+r`:重做上一次撤销的更改 + - `u`:撤销上一次修改 + - `Ctrl+r`:重做上一次撤销的修改 ## 搜索模式 按 `/`(向前搜索)或 `?`(向后搜索)进入搜索模式: - 输入搜索关键词 -- `Enter`:执行搜索并跳转到第一个匹配项 +- `Enter`:执行搜索并跳到第一个匹配项 - `Esc`:取消搜索 -- `n`:跳转到下一个匹配项(搜索执行后) -- `N`:跳转到上一个匹配项(搜索执行后) +- `n`:跳到下一个匹配项(搜索执行后) +- `N`:跳到上一个匹配项(搜索执行后) - 搜索结果以黄色高亮显示 -- 搜索采用先行后列的顺序(从左到右逐行搜索,然后移动到下一行) +- 搜索顺序为先逐行从左到右,再从上到下移动到下一行 ## 命令模式 -按 `:` 进入命令模式。可用命令: +按 `:` 进入命令模式。可用命令如下: ### 列宽命令 -- `:cw fit` - 自动调整当前列宽以适应内容 -- `:cw fit all` - 自动调整所有列宽以适应内容 -- `:cw min` - 最小化当前列宽(最大 15 或内容宽度) -- `:cw min all` - 最小化所有列宽(最大 15 或内容宽度) -- `:cw [数字]` - 将当前列宽设置为指定值 +- `:cw fit` — 自动调整当前列宽以适应内容 +- `:cw fit all` — 自动调整所有列宽以适应内容 +- `:cw min` — 最小化当前列宽(最大 15 或内容宽度) +- `:cw min all` — 最小化所有列宽(最大 15 或内容宽度) +- `:cw [数字]` — 将当前列宽设为指定值 ### JSON 导出命令 -- `:ej [h|v] [行数]` - 将当前工作表数据导出为 JSON 格式 +- `:ej [h|v] [行数]` — 将当前工作表导出为 JSON - - `h|v` - 表头方向:`h` 为横向(顶部行),`v` 为纵向(左侧列) - - `行数` - 表头行数(横向)或列数(纵向) + - `h|v` — 表头方向:`h` 为横向(顶部行),`v` 为纵向(左侧列) + - `行数` — 表头行数(横向)或列数(纵向) -- `:eja [h|v] [行数]` - 将所有工作表导出到单个 JSON 文件 - - 使用与 `:ej` 相同的参数 - - 创建一个 JSON 对象,以工作表名称为键,工作表数据为值 +- `:eja [h|v] [行数]` — 将所有工作表导出到单个 JSON 文件 + - 参数与 `:ej` 相同 + - 生成一个 JSON 对象,以工作表名为键,数据为值 -输出文件名按以下格式自动生成: +输出文件名自动生成,格式如下: - 单个工作表:`原文件名_sheet_工作表名称_YYYYMMDD_HHMMSS.json` - 所有工作表:`原文件名_all_sheets_YYYYMMDD_HHMMSS.json` -JSON 文件保存在与原始 Excel 文件相同的目录中。 +JSON 文件保存在原始 Excel 文件所在目录。 ### 类 Vim 命令 -- `:w` - 保存文件但不退出 -- `:wq` 或 `:x` - 保存并退出 -- `:q` - 退出(如果有未保存的更改会警告) -- `:q!` - 强制退出不保存 - 文件保存逻辑详见[下文](#文件保存逻辑)。 +- `:w` — 保存文件但不退出 +- `:wq` 或 `:x` — 保存并退出 +- `:q` — 退出(如有未保存的修改会提示警告) +- `:q!` — 强制退出,不保存 + 保存逻辑详见[下文](#文件保存逻辑)。 -- `:y` - 复制当前单元格内容 -- `:d` - 剪切当前单元格内容 -- `:put` 或 `:pu` - 将剪贴板内容粘贴到当前单元格 -- `:[单元格]` - 跳转到指定单元格(例如 `:A1`、`:B10`)。支持大小写字母(`:a1` 与 `:A1` 效果相同) +- `:y` — 复制当前单元格内容 +- `:d` — 剪切当前单元格内容 +- `:put` 或 `:pu` — 将剪贴板内容粘贴到当前单元格 +- `:[单元格]` — 跳到指定单元格(如 `:A1`、`:B10`)。大小写不敏感(`:a1` 与 `:A1` 效果相同) ### 工作表管理命令 -- `:addsheet [名称]` - 在当前工作表后添加新工作表 -- `:sheet [名称/编号]` - 按名称或索引切换工作表(从 1 开始计数) -- `:delsheet` - 删除当前工作表 +- `:addsheet [名称]` — 在当前工作表后添加新工作表 +- `:sheet [名称/编号]` — 按名称或索引切换工作表(从 1 开始计数) +- `:delsheet` — 删除当前工作表 ### 行列管理命令 -- `:dr` - 删除当前行 -- `:dr [行号]` - 删除指定行(例如 `:dr 5` 删除第 5 行) -- `:dr [起始] [结束]` - 删除行范围(例如 `:dr 5 10` 删除第 5 到 10 行) -- `:dc` - 删除当前列 -- `:dc [列]` - 删除指定列(例如 `:dc A`、`:dc a` 或 `:dc 1` 均删除 A 列) -- `:dc [起始] [结束]` - 删除列范围(例如 `:dc A C` 或 `:dc a c` 删除 A 到 C 列) +- `:dr` — 删除当前行 +- `:dr [行号]` — 删除指定行(如 `:dr 5` 删除第 5 行) +- `:dr [起始] [结束]` — 删除行范围(如 `:dr 5 10` 删除第 5 到 10 行) +- `:dc` — 删除当前列 +- `:dc [列]` — 删除指定列(如 `:dc A`、`:dc a` 或 `:dc 1` 都删除 A 列) +- `:dc [起始] [结束]` — 删除列范围(如 `:dc A C` 或 `:dc a c` 删除 A 到 C 列) ### 其他命令 -- `:nohlsearch` 或 `:noh` - 关闭搜索高亮 -- `:help` - 显示可用命令 -- `:preview` 或 `:pv` - 显示当前工作表目标和样本行的只读预览 +- `:nohlsearch` 或 `:noh` — 关闭搜索高亮 +- `:help` — 显示可用命令 +- `:preview` 或 `:pv` — 显示当前工作表目标和样本行的只读预览 ## 文件保存逻辑 -Excel-CLI 采用非破坏性方式保存文件: +Excel-CLI 采用非破坏性保存方式: -- 保存文件时(使用 `:w`、`:wq` 或 `:x`),应用会检查是否进行了更改 -- 如果没有更改,不会创建新文件,并显示"No changes to save"消息 +- 保存时(使用 `:w`、`:wq` 或 `:x`),程序会检查是否有修改 +- 如果没有修改,不会创建新文件,并提示 "No changes to save" - 如果启用了懒加载,所有未加载的工作表会在保存前加载,以保留工作簿内容 -- 如果进行了更改,会创建一个带时间戳的新文件,格式为 `原文件名_YYYYMMDD_HHMMSS.xlsx` +- 如果有修改,会创建一个带时间戳的新文件,格式为 `原文件名_YYYYMMDD_HHMMSS.xlsx` - 新文件不包含任何样式 - 原始文件永远不会被修改