From 1918f38f3ae341886f528bf3961367a6edbd09eb Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 2 Jun 2026 15:17:32 +0200 Subject: [PATCH 1/3] feat: allow `fmt` command to process directories recursively The `yr fmt` command can now accept directory paths in addition to individual file paths. When a directory is provided, it will format all YARA files (`.yar`, `.yara`) found within it. By default, only files in the top-level directory are processed. A new `-r` or `--recursive` option has been added to enable scanning for YARA files in subdirectories, allowing users to specify a maximum recursion depth. This improves usability by simplifying formatting across multi-file projects. Closes #271 --- cli/src/commands/fmt.rs | 61 +++++++++++++++++++++++++++++------------ cli/src/tests/fmt.rs | 43 +++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 18 deletions(-) diff --git a/cli/src/commands/fmt.rs b/cli/src/commands/fmt.rs index 3ff26cbe8..5c7c81f42 100644 --- a/cli/src/commands/fmt.rs +++ b/cli/src/commands/fmt.rs @@ -8,13 +8,14 @@ use yara_x_fmt::{Formatter, Indentation}; use crate::config::Config; use crate::help; +use crate::walk; pub fn fmt() -> Command { super::command("fmt") .about("Format YARA source files") .arg( arg!() - .help("Path to YARA source file") + .help("Path to YARA source file or directory") .required(true) .value_parser(value_parser!(PathBuf)) .action(ArgAction::Append), @@ -23,6 +24,14 @@ pub fn fmt() -> Command { arg!(-c --check "Run in 'check' mode") .long_help(help::FMT_CHECK_MODE), ) + .arg( + arg!(-r - -"recursive"[MAX_DEPTH]) + .help("Walk directories recursively up to a given depth") + .long_help(help::RECURSIVE_LONG_HELP) + .default_missing_value("1000") + .require_equals(true) + .value_parser(value_parser!(usize)), + ) .arg( arg!(-t - -"tab-size" ) .help("Tab size (in spaces) used in source files") @@ -36,6 +45,7 @@ pub fn exec_fmt(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { let files = 
args.get_many::("FILE").unwrap(); let check = args.get_flag("check"); let tab_size = args.get_one::("tab-size").unwrap(); + let recursive = args.get_one::("recursive"); let formatter = Formatter::new() .input_tab_size(*tab_size) @@ -56,27 +66,42 @@ pub fn exec_fmt(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { config.fmt.rule.empty_line_after_section_header, ); - let mut modified_files: Vec<&PathBuf> = Vec::new(); + let mut modified_files: Vec = Vec::new(); for file in files { - let input = fs::read(file.as_path())?; - let file_modified = if check { - formatter.format(input.as_slice(), io::sink())? + let mut walker = walk::Walker::path(file); + if let Some(recursive) = recursive { + walker.max_depth(*recursive); } else { - let mut formatted = Cursor::new(Vec::with_capacity(input.len())); - if formatter.format(input.as_slice(), &mut formatted)? { - formatted.seek(SeekFrom::Start(0))?; - let mut output_file = File::create(file.as_path())?; - io::copy(&mut formatted, &mut output_file)?; - true - } else { - false - } - }; - - if file_modified { - modified_files.push(file); + walker.max_depth(0); } + walker.filter("**/*.yar").filter("**/*.yara"); + + walker.walk( + |file_path| { + let input = fs::read(file_path)?; + let file_modified = if check { + formatter.format(input.as_slice(), io::sink())? + } else { + let mut formatted = + Cursor::new(Vec::with_capacity(input.len())); + if formatter.format(input.as_slice(), &mut formatted)? 
{ + formatted.seek(SeekFrom::Start(0))?; + let mut output_file = File::create(file_path)?; + io::copy(&mut formatted, &mut output_file)?; + true + } else { + false + } + }; + + if file_modified { + modified_files.push(file_path.to_path_buf()); + } + Ok(()) + }, + Err, + )?; } if !modified_files.is_empty() { diff --git a/cli/src/tests/fmt.rs b/cli/src/tests/fmt.rs index 7398dfe1b..78aa1c035 100644 --- a/cli/src/tests/fmt.rs +++ b/cli/src/tests/fmt.rs @@ -53,3 +53,46 @@ fn utf8_error() { .stderr("error: invalid UTF-8 at [0..1]\n") .code(1); } + +#[test] +fn fmt_directory() { + let temp_dir = TempDir::new().unwrap(); + let subdir = temp_dir.child("subdir"); + subdir.create_dir_all().unwrap(); + + let file1 = temp_dir.child("rule1.yar"); + let file2 = subdir.child("rule2.yar"); + + file1.write_str("rule test1 { condition: true }").unwrap(); + file2.write_str("rule test2 { condition: true }").unwrap(); + + // By default without -r/--recursive, only the top-level directory files are formatted. + Command::new(cargo_bin!("yr")) + .arg("fmt") + .arg(temp_dir.path()) + .assert() + .code(1); // file1 should be modified. + + // So now file1 is formatted, but file2 should still be unformatted. + Command::new(cargo_bin!("yr")) + .arg("fmt") + .arg(temp_dir.path()) + .assert() + .code(0); // Top-level files are already formatted, so no changes. + + // With -r/--recursive, the subdirectories are also processed, so file2 will be formatted. + Command::new(cargo_bin!("yr")) + .arg("fmt") + .arg("-r") + .arg(temp_dir.path()) + .assert() + .code(1); // file2 in subdir should be modified. + + // Subsequent format runs should find no modified files. + Command::new(cargo_bin!("yr")) + .arg("fmt") + .arg("-r") + .arg(temp_dir.path()) + .assert() + .code(0); +} From 9dc53fe2c4c4598f6c879e7a3b3bd1b346bd606a Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 2 Jun 2026 15:59:56 +0200 Subject: [PATCH 2/3] docs: document changes in `yr fmt`. 
--- cli/src/commands/fmt.rs | 8 ++++---- site/content/docs/cli/commands.md | 23 +++++++++++++++++++++-- site/hugo_stats.json | 3 +++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/cli/src/commands/fmt.rs b/cli/src/commands/fmt.rs index 5c7c81f42..8b52633d3 100644 --- a/cli/src/commands/fmt.rs +++ b/cli/src/commands/fmt.rs @@ -14,7 +14,7 @@ pub fn fmt() -> Command { super::command("fmt") .about("Format YARA source files") .arg( - arg!() + arg!() .help("Path to YARA source file or directory") .required(true) .value_parser(value_parser!(PathBuf)) @@ -42,7 +42,7 @@ pub fn fmt() -> Command { } pub fn exec_fmt(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { - let files = args.get_many::("FILE").unwrap(); + let paths = args.get_many::("PATH").unwrap(); let check = args.get_flag("check"); let tab_size = args.get_one::("tab-size").unwrap(); let recursive = args.get_one::("recursive"); @@ -68,8 +68,8 @@ pub fn exec_fmt(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { let mut modified_files: Vec = Vec::new(); - for file in files { - let mut walker = walk::Walker::path(file); + for path in paths { + let mut walker = walk::Walker::path(path); if let Some(recursive) = recursive { walker.max_depth(*recursive); } else { diff --git a/site/content/docs/cli/commands.md b/site/content/docs/cli/commands.md index 2f90672fd..193d612b4 100644 --- a/site/content/docs/cli/commands.md +++ b/site/content/docs/cli/commands.md @@ -462,16 +462,35 @@ This command is similar in spirit to other code formatting tools like `gofmt` and `rustfmt`. ``` -yr fmt ... +yr fmt ... ``` +The path can be either a file or directory. If a directory is used, every `.yar` +or `.yara` file contained in the directory will be formatted. + +### -r, --recursive=[MAX_DEPTH] + +Walk directories recursively. When `<PATH>` is a directory, this option enables +recursive directory traversal. 
You can optionally specify a `MAX_DEPTH` to +limit how deep the traversal goes: + +Examples: + +``` +--recursive formats nested subdirectories with no limits. +--recursive=0 formats only the files in <PATH> (no subdirectories) +--recursive=3 formats up to 3 levels deep, including nested subdirectories +``` + +If --recursive is not specified, the default behavior is equivalent to --recursive=0. + ### --check, -c Run in "check" mode. Doesn't modify any file, but exits error code 0 if the files are formatted correctly and no change is necessary, or error code 1 if otherwise. -### -t, --tab-size \\ +### -t, --tab-size \ Tab size (in spaces) used in source files diff --git a/site/hugo_stats.json b/site/hugo_stats.json index b627090fd..153403037 100644 --- a/site/hugo_stats.json +++ b/site/hugo_stats.json @@ -321,6 +321,7 @@ "--tag-tag", "--threads-num_threads", "--timeout-seconds", + "-r---recursivemax_depth", "-t---tab-size-num_spaces", "-what-about-the-original-yara", "-x---module-data-modulefile", @@ -485,6 +486,7 @@ "exportsfn_regex", "exportsordinal", "extracting-file-paths", + "fast_scanbool", "fat_header", "fatarch", "fewer-timeouts", @@ -785,6 +787,7 @@ "yrx_scanner_clear_profiling_data", "yrx_scanner_create", "yrx_scanner_destroy", + "yrx_scanner_fast_scan", "yrx_scanner_finish", "yrx_scanner_iter_slowest_rules", "yrx_scanner_on_console_log", From e9ee494c9f177cc3b3ef0a537e6b49458d72f771 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 2 Jun 2026 16:22:11 +0200 Subject: [PATCH 3/3] feat: add `--cpu-limit` option to scan command This option allows users to dynamically limit the CPU utilization of the scan process. It helps prevent CPU saturation when running background scan tasks on production servers or multi-user systems. The limit is achieved by measuring the active time spent scanning each file and introducing a calculated sleep delay before processing the next file. Closes #115. 
--- cli/src/commands/scan.rs | 10 +++++++- cli/src/tests/scan.rs | 11 ++++++++ cli/src/walk.rs | 42 +++++++++++++++++++++++++++++-- site/content/docs/cli/commands.md | 12 +++++++++ 4 files changed, 72 insertions(+), 3 deletions(-) diff --git a/cli/src/commands/scan.rs b/cli/src/commands/scan.rs index 57740feee..62755fcd1 100644 --- a/cli/src/commands/scan.rs +++ b/cli/src/commands/scan.rs @@ -60,6 +60,9 @@ pub fn scan() -> Command { .long_help(help::COMPILED_RULES_LONG_HELP), arg!(-c --"count") .help("Print only the number of matches per file"), + arg!(--"cpu-limit" ) + .help("Limit the CPU usage of the scan (percentage from 1 to 99)") + .value_parser(value_parser!(u8).range(1..=99)), arg!(--"disable-console-logs") .help("Disable printing console log messages"), arg!(-f --"fast-scan") @@ -121,7 +124,6 @@ pub fn scan() -> Command { arg!(-a --"timeout" ) .help("Abort scanning after the given number of seconds") .value_parser(value_parser!(u64).range(1..)) - ])) } @@ -179,6 +181,8 @@ pub fn exec_scan(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { let compiled_rules = args.get_flag("compiled-rules"); let profiling = args.get_flag("profiling"); let num_threads = args.get_one::("threads"); + + let cpu_limit = args.get_one::("cpu-limit"); let skip_larger = args.get_one::("skip-larger"); let disable_console_logs = args.get_flag("disable-console-logs"); let scan_list = args.get_flag("scan-list"); @@ -264,6 +268,10 @@ pub fn exec_scan(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { w.num_threads(*num_threads); } + if let Some(limit) = cpu_limit { + w.cpu_limit(*limit); + } + if let Some(max_file_size) = skip_larger { w.metadata_filter(|metadata| metadata.len() <= *max_file_size); } diff --git a/cli/src/tests/scan.rs b/cli/src/tests/scan.rs index 66e2041b8..44bcde3dc 100644 --- a/cli/src/tests/scan.rs +++ b/cli/src/tests/scan.rs @@ -440,3 +440,14 @@ fn fast_scan() { .success() .stdout(predicate::str::contains("foo src/tests/testdata/dummy.file")); } + 
+#[test] +fn cpu_limit() { + Command::new(cargo_bin!("yr")) + .arg("scan") + .arg("--cpu-limit=50") + .arg("src/tests/testdata/foo.yar") + .arg("src/tests/testdata/dummy.file") + .assert() + .success(); +} diff --git a/cli/src/walk.rs b/cli/src/walk.rs index 8a61d3a32..f338e1482 100644 --- a/cli/src/walk.rs +++ b/cli/src/walk.rs @@ -342,6 +342,7 @@ impl<'a> Walker<'a> { /// ``` pub(crate) struct ParWalker<'a> { num_threads: Option, + cpu_limit: Option, walker: Walker<'a>, } @@ -350,7 +351,7 @@ impl<'a> ParWalker<'a> { /// /// `path` can also point to an individual file instead of a directory. pub fn path(path: &'a Path) -> Self { - Self { walker: Walker::path(path), num_threads: None } + Self { walker: Walker::path(path), num_threads: None, cpu_limit: None } } /// Creates a [`ParWalker`] that walks the files listed in a text file @@ -358,7 +359,11 @@ impl<'a> ParWalker<'a> { /// /// `path` points to the text file that contains the paths to be walked. pub fn file_list(path: &'a Path) -> Self { - Self { walker: Walker::file_list(path), num_threads: None } + Self { + walker: Walker::file_list(path), + num_threads: None, + cpu_limit: None, + } } /// Sets the number of threads used. @@ -370,6 +375,12 @@ impl<'a> ParWalker<'a> { self } + /// Sets the target CPU limit percentage. + pub fn cpu_limit(&mut self, limit: u8) -> &mut Self { + self.cpu_limit = Some(limit); + self + } + /// Sets a maximum depth while traversing the directory tree. 
/// /// When the maximum depth is 0 only the files that reside in the given @@ -429,6 +440,8 @@ impl<'a> ParWalker<'a> { thread::available_parallelism().map(usize::from).unwrap_or(32) }; + let cpu_limit = self.cpu_limit; + crossbeam::scope(|s| { let mut threads = Vec::with_capacity(num_threads); @@ -453,12 +466,37 @@ impl<'a> ParWalker<'a> { threads.push(s.spawn(move |_| { let mut per_thread_obj = init(&state, &msg_send); for path in paths_recv { + let start_time = Instant::now(); let res = action( &state, &msg_send, path.to_path_buf(), &mut per_thread_obj, ); + let t_active = start_time.elapsed(); + + if let Some(limit) = cpu_limit { + if limit < 100 { + // Calculate the required sleep duration to limit + // CPU usage to the target percentage. + // + // Let T_active be the elapsed time scanning the + // file. Let T_sleep be the sleep time. The target + // utilization percentage is P. + // + // P = 100 * T_active / (T_active + T_sleep) + // P * (T_active + T_sleep) = 100 * T_active + // P * T_sleep = (100 - P) * T_active + // T_sleep = T_active * (100 - P) / P + let t_sleep = t_active.mul_f64( + (100.0 - limit as f64) / limit as f64, + ); + if !t_sleep.is_zero() { + thread::sleep(t_sleep); + } + } + } + if let Err(err) = res && error(err, &msg_send).is_err() { diff --git a/site/content/docs/cli/commands.md b/site/content/docs/cli/commands.md index 193d612b4..ee45cf910 100644 --- a/site/content/docs/cli/commands.md +++ b/site/content/docs/cli/commands.md @@ -98,6 +98,18 @@ Prints the number of matching rules per file. Instead of printing the names of the rules that matches each file, it prints the number the total number of rules matching each file. +### --cpu-limit \ + +Limit the CPU usage of the scan (percentage from 1 to 99). + +This option dynamically restricts CPU utilization per scan thread to the +specified percentage. 
The scanner achieves this by measuring the exact +duration spent scanning each file and introducing a sleep delay before +moving to the next file. + +This is useful for running background scan tasks on production servers +or multi-user systems without saturating CPU capacity. + ### --define Defines external variables.