diff --git a/cli/src/commands/fmt.rs b/cli/src/commands/fmt.rs index 3ff26cbe8..8b52633d3 100644 --- a/cli/src/commands/fmt.rs +++ b/cli/src/commands/fmt.rs @@ -8,13 +8,14 @@ use yara_x_fmt::{Formatter, Indentation}; use crate::config::Config; use crate::help; +use crate::walk; pub fn fmt() -> Command { super::command("fmt") .about("Format YARA source files") .arg( - arg!() - .help("Path to YARA source file") + arg!() + .help("Path to YARA source file or directory") .required(true) .value_parser(value_parser!(PathBuf)) .action(ArgAction::Append), @@ -23,6 +24,14 @@ pub fn fmt() -> Command { arg!(-c --check "Run in 'check' mode") .long_help(help::FMT_CHECK_MODE), ) + .arg( + arg!(-r - -"recursive"[MAX_DEPTH]) + .help("Walk directories recursively up to a given depth") + .long_help(help::RECURSIVE_LONG_HELP) + .default_missing_value("1000") + .require_equals(true) + .value_parser(value_parser!(usize)), + ) .arg( arg!(-t - -"tab-size" ) .help("Tab size (in spaces) used in source files") @@ -33,9 +42,10 @@ pub fn fmt() -> Command { } pub fn exec_fmt(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { - let files = args.get_many::("FILE").unwrap(); + let paths = args.get_many::("PATH").unwrap(); let check = args.get_flag("check"); let tab_size = args.get_one::("tab-size").unwrap(); + let recursive = args.get_one::("recursive"); let formatter = Formatter::new() .input_tab_size(*tab_size) @@ -56,27 +66,42 @@ pub fn exec_fmt(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { config.fmt.rule.empty_line_after_section_header, ); - let mut modified_files: Vec<&PathBuf> = Vec::new(); + let mut modified_files: Vec = Vec::new(); - for file in files { - let input = fs::read(file.as_path())?; - let file_modified = if check { - formatter.format(input.as_slice(), io::sink())? 
+ for path in paths { + let mut walker = walk::Walker::path(path); + if let Some(recursive) = recursive { + walker.max_depth(*recursive); } else { - let mut formatted = Cursor::new(Vec::with_capacity(input.len())); - if formatter.format(input.as_slice(), &mut formatted)? { - formatted.seek(SeekFrom::Start(0))?; - let mut output_file = File::create(file.as_path())?; - io::copy(&mut formatted, &mut output_file)?; - true - } else { - false - } - }; - - if file_modified { - modified_files.push(file); + walker.max_depth(0); } + walker.filter("**/*.yar").filter("**/*.yara"); + + walker.walk( + |file_path| { + let input = fs::read(file_path)?; + let file_modified = if check { + formatter.format(input.as_slice(), io::sink())? + } else { + let mut formatted = + Cursor::new(Vec::with_capacity(input.len())); + if formatter.format(input.as_slice(), &mut formatted)? { + formatted.seek(SeekFrom::Start(0))?; + let mut output_file = File::create(file_path)?; + io::copy(&mut formatted, &mut output_file)?; + true + } else { + false + } + }; + + if file_modified { + modified_files.push(file_path.to_path_buf()); + } + Ok(()) + }, + Err, + )?; } if !modified_files.is_empty() { diff --git a/cli/src/commands/scan.rs b/cli/src/commands/scan.rs index 57740feee..62755fcd1 100644 --- a/cli/src/commands/scan.rs +++ b/cli/src/commands/scan.rs @@ -60,6 +60,9 @@ pub fn scan() -> Command { .long_help(help::COMPILED_RULES_LONG_HELP), arg!(-c --"count") .help("Print only the number of matches per file"), + arg!(--"cpu-limit" ) + .help("Limit the CPU usage of the scan (percentage from 1 to 99)") + .value_parser(value_parser!(u8).range(1..=99)), arg!(--"disable-console-logs") .help("Disable printing console log messages"), arg!(-f --"fast-scan") @@ -121,7 +124,6 @@ pub fn scan() -> Command { arg!(-a --"timeout" ) .help("Abort scanning after the given number of seconds") .value_parser(value_parser!(u64).range(1..)) - ])) } @@ -179,6 +181,8 @@ pub fn exec_scan(args: &ArgMatches, config: &Config) -> 
anyhow::Result<()> { let compiled_rules = args.get_flag("compiled-rules"); let profiling = args.get_flag("profiling"); let num_threads = args.get_one::("threads"); + + let cpu_limit = args.get_one::("cpu-limit"); let skip_larger = args.get_one::("skip-larger"); let disable_console_logs = args.get_flag("disable-console-logs"); let scan_list = args.get_flag("scan-list"); @@ -264,6 +268,10 @@ pub fn exec_scan(args: &ArgMatches, config: &Config) -> anyhow::Result<()> { w.num_threads(*num_threads); } + if let Some(limit) = cpu_limit { + w.cpu_limit(*limit); + } + if let Some(max_file_size) = skip_larger { w.metadata_filter(|metadata| metadata.len() <= *max_file_size); } diff --git a/cli/src/tests/fmt.rs b/cli/src/tests/fmt.rs index 7398dfe1b..78aa1c035 100644 --- a/cli/src/tests/fmt.rs +++ b/cli/src/tests/fmt.rs @@ -53,3 +53,46 @@ fn utf8_error() { .stderr("error: invalid UTF-8 at [0..1]\n") .code(1); } + +#[test] +fn fmt_directory() { + let temp_dir = TempDir::new().unwrap(); + let subdir = temp_dir.child("subdir"); + subdir.create_dir_all().unwrap(); + + let file1 = temp_dir.child("rule1.yar"); + let file2 = subdir.child("rule2.yar"); + + file1.write_str("rule test1 { condition: true }").unwrap(); + file2.write_str("rule test2 { condition: true }").unwrap(); + + // By default without -r/--recursive, only the top-level directory files are formatted. + Command::new(cargo_bin!("yr")) + .arg("fmt") + .arg(temp_dir.path()) + .assert() + .code(1); // file1 should be modified. + + // So now file1 is formatted, but file2 should still be unformatted. + Command::new(cargo_bin!("yr")) + .arg("fmt") + .arg(temp_dir.path()) + .assert() + .code(0); // Top-level files are already formatted, so no changes. + + // With -r/--recursive, the subdirectories are also processed, so file2 will be formatted. + Command::new(cargo_bin!("yr")) + .arg("fmt") + .arg("-r") + .arg(temp_dir.path()) + .assert() + .code(1); // file2 in subdir should be modified. 
+ + // Subsequent format runs should find no modified files. + Command::new(cargo_bin!("yr")) + .arg("fmt") + .arg("-r") + .arg(temp_dir.path()) + .assert() + .code(0); +} diff --git a/cli/src/tests/scan.rs b/cli/src/tests/scan.rs index 66e2041b8..44bcde3dc 100644 --- a/cli/src/tests/scan.rs +++ b/cli/src/tests/scan.rs @@ -440,3 +440,14 @@ fn fast_scan() { .success() .stdout(predicate::str::contains("foo src/tests/testdata/dummy.file")); } + +#[test] +fn cpu_limit() { + Command::new(cargo_bin!("yr")) + .arg("scan") + .arg("--cpu-limit=50") + .arg("src/tests/testdata/foo.yar") + .arg("src/tests/testdata/dummy.file") + .assert() + .success(); +} diff --git a/cli/src/walk.rs b/cli/src/walk.rs index 8a61d3a32..f338e1482 100644 --- a/cli/src/walk.rs +++ b/cli/src/walk.rs @@ -342,6 +342,7 @@ impl<'a> Walker<'a> { /// ``` pub(crate) struct ParWalker<'a> { num_threads: Option, + cpu_limit: Option, walker: Walker<'a>, } @@ -350,7 +351,7 @@ impl<'a> ParWalker<'a> { /// /// `path` can also point to an individual file instead of a directory. pub fn path(path: &'a Path) -> Self { - Self { walker: Walker::path(path), num_threads: None } + Self { walker: Walker::path(path), num_threads: None, cpu_limit: None } } /// Creates a [`ParWalker`] that walks the files listed in a text file @@ -358,7 +359,11 @@ impl<'a> ParWalker<'a> { /// /// `path` points to the text file that contains the paths to be walked. pub fn file_list(path: &'a Path) -> Self { - Self { walker: Walker::file_list(path), num_threads: None } + Self { + walker: Walker::file_list(path), + num_threads: None, + cpu_limit: None, + } } /// Sets the number of threads used. @@ -370,6 +375,12 @@ impl<'a> ParWalker<'a> { self } + /// Sets the target CPU limit percentage. + pub fn cpu_limit(&mut self, limit: u8) -> &mut Self { + self.cpu_limit = Some(limit); + self + } + /// Sets a maximum depth while traversing the directory tree. 
/// /// When the maximum depth is 0 only the files that reside in the given @@ -429,6 +440,8 @@ impl<'a> ParWalker<'a> { thread::available_parallelism().map(usize::from).unwrap_or(32) }; + let cpu_limit = self.cpu_limit; + crossbeam::scope(|s| { let mut threads = Vec::with_capacity(num_threads); @@ -453,12 +466,37 @@ impl<'a> ParWalker<'a> { threads.push(s.spawn(move |_| { let mut per_thread_obj = init(&state, &msg_send); for path in paths_recv { + let start_time = Instant::now(); let res = action( &state, &msg_send, path.to_path_buf(), &mut per_thread_obj, ); + let t_active = start_time.elapsed(); + + if let Some(limit) = cpu_limit { + if limit < 100 { + // Calculate the required sleep duration to limit + // CPU usage to the target percentage. + // + // Let T_active be the elapsed time scanning the + // file. Let T_sleep be the sleep time. The target + // utilization percentage is P. + // + // P = 100 * T_active / (T_active + T_sleep) + // P * (T_active + T_sleep) = 100 * T_active + // P * T_sleep = (100 - P) * T_active + // T_sleep = T_active * (100 - P) / P + let t_sleep = t_active.mul_f64( + (100.0 - limit as f64) / limit as f64, + ); + if !t_sleep.is_zero() { + thread::sleep(t_sleep); + } + } + } + if let Err(err) = res && error(err, &msg_send).is_err() { diff --git a/site/content/docs/cli/commands.md b/site/content/docs/cli/commands.md index 2f90672fd..ee45cf910 100644 --- a/site/content/docs/cli/commands.md +++ b/site/content/docs/cli/commands.md @@ -98,6 +98,18 @@ Prints the number of matching rules per file. Instead of printing the names of the rules that matches each file, it prints the number the total number of rules matching each file. +### --cpu-limit \ + +Limit the CPU usage of the scan (percentage from 1 to 99). + +This option dynamically restricts CPU utilization per scan thread to the +specified percentage. 
The scanner achieves this by measuring the exact +duration spent scanning each file and introducing a sleep delay before +moving to the next file. + +This is useful for running background scan tasks on production servers +or multi-user systems without saturating CPU capacity. + ### --define Defines external variables. @@ -462,16 +474,35 @@ This command is similar in spirit to other code formatting tools like `gofmt` and `rustfmt`. ``` -yr fmt <FILE>... +yr fmt <PATH>... ``` +The path can be either a file or directory. If a directory is used, every `.yar` +or `.yara` file contained in the directory will be formatted. + +### -r, --recursive=[MAX_DEPTH] + +Walk directories recursively. When `<PATH>` is a directory, this option enables +recursive directory traversal. You can optionally specify a `MAX_DEPTH` to +limit how deep the traversal goes: + +Examples: + +``` +--recursive formats nested subdirectories with no limits. +--recursive=0 formats only the files in <PATH> (no subdirectories) +--recursive=3 formats up to 3 levels deep, including nested subdirectories +``` + +If --recursive is not specified, the default behavior is equivalent to --recursive=0. + ### --check, -c Run in "check" mode. Doesn't modify any file, but exits error code 0 if the files are formatted correctly and no change is necessary, or error code 1 if otherwise. 
-### -t, --tab-size \\ +### -t, --tab-size \ Tab size (in spaces) used in source files diff --git a/site/hugo_stats.json b/site/hugo_stats.json index b627090fd..153403037 100644 --- a/site/hugo_stats.json +++ b/site/hugo_stats.json @@ -321,6 +321,7 @@ "--tag-tag", "--threads-num_threads", "--timeout-seconds", + "-r---recursivemax_depth", "-t---tab-size-num_spaces", "-what-about-the-original-yara", "-x---module-data-modulefile", @@ -485,6 +486,7 @@ "exportsfn_regex", "exportsordinal", "extracting-file-paths", + "fast_scanbool", "fat_header", "fatarch", "fewer-timeouts", @@ -785,6 +787,7 @@ "yrx_scanner_clear_profiling_data", "yrx_scanner_create", "yrx_scanner_destroy", + "yrx_scanner_fast_scan", "yrx_scanner_finish", "yrx_scanner_iter_slowest_rules", "yrx_scanner_on_console_log",