Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 46 additions & 21 deletions cli/src/commands/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ use yara_x_fmt::{Formatter, Indentation};

use crate::config::Config;
use crate::help;
use crate::walk;

pub fn fmt() -> Command {
super::command("fmt")
.about("Format YARA source files")
.arg(
arg!(<FILE>)
.help("Path to YARA source file")
arg!(<PATH>)
.help("Path to YARA source file or directory")
.required(true)
.value_parser(value_parser!(PathBuf))
.action(ArgAction::Append),
Expand All @@ -23,6 +24,14 @@ pub fn fmt() -> Command {
arg!(-c --check "Run in 'check' mode")
.long_help(help::FMT_CHECK_MODE),
)
.arg(
arg!(-r - -"recursive"[MAX_DEPTH])
.help("Walk directories recursively up to a given depth")
.long_help(help::RECURSIVE_LONG_HELP)
.default_missing_value("1000")
.require_equals(true)
.value_parser(value_parser!(usize)),
)
.arg(
arg!(-t - -"tab-size" <NUM_SPACES>)
.help("Tab size (in spaces) used in source files")
Expand All @@ -33,9 +42,10 @@ pub fn fmt() -> Command {
}

pub fn exec_fmt(args: &ArgMatches, config: &Config) -> anyhow::Result<()> {
let files = args.get_many::<PathBuf>("FILE").unwrap();
let paths = args.get_many::<PathBuf>("PATH").unwrap();
let check = args.get_flag("check");
let tab_size = args.get_one::<usize>("tab-size").unwrap();
let recursive = args.get_one::<usize>("recursive");

let formatter = Formatter::new()
.input_tab_size(*tab_size)
Expand All @@ -56,27 +66,42 @@ pub fn exec_fmt(args: &ArgMatches, config: &Config) -> anyhow::Result<()> {
config.fmt.rule.empty_line_after_section_header,
);

let mut modified_files: Vec<&PathBuf> = Vec::new();
let mut modified_files: Vec<PathBuf> = Vec::new();

for file in files {
let input = fs::read(file.as_path())?;
let file_modified = if check {
formatter.format(input.as_slice(), io::sink())?
for path in paths {
let mut walker = walk::Walker::path(path);
if let Some(recursive) = recursive {
walker.max_depth(*recursive);
} else {
let mut formatted = Cursor::new(Vec::with_capacity(input.len()));
if formatter.format(input.as_slice(), &mut formatted)? {
formatted.seek(SeekFrom::Start(0))?;
let mut output_file = File::create(file.as_path())?;
io::copy(&mut formatted, &mut output_file)?;
true
} else {
false
}
};

if file_modified {
modified_files.push(file);
walker.max_depth(0);
}
walker.filter("**/*.yar").filter("**/*.yara");

walker.walk(
|file_path| {
let input = fs::read(file_path)?;
let file_modified = if check {
formatter.format(input.as_slice(), io::sink())?
} else {
let mut formatted =
Cursor::new(Vec::with_capacity(input.len()));
if formatter.format(input.as_slice(), &mut formatted)? {
formatted.seek(SeekFrom::Start(0))?;
let mut output_file = File::create(file_path)?;
io::copy(&mut formatted, &mut output_file)?;
true
} else {
false
}
};

if file_modified {
modified_files.push(file_path.to_path_buf());
}
Ok(())
},
Err,
)?;
}

if !modified_files.is_empty() {
Expand Down
10 changes: 9 additions & 1 deletion cli/src/commands/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ pub fn scan() -> Command {
.long_help(help::COMPILED_RULES_LONG_HELP),
arg!(-c --"count")
.help("Print only the number of matches per file"),
arg!(--"cpu-limit" <PERCENTAGE>)
.help("Limit the CPU usage of the scan (percentage from 1 to 99)")
.value_parser(value_parser!(u8).range(1..=99)),
arg!(--"disable-console-logs")
.help("Disable printing console log messages"),
arg!(-f --"fast-scan")
Expand Down Expand Up @@ -121,7 +124,6 @@ pub fn scan() -> Command {
arg!(-a --"timeout" <SECONDS>)
.help("Abort scanning after the given number of seconds")
.value_parser(value_parser!(u64).range(1..))

]))
}

Expand Down Expand Up @@ -179,6 +181,8 @@ pub fn exec_scan(args: &ArgMatches, config: &Config) -> anyhow::Result<()> {
let compiled_rules = args.get_flag("compiled-rules");
let profiling = args.get_flag("profiling");
let num_threads = args.get_one::<u8>("threads");

let cpu_limit = args.get_one::<u8>("cpu-limit");
let skip_larger = args.get_one::<u64>("skip-larger");
let disable_console_logs = args.get_flag("disable-console-logs");
let scan_list = args.get_flag("scan-list");
Expand Down Expand Up @@ -264,6 +268,10 @@ pub fn exec_scan(args: &ArgMatches, config: &Config) -> anyhow::Result<()> {
w.num_threads(*num_threads);
}

if let Some(limit) = cpu_limit {
w.cpu_limit(*limit);
}

if let Some(max_file_size) = skip_larger {
w.metadata_filter(|metadata| metadata.len() <= *max_file_size);
}
Expand Down
43 changes: 43 additions & 0 deletions cli/src/tests/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,46 @@ fn utf8_error() {
.stderr("error: invalid UTF-8 at [0..1]\n")
.code(1);
}

#[test]
fn fmt_directory() {
    // Layout under test: <root>/rule1.yar and <root>/subdir/rule2.yar, both
    // written in a form the test expects the formatter to rewrite.
    let root = TempDir::new().unwrap();
    let nested = root.child("subdir");
    nested.create_dir_all().unwrap();

    root.child("rule1.yar")
        .write_str("rule test1 { condition: true }")
        .unwrap();
    nested
        .child("rule2.yar")
        .write_str("rule test2 { condition: true }")
        .unwrap();

    // Runs `yr fmt [-r] <root>` and asserts on the process exit code. The
    // test expects code 1 when some file was rewritten and 0 otherwise.
    let run_fmt = |recursive: bool, expected_code: i32| {
        let mut cmd = Command::new(cargo_bin!("yr"));
        cmd.arg("fmt");
        if recursive {
            cmd.arg("-r");
        }
        cmd.arg(root.path()).assert().code(expected_code);
    };

    // Without -r/--recursive only the top-level files are processed, so the
    // first run rewrites rule1.yar.
    run_fmt(false, 1);

    // rule1.yar is formatted now and rule2.yar is still out of reach, so a
    // second non-recursive run changes nothing.
    run_fmt(false, 0);

    // With -r/--recursive the subdirectory is visited too, so rule2.yar gets
    // rewritten.
    run_fmt(true, 1);

    // Everything is formatted by now; a further recursive run is a no-op.
    run_fmt(true, 0);
}
11 changes: 11 additions & 0 deletions cli/src/tests/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,14 @@ fn fast_scan() {
.success()
.stdout(predicate::str::contains("foo src/tests/testdata/dummy.file"));
}

#[test]
fn cpu_limit() {
    // Smoke test: a scan run with `--cpu-limit=50` must still exit
    // successfully.
    let argv = [
        "scan",
        "--cpu-limit=50",
        "src/tests/testdata/foo.yar",
        "src/tests/testdata/dummy.file",
    ];

    Command::new(cargo_bin!("yr")).args(argv).assert().success();
}
42 changes: 40 additions & 2 deletions cli/src/walk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ impl<'a> Walker<'a> {
/// ```
pub(crate) struct ParWalker<'a> {
/// Requested number of worker threads; `None` until a value is set
/// explicitly.
num_threads: Option<u8>,
/// Target CPU usage percentage. When set to a value below 100, each worker
/// thread sleeps after processing a path, for a time proportional to how
/// long that path took. `None` means no throttling.
cpu_limit: Option<u8>,
/// The underlying [`Walker`], built from the path or file list passed to
/// the constructor.
walker: Walker<'a>,
}

Expand All @@ -350,15 +351,19 @@ impl<'a> ParWalker<'a> {
///
/// `path` can also point to an individual file instead of a directory.
pub fn path(path: &'a Path) -> Self {
Self { walker: Walker::path(path), num_threads: None }
Self { walker: Walker::path(path), num_threads: None, cpu_limit: None }
}

/// Creates a [`ParWalker`] that walks the files listed in a text file
/// containing one path per line.
///
/// `path` points to the text file that contains the paths to be walked.
pub fn file_list(path: &'a Path) -> Self {
Self { walker: Walker::file_list(path), num_threads: None }
Self {
walker: Walker::file_list(path),
num_threads: None,
cpu_limit: None,
}
}

/// Sets the number of threads used.
Expand All @@ -370,6 +375,12 @@ impl<'a> ParWalker<'a> {
self
}

/// Sets the target CPU limit percentage.
///
/// Values from 1 to 99 throttle each worker thread to roughly that share of
/// CPU time; values of 100 or more leave the workers unthrottled.
///
/// A limit of 0 is raised to 1. The sleep time is derived from
/// `(100 - limit) / limit`, which is not finite for 0 and would make the
/// duration multiplication in the worker loop panic.
pub fn cpu_limit(&mut self, limit: u8) -> &mut Self {
    self.cpu_limit = Some(limit.max(1));
    self
}

/// Sets a maximum depth while traversing the directory tree.
///
/// When the maximum depth is 0 only the files that reside in the given
Expand Down Expand Up @@ -429,6 +440,8 @@ impl<'a> ParWalker<'a> {
thread::available_parallelism().map(usize::from).unwrap_or(32)
};

let cpu_limit = self.cpu_limit;

crossbeam::scope(|s| {
let mut threads = Vec::with_capacity(num_threads);

Expand All @@ -453,12 +466,37 @@ impl<'a> ParWalker<'a> {
threads.push(s.spawn(move |_| {
let mut per_thread_obj = init(&state, &msg_send);
for path in paths_recv {
let start_time = Instant::now();
let res = action(
&state,
&msg_send,
path.to_path_buf(),
&mut per_thread_obj,
);
let t_active = start_time.elapsed();

if let Some(limit) = cpu_limit {
if limit < 100 {
// Calculate the required sleep duration to limit
// CPU usage to the target percentage.
//
// Let T_active be the elapsed time scanning the
// file. Let T_sleep be the sleep time. The target
// utilization percentage is P.
//
// P = 100 * T_active / (T_active + T_sleep)
// P * (T_active + T_sleep) = 100 * T_active
// P * T_sleep = (100 - P) * T_active
// T_sleep = T_active * (100 - P) / P
let t_sleep = t_active.mul_f64(
(100.0 - limit as f64) / limit as f64,
);
if !t_sleep.is_zero() {
thread::sleep(t_sleep);
}
}
}

if let Err(err) = res
&& error(err, &msg_send).is_err()
{
Expand Down
35 changes: 33 additions & 2 deletions site/content/docs/cli/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,18 @@ Prints the number of matching rules per file. Instead of printing the
names of the rules that match each file, it prints the
total number of rules matching each file.

### --cpu-limit \<PERCENTAGE\>

Limit the CPU usage of the scan (percentage from 1 to 99).

This option dynamically restricts CPU utilization per scan thread to the
specified percentage. The scanner achieves this by measuring the exact
duration spent scanning each file and introducing a sleep delay before
moving to the next file.

This is useful for running background scan tasks on production servers
or multi-user systems without saturating CPU capacity.

### --define <VAR=VALUE>

Defines external variables.
Expand Down Expand Up @@ -462,16 +474,35 @@ This command is similar in spirit to other code formatting tools like `gofmt`
and `rustfmt`.

```
yr fmt <FILE>...
yr fmt <PATH>...
```

The path can be either a file or a directory. If a directory is used, every `.yar`
or `.yara` file contained in the directory will be formatted.

### -r, --recursive=[MAX_DEPTH]

Walk directories recursively. When <PATH> is a directory, this option enables
recursive directory traversal. You can optionally specify a `MAX_DEPTH` to
limit how deep the traversal goes.

Examples:

```
--recursive formats nested subdirectories with no limits.
--recursive=0 formats only the files in <PATH> (no subdirectories)
--recursive=3 formats up to 3 levels deep, including nested subdirectories
```

If --recursive is not specified, the default behavior is equivalent to --recursive=0.

### --check, -c

Run in "check" mode. Doesn't modify any file, but exits with code 0 if the
files are formatted correctly and no change is necessary, or with code 1
otherwise.

### -t, --tab-size \<NUM_SPACES>\
### -t, --tab-size \<NUM_SPACES>

Tab size (in spaces) used in source files

Expand Down
3 changes: 3 additions & 0 deletions site/hugo_stats.json
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@
"--tag-tag",
"--threads-num_threads",
"--timeout-seconds",
"-r---recursivemax_depth",
"-t---tab-size-num_spaces",
"-what-about-the-original-yara",
"-x---module-data-modulefile",
Expand Down Expand Up @@ -485,6 +486,7 @@
"exportsfn_regex",
"exportsordinal",
"extracting-file-paths",
"fast_scanbool",
"fat_header",
"fatarch",
"fewer-timeouts",
Expand Down Expand Up @@ -785,6 +787,7 @@
"yrx_scanner_clear_profiling_data",
"yrx_scanner_create",
"yrx_scanner_destroy",
"yrx_scanner_fast_scan",
"yrx_scanner_finish",
"yrx_scanner_iter_slowest_rules",
"yrx_scanner_on_console_log",
Expand Down
Loading