From a8cbb87df4d0b25ace0ea54ff246ff401b7f0d52 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 23 Feb 2026 14:09:19 +0000 Subject: [PATCH 01/10] Moves glob/wildcard matching into Fact. Host scanning now uses globs to only get inodes for the specific files matching the globs. Prefix map is populated with the longest prefix for each glob e.g. /etc/**/*.conf -> /etc/ /home/user/.ssh/id_{rsa,dsa} -> /home/user/.ssh/id_ Kernel captures events based on inode first and then prefix match (this behavior is unchanged) and then userspace does a glob match on the path and host_path. --- Cargo.lock | 65 +++++++++++++++++++--------------------- Cargo.toml | 2 ++ fact-ebpf/src/lib.rs | 21 +++++++++++-- fact/Cargo.toml | 2 ++ fact/src/bpf/mod.rs | 24 ++++++++++++++- fact/src/event/mod.rs | 22 ++++++++++++++ fact/src/host_scanner.rs | 28 ++++++++--------- 7 files changed, 109 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d09a4115..dbc05398 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -249,6 +249,16 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -443,6 +453,8 @@ dependencies = [ "env_logger", "fact-api", "fact-ebpf", + "glob", + "globset", "http-body-util", "hyper", "hyper-tls", @@ -575,18 +587,6 @@ dependencies = [ "pin-utils", ] -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - [[package]] name = "getrandom" version = 
"0.4.1" @@ -608,9 +608,22 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] [[package]] name = "h2" @@ -940,7 +953,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.59.0", ] @@ -1491,7 +1504,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom", "once_cell", "rustix", "windows-sys 0.61.2", @@ -1745,7 +1758,7 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.4.1", + "getrandom", "js-sys", "wasm-bindgen", ] @@ -1777,15 +1790,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - [[package]] name = "wasip2" 
version = "1.0.2+wasi-0.2.9" @@ -2004,15 +2008,6 @@ dependencies = [ "wit-parser", ] -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - [[package]] name = "wit-bindgen-rust" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 45b275fc..525e8040 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ aya = { version = "0.13.1", default-features = false } anyhow = { version = "1", default-features = false, features = ["std", "backtrace"] } clap = { version = "4.5.41", features = ["derive", "env"] } env_logger = { version = "0.11.5", default-features = false, features = ["humantime"] } +glob = "0.3.3" +globset = "0.4.18" http-body-util = "0.1.3" hyper = { version = "1.6.0", default-features = false } hyper-tls = "0.6.0" diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index 0bf7a1ad..ac7802bc 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -30,17 +30,32 @@ impl TryFrom<&PathBuf> for path_prefix_t { prefix: value.display().to_string(), }); }; - let len = if filename.len() > LPM_SIZE_MAX as usize { + + // Take the start of the path until the first occurrence of a wildcard + // character. This is used as a filter in the kernel in cases where + // the inode has failed to match. The full wildcard string is used + // for further processing in userspace.
+ let filename_prefix = if let Some(wildcard_idx) = filename.chars().position(|c| { + "*?[]{}".contains(c) + }) { + &filename[..wildcard_idx] + } else { + // if there are no wildcards then the whole path can be + // the prefix + filename + }; + + let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { LPM_SIZE_MAX as usize } else { - filename.len() + filename_prefix.len() }; unsafe { let mut cfg: path_prefix_t = std::mem::zeroed(); memcpy( cfg.path.as_mut_ptr() as *mut _, - filename.as_ptr() as *const _, + filename_prefix.as_ptr() as *const _, len, ); cfg.bit_len = (len * 8) as u32; diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 9b1b52e3..2641ee9f 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -10,6 +10,8 @@ anyhow = { workspace = true } aya = { workspace = true } clap = { workspace = true } env_logger = { workspace = true } +glob = { workspace = true } +globset = { workspace = true } http-body-util = { workspace = true } hyper = { workspace = true } hyper-tls = { workspace = true } diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 71077cb8..2b8dc6e4 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -14,6 +14,7 @@ use tokio::{ sync::{mpsc, watch}, task::JoinHandle, }; +use globset::{GlobSet, GlobSetBuilder, Glob}; use crate::{event::Event, host_info, metrics::EventCounter}; @@ -30,6 +31,8 @@ pub struct Bpf { paths: Vec, paths_config: watch::Receiver>, + + paths_globset: GlobSet, } impl Bpf { @@ -61,6 +64,7 @@ impl Bpf { tx, paths, paths_config, + paths_globset: GlobSet::empty(), }; bpf.load_paths()?; @@ -127,11 +131,14 @@ impl Bpf { // Add the new prefixes let mut new_paths = Vec::with_capacity(paths_config.len()); + let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { + builder.add(Glob::new(&p.to_string_lossy())?); let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); } + self.paths_globset = builder.build()?; // Remove old prefixes for p in 
self.paths.iter().filter(|p| !new_paths.contains(p)) { @@ -193,7 +200,22 @@ impl Bpf { while let Some(event) = ringbuf.next() { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { - Ok(event) => event, + Ok(event) => { + // With wildcards, the kernel can only match on the inode and + // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, + // the kernel matches up to /etc/) + // + // We do a proper glob match here to do a final check + // using short circuiting to avoid calling is_match in all + // scenarios + if self.paths_globset.is_match(event.get_filename()) || + self.paths_globset.is_match(event.get_host_path()) { + event + } else { + event_counter.dropped(); + continue; + } + }, Err(e) => { error!("Failed to parse event: '{e}'"); event_counter.dropped(); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index c85189d5..080274e5 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -150,6 +150,28 @@ impl Event { } } + pub fn get_filename(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.filename, + FileData::Creation(data) => &data.filename, + FileData::Unlink(data) => &data.filename, + FileData::Chmod(data) => &data.inner.filename, + FileData::Chown(data) => &data.inner.filename, + FileData::Rename(data) => &data.new.filename, + } + } + + pub fn get_host_path(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.host_file, + FileData::Creation(data) => &data.host_file, + FileData::Unlink(data) => &data.host_file, + FileData::Chmod(data) => &data.inner.host_file, + FileData::Chown(data) => &data.inner.host_file, + FileData::Rename(data) => &data.new.host_file, + } + } + /// Set the `host_file` field of the event to the one provided. 
/// /// In the case of operations that involve two paths, like rename, diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 73893967..a0e0c0ac 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -107,8 +107,8 @@ impl HostScanner { } }); - for path in config.iter() { - let path = host_info::prepend_host_mount(path); + for pattern in self.paths.borrow().iter() { + let path = host_info::prepend_host_mount(pattern); self.scan_inner(&path)?; } debug!("Host scan done"); @@ -119,21 +119,17 @@ impl HostScanner { fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { self.metrics.scan_inc(ScanLabels::ElementsScanned); - if path.is_dir() { - self.metrics.scan_inc(ScanLabels::DirectoryScanned); - for entry in path.read_dir()?.flatten() { - let entry = entry.path(); - self.scan_inner(&entry) - .with_context(|| format!("Failed to scan {}", entry.display()))?; + glob::glob(&path.to_string_lossy())?.try_for_each(|entry| match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()).with_context(|| { + format!("Failed to update entry for {}", path.display()) + })?; + } + Ok(()) } - } else if path.is_file() { - self.metrics.scan_inc(ScanLabels::FileScanned); - self.update_entry(path) - .with_context(|| format!("Failed to update entry for {}", path.display()))?; - } else { - self.metrics.scan_inc(ScanLabels::FsItemIgnored); - } - Ok(()) + Err(e) => Err(e.into()), + }) } fn update_entry(&self, path: &Path) -> anyhow::Result<()> { From fbd7be93ecd2ebf6df399fc32cd92be440bf2915 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 23 Feb 2026 14:20:30 +0000 Subject: [PATCH 02/10] Fmt --- fact-ebpf/src/lib.rs | 17 ++++++++--------- fact/src/bpf/mod.rs | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index ac7802bc..ac1a0b33 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -35,15 +35,14 @@ impl TryFrom<&PathBuf> for path_prefix_t { // 
character. This is used as a filter in the kernel in cases where // the inode has failed to match. The full wildcard string is used // for further processing in userspace. - let filename_prefix = if let Some(wildcard_idx) = filename.chars().position(|c| { - "*?[]{}".contains(c) - }) { - &filename[..wildcard_idx] - } else { - // if there are no wildcards then the whole path can be - // the prefix - filename - }; + let filename_prefix = + if let Some(wildcard_idx) = filename.chars().position(|c| "*?[]{}".contains(c)) { + &filename[..wildcard_idx] + } else { + // if there are no wildcards then the whole path can be + // the prefix + filename + }; let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { LPM_SIZE_MAX as usize diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 2b8dc6e4..56ed2b6c 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -7,6 +7,7 @@ use aya::{ Btf, Ebpf, }; use checks::Checks; +use globset::{Glob, GlobSet, GlobSetBuilder}; use libc::c_char; use log::{error, info}; use tokio::{ @@ -14,7 +15,6 @@ use tokio::{ sync::{mpsc, watch}, task::JoinHandle, }; -use globset::{GlobSet, GlobSetBuilder, Glob}; use crate::{event::Event, host_info, metrics::EventCounter}; From 0367c24beb853ca21277d85b5db411c5381f150c Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 23 Feb 2026 15:26:45 +0000 Subject: [PATCH 03/10] PR review fixes --- fact-ebpf/src/lib.rs | 25 +++++++++++-------------- fact/src/bpf/mod.rs | 10 +++++++--- fact/src/host_scanner.rs | 20 +++++++++++--------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index ac1a0b33..bd84ee08 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -35,20 +35,11 @@ impl TryFrom<&PathBuf> for path_prefix_t { // character. This is used as a filter in the kernel in cases where // the inode has failed to match. The full wildcard string is used // for further processing in userspace. 
- let filename_prefix = - if let Some(wildcard_idx) = filename.chars().position(|c| "*?[]{}".contains(c)) { - &filename[..wildcard_idx] - } else { - // if there are no wildcards then the whole path can be - // the prefix - filename - }; - - let len = if filename_prefix.len() > LPM_SIZE_MAX as usize { - LPM_SIZE_MAX as usize - } else { - filename_prefix.len() - }; + // + // unwrap is safe here - if there are no matches, the full string is the + // only item in the iterator + let filename_prefix = filename.split(['*', '?', '[', '{']).next().unwrap(); + let len = filename_prefix.len().min(LPM_SIZE_MAX as usize); unsafe { let mut cfg: path_prefix_t = std::mem::zeroed(); @@ -77,6 +68,12 @@ impl PartialEq for path_prefix_t { unsafe impl Pod for path_prefix_t {} +impl inode_key_t { + pub fn empty(&self) -> bool { + self.inode == 0 && self.dev == 0 + } +} + impl PartialEq for inode_key_t { fn eq(&self, other: &Self) -> bool { self.inode == other.inode && self.dev == other.dev diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 56ed2b6c..50ec3e27 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -133,7 +133,11 @@ impl Bpf { let mut new_paths = Vec::with_capacity(paths_config.len()); let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { - builder.add(Glob::new(&p.to_string_lossy())?); + builder.add( + Glob::new(&p.to_string_lossy()) + .with_context(|| format!("invalid glob {}", p.display())) + .unwrap(), + ); let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); @@ -208,8 +212,8 @@ impl Bpf { // We do a proper glob match here to do a final check // using short circuiting to avoid calling is_match in all // scenarios - if self.paths_globset.is_match(event.get_filename()) || - self.paths_globset.is_match(event.get_host_path()) { + if !event.get_inode().empty() || + self.paths_globset.is_match(event.get_filename()) { event } else { event_counter.dropped(); diff --git 
a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index a0e0c0ac..207db7cf 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -119,17 +119,19 @@ impl HostScanner { fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { self.metrics.scan_inc(ScanLabels::ElementsScanned); - glob::glob(&path.to_string_lossy())?.try_for_each(|entry| match entry { - Ok(path) => { - if path.is_file() { - self.update_entry(path.as_path()).with_context(|| { - format!("Failed to update entry for {}", path.display()) - })?; + for entry in glob::glob(&path.to_string_lossy())? { + match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()).with_context(|| { + format!("Failed to update entry for {}", path.display()) + })?; + } } - Ok(()) + Err(e) => return Err(e.into()), } - Err(e) => Err(e.into()), - }) + } + Ok(()) } fn update_entry(&self, path: &Path) -> anyhow::Result<()> { From 9409dbf1bb4807a32d606f3604c9b0edd209d827 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 12:39:09 +0000 Subject: [PATCH 04/10] Fix matching/tests and add wildcard tests --- fact-ebpf/src/bpf/file.h | 19 +++++- fact-ebpf/src/bpf/main.c | 84 +++++++++---------------- fact/src/bpf/mod.rs | 5 +- tests/conftest.py | 2 +- tests/test_config_hotreload.py | 4 +- tests/test_wildcard.py | 109 +++++++++++++++++++++++++++++++++ 6 files changed, 161 insertions(+), 62 deletions(-) create mode 100644 tests/test_wildcard.py diff --git a/fact-ebpf/src/bpf/file.h b/fact-ebpf/src/bpf/file.h index dd0639de..6c420095 100644 --- a/fact-ebpf/src/bpf/file.h +++ b/fact-ebpf/src/bpf/file.h @@ -6,12 +6,13 @@ #include "builtins.h" #include "types.h" #include "maps.h" +#include "inode.h" #include #include // clang-format on -__always_inline static bool is_monitored(struct bound_path_t* path) { +__always_inline static bool path_is_monitored(struct bound_path_t* path) { if (!filter_by_prefix()) { // no path configured, allow all return true; @@ -30,3 +31,19 @@ 
__always_inline static bool is_monitored(struct bound_path_t* path) { path->len = len; return res; } + +__always_inline static bool is_monitored(inode_key_t inode, struct bound_path_t* path, inode_key_t** submit) { + const inode_value_t* volatile inode_value = inode_get(&inode); + + switch (inode_is_monitored(inode_value)) { + case NOT_MONITORED: + if (path_is_monitored(path)) { + return true; + } + *submit = NULL; + return false; + case MONITORED: + break; + } + return true; +} diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index 3c71fd71..e0a23522 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -45,18 +45,13 @@ int BPF_PROG(trace_file_open, struct file* file) { } inode_key_t inode_key = inode_to_key(file->f_inode); - const inode_value_t* inode = inode_get(&inode_key); - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(path)) { - goto ignored; - } - break; - case MONITORED: - break; + inode_key_t* inode_to_submit = &inode_key; + + if (!is_monitored(inode_key, path, &inode_to_submit)) { + goto ignored; } - submit_open_event(&m->file_open, event_type, path->path, &inode_key); + submit_open_event(&m->file_open, event_type, path->path, inode_to_submit); return 0; @@ -82,24 +77,16 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { } inode_key_t inode_key = inode_to_key(dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(path)) { - m->path_unlink.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case MONITORED: - inode_remove(&inode_key); - break; + if (!is_monitored(inode_key, path, &inode_to_submit)) { + m->path_unlink.ignored++; + return 0; } submit_unlink_event(&m->path_unlink, path->path, - &inode_key); + inode_to_submit); return 0; } @@ -120,24 +107,17 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { } inode_key_t 
inode_key = inode_to_key(path->dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(bound_path)) { - m->path_chmod.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case MONITORED: - break; + if (!is_monitored(inode_key, bound_path, &inode_to_submit)) { + m->path_chmod.ignored++; + return 0; } umode_t old_mode = BPF_CORE_READ(path, dentry, d_inode, i_mode); submit_mode_event(&m->path_chmod, bound_path->path, - &inode_key, + inode_to_submit, mode, old_mode); @@ -164,18 +144,11 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign } inode_key_t inode_key = inode_to_key(path->dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(bound_path)) { - m->path_chown.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case MONITORED: - break; + if (!is_monitored(inode_key, bound_path, &inode_to_submit)) { + m->path_chown.ignored++; + return 0; } struct dentry* d = BPF_CORE_READ(path, dentry); @@ -184,7 +157,7 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign submit_ownership_event(&m->path_chown, bound_path->path, - &inode_key, + inode_to_submit, uid, gid, old_uid, @@ -217,14 +190,15 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, } inode_key_t old_inode = inode_to_key(old_dentry->d_inode); - const inode_value_t* volatile old_inode_value = inode_get(&old_inode); inode_key_t new_inode = inode_to_key(new_dentry->d_inode); - const inode_value_t* volatile new_inode_value = inode_get(&new_inode); - if (inode_is_monitored(old_inode_value) == NOT_MONITORED && - inode_is_monitored(new_inode_value) == NOT_MONITORED && - !is_monitored(old_path) && - !is_monitored(new_path)) { + inode_key_t* old_inode_submit = &old_inode; + inode_key_t* new_inode_submit 
= &new_inode; + + bool old_monitored = is_monitored(old_inode, old_path, &old_inode_submit); + bool new_monitored = is_monitored(new_inode, new_path, &new_inode_submit); + + if (!old_monitored && !new_monitored) { m->path_rename.ignored++; return 0; } @@ -232,8 +206,8 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, submit_rename_event(&m->path_rename, new_path->path, old_path->path, - &old_inode, - &new_inode); + old_inode_submit, + new_inode_submit); return 0; error: diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 50ec3e27..727aca4b 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -209,9 +209,8 @@ impl Bpf { // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, // the kernel matches up to /etc/) // - // We do a proper glob match here to do a final check - // using short circuiting to avoid calling is_match in all - // scenarios + // The kernel sets inode to 0 when it matched via path prefix only. + // so we only need to perform a glob match against the filename if !event.get_inode().empty() || self.paths_globset.is_match(event.get_filename()) { event diff --git a/tests/conftest.py b/tests/conftest.py index ccba3650..4267333e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -104,7 +104,7 @@ def dump_logs(container, file): def fact_config(request, monitored_dir, logs_dir): cwd = os.getcwd() config = { - 'paths': [monitored_dir, '/mounted', '/container-dir'], + 'paths': [f'{monitored_dir}/**/*', '/mounted/**/*', '/container-dir/**/*'], 'grpc': { 'url': 'http://127.0.0.1:9999', }, diff --git a/tests/test_config_hotreload.py b/tests/test_config_hotreload.py index 1989f8f1..e23fdde1 100644 --- a/tests/test_config_hotreload.py +++ b/tests/test_config_hotreload.py @@ -136,7 +136,7 @@ def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [ignored_dir] + config['paths'] = [f'{ignored_dir}/**/*'] reload_config(fact, config, 
config_file, delay=0.5) # At this point, the event in the ignored directory should show up @@ -173,7 +173,7 @@ def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [monitored_dir, ignored_dir] + config['paths'] = [f'{monitored_dir}/**/*', f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this point, the event in the ignored directory should show up diff --git a/tests/test_wildcard.py b/tests/test_wildcard.py new file mode 100644 index 00000000..fb815ee4 --- /dev/null +++ b/tests/test_wildcard.py @@ -0,0 +1,109 @@ +import os + +import pytest +import yaml + +from event import Event, EventType, Process + + +@pytest.fixture +def wildcard_config(fact_config, monitored_dir): + config, config_file = fact_config + config['paths'] = [ + f'{monitored_dir}/**/*.txt', + f'{monitored_dir}/**/test-*.log', + ] + with open(config_file, 'w') as f: + yaml.dump(config, f) + return config, config_file + + +def test_extension_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + txt_file = os.path.join(monitored_dir, 'document.txt') + with open(txt_file, 'w') as f: + f.write('This should be captured') + + # Should not match any pattern + log_file = os.path.join(monitored_dir, 'app.log') + with open(log_file, 'w') as f: + f.write('This should be ignored') + + e = Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path='') + + server.wait_events([e]) + + +def test_prefix_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + test_log = os.path.join(monitored_dir, 'test-app.log') + with open(test_log, 'w') as f: + f.write('This should be captured') + + # Wrong prefix - should not match + app_log = os.path.join(monitored_dir, 'app-test.log') + with open(app_log, 'w') as f: + f.write('This should be ignored') + + e = Event(process=process, 
event_type=EventType.CREATION, + file=test_log, host_path='') + + server.wait_events([e]) + + +def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + nested_dir = os.path.join(monitored_dir, 'level1', 'level2') + os.makedirs(nested_dir, exist_ok=True) + + root_txt = os.path.join(monitored_dir, 'root.txt') + with open(root_txt, 'w') as f: + f.write('Root level txt') + + nested_txt = os.path.join(nested_dir, 'nested.txt') + with open(nested_txt, 'w') as f: + f.write('Nested txt') + + # Different extension - should not match + nested_md = os.path.join(nested_dir, 'readme.md') + with open(nested_md, 'w') as f: + f.write('Should be ignored') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=root_txt, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=nested_txt, host_path=''), + ] + + server.wait_events(events) + + +def test_multiple_patterns(fact, wildcard_config, monitored_dir, server): + process = Process.from_proc() + + txt_file = os.path.join(monitored_dir, 'notes.txt') + with open(txt_file, 'w') as f: + f.write('Text file') + + log_file = os.path.join(monitored_dir, 'test-output.log') + with open(log_file, 'w') as f: + f.write('Log file') + + # Matches neither pattern + conf_file = os.path.join(monitored_dir, 'config.yml') + with open(conf_file, 'w') as f: + f.write('Config file') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=log_file, host_path=''), + ] + + server.wait_events(events) From f5698a9546384df63de92dbad0f037f7c000f4af Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 14:20:08 +0000 Subject: [PATCH 05/10] Fix basic unit test --- fact/src/bpf/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 727aca4b..71019ea1 100644 --- 
a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -278,7 +278,7 @@ mod bpf_tests { let monitored_path = env!("CARGO_MANIFEST_DIR"); let monitored_path = PathBuf::from(monitored_path); - let paths = vec![monitored_path.clone()]; + let paths = vec![PathBuf::from(format!("{}/**/*", monitored_path.display()))]; let mut config = FactConfig::default(); config.set_paths(paths); let reloader = Reloader::from(config); From d0e9e31836fdcde8c62162bfc779e6047a3e95df Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Tue, 24 Feb 2026 14:20:38 +0000 Subject: [PATCH 06/10] Fix missing single quote in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f4a6c81d..20f712a2 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ In order to run these tests as part of the unit test suite y use the following command: ```shell -cargo test --config 'target."cfg(all())".runner="sudo -E" --features=bpf-test +cargo test --config 'target."cfg(all())".runner="sudo -E"' --features=bpf-test ``` ## Create compile_commands.json From b198ad3b94b293d5e6bcf1b82b26ba2eee0905c1 Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Wed, 25 Feb 2026 16:22:35 +0000 Subject: [PATCH 07/10] Fix tests based on PR comments --- tests/test_wildcard.py | 65 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/tests/test_wildcard.py b/tests/test_wildcard.py index fb815ee4..fd1728f3 100644 --- a/tests/test_wildcard.py +++ b/tests/test_wildcard.py @@ -1,3 +1,4 @@ +from time import sleep import os import pytest @@ -7,59 +8,69 @@ @pytest.fixture -def wildcard_config(fact_config, monitored_dir): +def wildcard_config(fact, fact_config, monitored_dir): config, config_file = fact_config config['paths'] = [ f'{monitored_dir}/**/*.txt', + f'{monitored_dir}/*.conf', f'{monitored_dir}/**/test-*.log', ] with open(config_file, 'w') as f: yaml.dump(config, f) + + # reload the config + fact.kill('SIGHUP') + 
sleep(0.1) return config, config_file -def test_extension_wildcard(fact, wildcard_config, monitored_dir, server): +def test_extension_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() - txt_file = os.path.join(monitored_dir, 'document.txt') - with open(txt_file, 'w') as f: - f.write('This should be captured') - # Should not match any pattern log_file = os.path.join(monitored_dir, 'app.log') with open(log_file, 'w') as f: f.write('This should be ignored') + txt_file = os.path.join(monitored_dir, 'document.txt') + with open(txt_file, 'w') as f: + f.write('This should be captured') + e = Event(process=process, event_type=EventType.CREATION, file=txt_file, host_path='') server.wait_events([e]) -def test_prefix_wildcard(fact, wildcard_config, monitored_dir, server): +def test_prefix_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() - test_log = os.path.join(monitored_dir, 'test-app.log') - with open(test_log, 'w') as f: - f.write('This should be captured') - # Wrong prefix - should not match app_log = os.path.join(monitored_dir, 'app-test.log') with open(app_log, 'w') as f: f.write('This should be ignored') + test_log = os.path.join(monitored_dir, 'test-app.log') + with open(test_log, 'w') as f: + f.write('This should be captured') + e = Event(process=process, event_type=EventType.CREATION, file=test_log, host_path='') server.wait_events([e]) -def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): +def test_recursive_wildcard(wildcard_config, monitored_dir, server): process = Process.from_proc() nested_dir = os.path.join(monitored_dir, 'level1', 'level2') os.makedirs(nested_dir, exist_ok=True) + # Different extension - should not match + nested_md = os.path.join(nested_dir, 'readme.md') + with open(nested_md, 'w') as f: + f.write('Should be ignored') + root_txt = os.path.join(monitored_dir, 'root.txt') with open(root_txt, 'w') as f: f.write('Root level txt') @@ -68,11 +79,6 @@ def 
test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): with open(nested_txt, 'w') as f: f.write('Nested txt') - # Different extension - should not match - nested_md = os.path.join(nested_dir, 'readme.md') - with open(nested_md, 'w') as f: - f.write('Should be ignored') - events = [ Event(process=process, event_type=EventType.CREATION, file=root_txt, host_path=''), @@ -83,9 +89,27 @@ def test_recursive_wildcard(fact, wildcard_config, monitored_dir, server): server.wait_events(events) -def test_multiple_patterns(fact, wildcard_config, monitored_dir, server): +def test_nonrecursive_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + fut = os.path.join(monitored_dir, 'app.conf') + with open(fut, 'w') as f: + f.write('This should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path='') + + server.wait_events([e]) + + +def test_multiple_patterns(wildcard_config, monitored_dir, server): process = Process.from_proc() + # Matches no pattern + conf_file = os.path.join(monitored_dir, 'config.yml') + with open(conf_file, 'w') as f: + f.write('Config file') + txt_file = os.path.join(monitored_dir, 'notes.txt') with open(txt_file, 'w') as f: f.write('Text file') @@ -94,11 +118,6 @@ def test_multiple_patterns(fact, wildcard_config, monitored_dir, server): with open(log_file, 'w') as f: f.write('Log file') - # Matches neither pattern - conf_file = os.path.join(monitored_dir, 'config.yml') - with open(conf_file, 'w') as f: - f.write('Config file') - events = [ Event(process=process, event_type=EventType.CREATION, file=txt_file, host_path=''), From d910e7f27aa6a87545d92ea6bb1b093c9f28f2ad Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Wed, 25 Feb 2026 16:40:21 +0000 Subject: [PATCH 08/10] Use to_str instead of lossy --- fact/src/bpf/mod.rs | 9 +++++++-- fact/src/host_scanner.rs | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fact/src/bpf/mod.rs 
b/fact/src/bpf/mod.rs index 71019ea1..f9b6ca3d 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -133,11 +133,16 @@ impl Bpf { let mut new_paths = Vec::with_capacity(paths_config.len()); let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { + let Some(glob_str) = p.to_str() else { + bail!("failed to convert path {} to string", p.display()); + }; + builder.add( - Glob::new(&p.to_string_lossy()) - .with_context(|| format!("invalid glob {}", p.display())) + Glob::new(glob_str) + .with_context(|| format!("invalid glob {}", glob_str)) .unwrap(), ); + let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 207db7cf..8eda7afc 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -26,7 +26,7 @@ use std::{ time::Duration, }; -use anyhow::Context; +use anyhow::{bail, Context}; use aya::maps::MapData; use fact_ebpf::{inode_key_t, inode_value_t}; use log::{debug, info, warn}; @@ -119,7 +119,11 @@ impl HostScanner { fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { self.metrics.scan_inc(ScanLabels::ElementsScanned); - for entry in glob::glob(&path.to_string_lossy())? { + let Some(glob_str) = path.to_str() else { + bail!("invalid path {}", path.display()); + }; + + for entry in glob::glob(glob_str)? 
{ match entry { Ok(path) => { if path.is_file() { From cbb11b3efff2736828b14a12d01e6cc8e63d76dc Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Mon, 2 Mar 2026 18:03:06 +0100 Subject: [PATCH 09/10] Fix rename events --- fact-ebpf/src/bpf/file.h | 2 +- fact/src/bpf/mod.rs | 12 ++---------- fact/src/event/mod.rs | 29 ++++++++++++++++++++++++++++- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/fact-ebpf/src/bpf/file.h b/fact-ebpf/src/bpf/file.h index 6c420095..4e3d6369 100644 --- a/fact-ebpf/src/bpf/file.h +++ b/fact-ebpf/src/bpf/file.h @@ -37,10 +37,10 @@ __always_inline static bool is_monitored(inode_key_t inode, struct bound_path_t* switch (inode_is_monitored(inode_value)) { case NOT_MONITORED: + *submit = NULL; if (path_is_monitored(path)) { return true; } - *submit = NULL; return false; case MONITORED: break; diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index f9b6ca3d..5604b821 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -210,19 +210,11 @@ impl Bpf { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { Ok(event) => { - // With wildcards, the kernel can only match on the inode and - // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, - // the kernel matches up to /etc/) - // - // The kernel sets inode to 0 when it matched via path prefix only. 
- // so we only need to perform a glob match against the filename - if !event.get_inode().empty() || - self.paths_globset.is_match(event.get_filename()) { - event - } else { + if event.is_ignored(&self.paths_globset) { event_counter.dropped(); continue; } + event }, Err(e) => { error!("Failed to parse event: '{e}'"); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 080274e5..98a70162 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -6,6 +6,7 @@ use std::{ path::{Path, PathBuf}, }; +use globset::GlobSet; use serde::Serialize; use fact_ebpf::{event_t, file_activity_type_t, inode_key_t, PATH_MAX}; @@ -150,7 +151,7 @@ impl Event { } } - pub fn get_filename(&self) -> &PathBuf { + fn get_filename(&self) -> &PathBuf { match &self.file { FileData::Open(data) => &data.filename, FileData::Creation(data) => &data.filename, @@ -161,6 +162,13 @@ impl Event { } } + fn get_old_filename(&self) -> Option<&PathBuf> { + match &self.file { + FileData::Rename(data) => Some(&data.old.filename), + _ => None, + } + } + pub fn get_host_path(&self) -> &PathBuf { match &self.file { FileData::Open(data) => &data.host_file, @@ -194,6 +202,25 @@ impl Event { data.old.host_file = host_path } } + + /// Determine if the event should be ignored. + /// + /// With wildcards, the kernel can only match on the inode and + /// then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, + /// the kernel matches up to /etc/). + /// + /// The kernel sets inode to 0 when it matched via path prefix only, + /// so we only need to perform a glob match against the filename. + /// + /// We also need to check the old values for rename events.
+ pub fn is_ignored(&self, globset: &GlobSet) -> bool { + self.get_inode().empty() + && self.get_old_inode().is_none_or(|inode| inode.empty()) + && !globset.is_match(self.get_filename()) + && self + .get_old_filename() + .is_none_or(|path| !globset.is_match(path)) + } } impl TryFrom<&event_t> for Event { From 4064269eaa1b816b484b23680bfe0743672f21ee Mon Sep 17 00:00:00 2001 From: Giles Hutton Date: Mon, 9 Mar 2026 10:11:08 +0000 Subject: [PATCH 10/10] Reinstate metrics dropped during rebase --- fact/src/host_scanner.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 8eda7afc..ad7db6f9 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -127,9 +127,12 @@ impl HostScanner { match entry { Ok(path) => { if path.is_file() { + self.metrics.scan_inc(ScanLabels::FileScanned); self.update_entry(path.as_path()).with_context(|| { format!("Failed to update entry for {}", path.display()) })?; + } else { + self.metrics.scan_inc(ScanLabels::FsItemIgnored); } } Err(e) => return Err(e.into()),