diff --git a/.claude/commands/analyze-ci.md b/.claude/commands/analyze-ci.md new file mode 100644 index 0000000000..5ef112ac07 --- /dev/null +++ b/.claude/commands/analyze-ci.md @@ -0,0 +1,285 @@ +# Analyze CI Results + +You are helping investigate CI failures for the StackRox collector project. This skill describes how to navigate the CI infrastructure, download logs, and diagnose common failure modes. + +## CI Structure + +The main CI workflow is **"Main collector CI"** (`integration-tests`). It runs integration tests across multiple platforms as separate jobs. + +### Finding the PR + +```bash +# Look up PR number for a branch (run from the collector repo root) +gh pr view --json number,title --jq '"\(.number) | \(.title)"' + +# Or for the current branch +gh pr view --json number,title --jq '"\(.number) | \(.title)"' +``` + +### Listing Failed Checks + +```bash +# Get PR check status — list non-passing checks +gh pr view --json statusCheckRollup \ + --jq '.statusCheckRollup[] | select(.conclusion | IN("SUCCESS","SKIPPED","NEUTRAL") | not) | "\(.name): \(.conclusion) / \(.status)"' + +# List workflow runs for a branch +gh run list --branch --limit 5 \ + --json databaseId,name,conclusion,status \ + --jq '.[] | "\(.databaseId) | \(.name) | \(.conclusion) | \(.status)"' + +# Get failed jobs from a run +gh run view --json jobs \ + --jq '.jobs[] | select(.conclusion == "failure") | "\(.databaseId) | \(.name)"' +``` + +### Lint Failures + +The **Lint** workflow runs `pre-commit` hooks including `clang-format`. To see what failed: + +```bash +gh run view --log 2>&1 | grep -A30 "All changes made by hooks:" +``` + +This shows the exact diff that clang-format wants applied. + +## Downloading and Navigating Log Artifacts + +**This is the most important step.** The GitHub Actions log output truncates collector logs to just the crash backtrace. The full `collector.log` with verifier output is only in the artifacts. 
+ +### Step 1: List Artifacts + +```bash +gh api repos/stackrox/collector/actions/runs//artifacts \ + --jq '.artifacts[] | "\(.id) | \(.name) | \(.size_in_bytes)"' +``` + +Artifact names follow the pattern `-logs`, e.g.: +- `rhel-logs`, `rhel-sap-logs` +- `ubuntu-os-logs`, `ubuntu-arm-logs` +- `cos-logs`, `cos-arm64-logs` +- `rhcos-logs`, `rhcos-arm64-logs` +- `flatcar-logs`, `fcarm-logs` +- `rhel-s390x-logs`, `rhel-ppc64le-logs` + +### Step 2: Download and Extract + +```bash +gh api repos/stackrox/collector/actions/artifacts//zip > /tmp/.zip +unzip -o /tmp/.zip -d /tmp/ +``` + +### Step 3: Artifact Directory Structure + +Each artifact contains: + +``` +-logs/ + container-logs/ + _/ + core-bpf/ + TestProcessNetwork/ + collector.log # Full collector log for this test + events.log # Event stream log + TestNetworkFlows/ + TestProcessViz/ + TestProcessLineageInfo/ + TestUdpNetworkFlow/ + TestUdpNetorkflow/ # Note: typo in directory name is intentional + sendto_recvfrom/ + collector.log + udp-client.log + udp-server.log + sendmsg_recvmsg/ + sendmmsg_recvmmsg/ + events.log + TestSocat/ + collector.log + ... + perf.json # Performance metrics + integration-test-report-_.xml # JUnit XML results + integration-test-_.log # Ansible runner log +``` + +**The `collector.log` file is the primary diagnostic source.** Each test suite gets its own `collector.log` because the collector container is restarted per suite. + +### Step 4: Check Test Results Summary + +JUnit XML requires xmllint or a simple grep (jq cannot parse XML): + +```bash +# Quick summary from the XML attributes +head -3 /tmp//container-logs/integration-test-report-*.xml + +# Find which tests failed +grep -B1 'failure\|error' /tmp//container-logs/integration-test-report-*.xml | head -20 +``` + +**Key pattern**: If you see `tests="4" failures="1" errors="1" skipped="2"`, the collector crashed on the first test (TestProcessNetwork) and everything else was skipped. 
This means a BPF loading failure or early startup crash.

## Diagnosing Failure Modes

### 1. BPF Verifier Rejection (Collector Crashes)

**Symptoms**:
- Collector exits with code 139 (SIGSEGV) or 134 (SIGABRT)
- `tests="4"` in JUnit XML (crash on first test)
- Stack trace shows `KernelDriverCOREEBPF::Setup` -> `sinsp_exception` -> `abort`

**How to find the verifier error in collector.log**:

```bash
# Find the failing program and verifier output
grep -n "BPF program load failed\|failed to load\|BEGIN PROG LOAD LOG\|END PROG LOAD LOG" collector.log

# Get the actual rejection reason (usually the last line before END PROG LOAD LOG)
grep -B5 "END PROG LOAD LOG" collector.log
```

The verifier log is between `BEGIN PROG LOAD LOG` and `END PROG LOAD LOG`. It can be thousands of lines of BPF instruction trace. The **rejection reason is always the last line before `END PROG LOAD LOG`**.

Common verifier rejection messages:
- `R2 min value is negative, either use unsigned or 'var &= const'` — signed value used as size arg to bpf helper
- `BPF program is too large. Processed 1000001 insn` — exceeded 1M instruction verifier limit
- `R0 invalid mem access 'map_value_or_null'` — null check optimized away by clang
- `reg type unsupported for arg#0` — BTF type mismatch (often a warning, not the real error — check end of verifier log)

**After the verifier log**, look for the cascade:

```
libbpf: prog '': failed to load: -13 # -13 = EACCES (Permission denied)
libbpf: prog '': failed to load: -7 # -7 = E2BIG (program too large)
libbpf: failed to load object 'bpf_probe' # Whole BPF skeleton fails
libpman: failed to load BPF object # libpman reports failure
terminate called after throwing 'sinsp_exception' # C++ exception
 what(): Initialization issues during scap_init
```

### 2. 
Self-Check Health Timeout (Collector Runs But Not Healthy) + +**Symptoms**: +- Collector starts and loads BPF programs successfully +- `Failed to detect any self-check process events within the timeout.` +- `Failed to detect any self-check networking events within the timeout.` +- Test framework times out: `Timed out waiting for container collector to become health=healthy` + +**What to look for in collector.log**: + +```bash +grep -n "SelfCheck\|self-check\|Failed to detect\|healthy" collector.log +``` + +This means the BPF programs loaded but aren't capturing events correctly. Check for: +- Tracepoint attachment failures: `failed to create tracepoint 'syscalls/sys_enter_connect'` +- Missing programs: `unable to find BPF program ''` +- Container ID issues: `unable to initialize the state table API: failed to find dynamic field 'container_id'` + +### 3. Test Logic Failures (Collector Healthy, Test Assertions Fail) + +**Symptoms**: +- Most tests pass, individual test fails +- Collector is healthy and running +- Test output shows assertion mismatches + +**Where to look**: +- The specific test's `collector.log` for event processing +- `events.log` for the raw event stream +- For UDP tests: check `udp-client.log` and `udp-server.log` in the test subdirectory +- JUnit XML for the error message + +### 4. 
Startup/Infrastructure Failures + +**Symptoms**: +- `fatal: []: FAILED!` in the GitHub Actions log (Ansible failure) +- No collector.log at all for a test +- Image pull failures + +**Where to look**: +- The Ansible runner log: `integration-test-.log` in the artifact +- The GitHub Actions log: `gh run view --log --job ` + +## Platform-Specific Notes + +### RHEL 8 (kernel 4.18) / s390x / ppc64le +- **Oldest and strictest BPF verifier** — most likely to hit verifier rejections +- RHEL 8 uses kernel 4.18 which has limited BPF type tracking +- s390x and ppc64le also use 4.18-based kernels +- These platforms fail first, so their verifier errors are the canonical ones to fix + +### RHEL SAP (kernel 5.14) +- Same base kernel as RHEL 9 but **different kernel config** (SAP-tuned) +- Has hit verifier instruction limit (1M insns) when RHEL 9 passes +- `reg type unsupported for arg#0` is often a warning, not the real error — check end of verifier log for `BPF program is too large` + +### COS / Google Container-Optimized OS (kernel 6.6) +- **Clang-compiled kernel** — different BTF attributes than GCC-compiled kernels +- RCU pointer annotations cause different verifier behavior +- Has rejected programs that pass on same-version GCC-compiled kernels + +### ARM64 platforms (ubuntu-arm, rhcos-arm64, cos-arm64, fcarm) +- No ia32 compat syscalls — `ia32_*` programs are correctly disabled +- `sys_enter_connect` tracepoint may not exist — expected, handled gracefully +- Self-check timeouts can be timing-related on slower ARM VMs +- cos-arm64 and fcarm tend to pass when ubuntu-arm and rhcos-arm64 fail — may be Docker vs Podman timing differences + +### Ubuntu (ubuntu-os) +- Runs on **both Ubuntu 22.04 and 24.04** VMs +- The artifact contains logs from multiple VMs (check the subdirectory names) +- Ubuntu 22.04 (kernel 6.8) is stricter than 24.04 (kernel 6.17) + +### Flatcar / Fedora CoreOS +- Generally the most permissive — if these fail, something is fundamentally broken + +## Common 
Non-Fatal Log Messages + +These appear on all platforms and are expected/harmless: + +``` +# Container plugin not loaded (by design — collector uses cgroup extraction) +unable to initialize the state table API: failed to find dynamic field 'container_id' in threadinfo + +# Enter events removed in modern BPF (by design) +failed to determine tracepoint 'syscalls/sys_enter_connect' perf event ID: No such file or directory + +# TLS not configured (expected in integration tests) +Partial TLS config: CACertPath="", ClientCertPath="", ClientKeyPath=""; will not use TLS + +# Container filter uses proc.vpid not container.id (by design) +Could not set container filter: proc.vpid is not a valid number + +# Programs excluded from build via MODERN_BPF_EXCLUDE_PROGS +unable to find BPF program '' +``` + +## Quick Investigation Workflow + +1. **Identify failing platforms**: Check PR status checks +2. **Download artifacts**: For each failing platform, download the `-logs` artifact +3. **Check JUnit XML first**: `tests="4"` = crash, higher number = specific test failures +4. **Read collector.log**: For crashes, search for `failed to load` and read the verifier log above it. For test failures, read the specific test's collector.log +5. **Check kernel version**: First lines of collector.log show OS and kernel version +6. **Cross-reference platforms**: If RHEL 9 passes but RHEL SAP fails, it's likely a verifier limit issue. If all arm64 fail, check self-check timing. If everything fails, check BPF program structure +7. **Compare with master**: Download master's artifacts for the same platform to confirm regression + +## Build Exclusion Mechanism + +Collector can exclude BPF programs from compilation via CMake: + +```cmake +# collector/CMakeLists.txt +set(MODERN_BPF_EXCLUDE_PROGS "^(openat2|ppoll|...)$" CACHE STRING "..." FORCE) +``` + +The regex matches against BPF source file stems (e.g., `pread64` matches `pread64.bpf.c`). Excluded programs are not compiled into the skeleton. 
The loader in `maps.c:add_bpf_program_to_tail_table()` handles missing programs gracefully (logs debug message, returns success). + +Only exclude programs for syscalls that collector does not subscribe to. Collector's syscall list is in `collector/lib/CollectorConfig.h` (`kSyscalls[]` and `kSendRecvSyscalls[]`). + +## Cleanup + +Once the analysis is complete and you have reported your findings, delete all downloaded log artifacts (zip files and extracted directories) from `/tmp/`: + +```bash +rm -rf /tmp/*-logs /tmp/*-logs.zip +``` + +This prevents stale logs from accumulating across investigations. diff --git a/.claude/commands/update-falco-libs.md b/.claude/commands/update-falco-libs.md new file mode 100644 index 0000000000..77d0e11865 --- /dev/null +++ b/.claude/commands/update-falco-libs.md @@ -0,0 +1,429 @@ +# Update Falcosecurity-Libs Fork + +You are helping update the falcosecurity-libs fork used by the StackRox collector. + +## Repository Context + +- **Collector repo**: The current working directory +- **Fork submodule**: `falcosecurity-libs/` — StackRox's fork of `https://github.com/falcosecurity/libs` +- **Fork repo**: `https://github.com/stackrox/falcosecurity-libs` +- **Upstream remote** (in submodule): `falco` → `git@github.com:falcosecurity/libs` +- **Origin remote** (in submodule): `origin` → `git@github.com:stackrox/falcosecurity-libs.git` +- **Branch naming**: `X.Y.Z-stackrox` branches carry StackRox patches on top of upstream tags +- **Update docs**: `docs/falco-update.md` + +## Step 1: Assess Current State + +Run the following in the `falcosecurity-libs/` submodule: + +1. `git describe --tags HEAD` — find current upstream base version +2. `git log --oneline ..HEAD --no-merges` — list all StackRox patches +3. `git fetch falco --tags` — get latest upstream tags +4. `git tag -l '0.*' | sort -V | tail -10` — find latest upstream releases +5. `git branch -a | grep stackrox` — find existing StackRox branches +6. 
Count upstream commits: `git log --oneline .. | wc -l` +7. Find StackRox-only patches: `git log --oneline HEAD --not --remotes=falco` + +Report: current version, target version, number of StackRox patches, number of upstream commits. + +## Step 2: Analyze StackRox Patches + +For each StackRox patch, determine if it has been upstreamed: + +```sh +# For each patch commit, search upstream for equivalent +git log --oneline .. --grep="" +``` + +Categorize each patch as: +- **Upstreamed** — will be dropped automatically during rebase +- **Still needed** — must be carried forward +- **Conflict risk** — touches files heavily modified upstream + +### Current StackRox Patches (as of 0.23.1-stackrox-rc1) + +20 patches in these categories: + +**BPF verifier fixes** (keep — not upstreamed): +- `2291f61ec` — clang > 19 verifier fixes (MAX_IOVCNT, volatile len_to_read, pragma unroll) +- `8672099d6` — RHEL SAP verifier fix (const struct cred *) +- `df93a9e42` — COS verifier fix (RCU pointer chain reads) +- `d1a708bde` — explicit return in auxmap submit + +**ppc64le platform support** (keep): +- `255126d47` — ppc64le vmlinux.h (large, BTF-generated) +- `a9cafe949` — ppc64le syscall compat header +- `452679e2b` — IOC_PAGE_SHIFT fix +- `dd5e86d40` / `bb733f64a` — thread_info guards (iterative, consider squashing) + +**Performance optimizations** (keep): +- `a982809e0` — cgroup subsys filtering (`INTERESTING_SUBSYS` compile flag) +- `8dd26e3dc` — socket-only FD scan (`SCAP_SOCKET_ONLY_FD` compile flag) + +**API/build adaptations** (keep): +- `32f36f770` — expose `extract_single` in filterchecks (public API) +- `b0ec4099f` — libelf suffix guard + initial filtercheck extract +- `34d863440` — sinsp include directory fix +- `a915789ec` / `16edb6bb1` — CMake/include fixes for logging integration +- `5338014a7` — disable log timestamps API + +**Workarounds** (keep but monitor): +- `8ba291e78` — disable trusted exepath (see "Exepath" section below) +- `88d5093f4` — ASSERT_TO_LOG via 
falcosecurity_log_fn callback + +**BPF verifier null-check optimization** (keep — not upstreamed): +- BPF verifier fix for `sys_exit` program: refactored `sampling_logic_exit()` and `sys_exit()` in `syscall_exit.bpf.c` to use a single `maps__get_capture_settings()` lookup instead of multiple inlined calls that clang optimizes into null-unsafe code. Without this, the BPF probe fails to load on kernels < 6.17 (RHEL 9, Ubuntu 22.04, COS, etc.) + +**Network signal handler fix** (keep — collector-side): +- Skip `is_socket_failed()`/`is_socket_pending()` checks for send/recv events in `NetworkSignalHandler.cpp`. The sinsp parser marks sockets as "failed" on EAGAIN but never clears the flag on subsequent success for recv operations. + +**Rebase fixups** (always regenerated): +- `d0fb1702c` — fixes following rebase (CMake cycle, exepath fallback, assert macro) + +### Upstream Candidates + +These patches are generic enough to propose upstream: +- **Strong**: clang verifier fixes (2291f61ec, 8672099d6, df93a9e42), disable log timestamps (5338014a7) +- **With discussion**: cgroup filtering (a982809e0), socket-only FD scan (8dd26e3dc), log asserts (88d5093f4) — upstream may prefer runtime flags over compile-time +- **ppc64le bundle**: propose together if upstream is interested in the architecture + +## Step 3: Identify Breaking API Changes + +Check what APIs changed between versions. Key areas to inspect: + +```sh +# Thread cgroup / container-related changes (collector uses cgroup extraction, not the container plugin) +git log --oneline .. -- userspace/libsinsp/threadinfo.h +git log --oneline .. --grep="cgroup\|container" + +# Thread manager changes +git log --oneline .. -- userspace/libsinsp/threadinfo.h userspace/libsinsp/thread_manager.h + +# sinsp API changes +git diff .. -- userspace/libsinsp/sinsp.h | grep -E '^\+|^\-' | head -80 + +# Event format changes (parameter additions/removals) +git diff .. 
-- driver/event_table.c + +# Enter event deprecation (EF_OLD_VERSION flags) +git log --oneline .. --grep="OLD_VERSION\|enter event\|enter_event" + +# Breaking changes +git log --oneline .. --grep="BREAKING\|breaking\|!:" +``` + +Then grep the collector code for uses of changed/removed APIs: + +```sh +grep -rn '' collector/lib/ collector/test/ --include='*.cpp' --include='*.h' +``` + +Key collector integration points to check: +- `collector/lib/system-inspector/Service.cpp` — sinsp initialization, filter setup (`proc.pid != proc.vpid`) +- `collector/lib/system-inspector/EventExtractor.h` — threadinfo field access macros (TINFO_FIELD, FIELD_RAW, FIELD_RAW_SAFE) +- `collector/lib/ProcessSignalFormatter.cpp` — process signal creation, exepath access, container_id, lineage traversal +- `collector/lib/NetworkSignalHandler.cpp` — container_id access via `GetContainerID(evt)` +- `collector/lib/Process.cpp` — process info access, container_id via cgroup extraction +- `collector/lib/Utility.cpp` — `GetContainerID()`, `ExtractContainerIDFromCgroup()`, threadinfo printing +- `collector/test/ProcessSignalFormatterTest.cpp` — thread creation, thread_manager usage +- `collector/test/SystemInspectorServiceTest.cpp` — service initialization +- `collector/CMakeLists.txt` — falco build flags + +## Step 4: Plan Staging Strategy + +If the gap is large (>200 commits), identify intermediate stopping points: + +1. Look for version boundaries where major API changes happen +2. Prefer stopping at versions where container/thread APIs change +3. Each stage should be independently buildable and testable + +Known historical API breakpoints (update as upstream evolves): +- **0.20.0**: `set_import_users` lost second arg, user/group structs on threadinfo replaced with `m_uid`/`m_gid` +- **0.21.0**: Container engine subsystem removed entirely. `m_container_id` removed from threadinfo (collector uses cgroup extraction instead of upstream's container plugin). 
`m_thread_manager` changed to `shared_ptr`. `build_threadinfo()`/`add_thread()` removed from sinsp. Enter events for many syscalls deprecated (`EF_OLD_VERSION`). +- **0.22.0**: `get_thread_ref` removed from sinsp (use `find_thread`). `get_container_id()` removed from threadinfo. `extract_single` API changed in filterchecks. +- **0.23.0+**: Parent thread traversal moved to thread_manager. `get_thread_info(bool)` signature changed to `get_thread_info()` (no bool). `m_user`/`m_group` structs removed (use `m_uid`/`m_gid` directly). + +## Step 5: Execute Rebase (per stage) + +```sh +cd falcosecurity-libs +git fetch falco +git switch upstream-main && git merge --ff-only falco/master && git push origin upstream-main --tags +git switch +git switch -c -stackrox +git rebase +# Resolve conflicts using categorization from Step 2 +# For each conflict: check if patch is still needed, compare against upstream equivalent +git push -u origin -stackrox +``` + +Always rebase onto upstream **tags** (not master tip) per `docs/falco-update.md`. + +## Step 6: Update Collector Code + +After each rebase stage, update collector code for API changes found in Step 3. 
+ +### Common patterns of change + +**Container ID access** (from 0.21.0+): +- Container plugin (`libcontainer.so`) is NOT used — collector extracts container IDs directly from thread cgroups +- `GetContainerID(sinsp_threadinfo&)` iterates `tinfo.cgroups()` and calls `ExtractContainerIDFromCgroup()` (Utility.cpp) +- `GetContainerID(sinsp_evt*)` extracts from event's thread info cgroups +- sinsp filter uses `proc.pid != proc.vpid` (built-in field) instead of `container.id != 'host'` (plugin field) +- No plugin loading, no `libcontainer.so`, no Go worker dependency +- `ContainerMetadata` class was removed — namespace/label lookup is not available without the plugin +- `ContainerInfoInspector` endpoint (`/state/containers/:id`) still exists but always returns empty namespace + +**Thread access** (from 0.22.0+): +- Replace `get_thread_ref(tid, true)` with `m_thread_manager->find_thread(tid, false)` or `m_thread_manager->get_thread(tid, false)` +- `get_thread_info(true)` → `get_thread_info()` (no bool parameter) + +**User/group** (from 0.20.0+): +- Replace `m_user.uid()` / `m_group.gid()` with `m_uid` / `m_gid` + +**Thread creation in tests**: +- Replace `build_threadinfo()` with `inspector->get_threadinfo_factory().create()` +- Replace `add_thread()` with `inspector->m_thread_manager->add_thread(std::move(tinfo), false)` + +**Lineage traversal** (from 0.23.0+): +- Replace `mt->traverse_parent_state(visitor)` with `inspector_->m_thread_manager->traverse_parent_state(*mt, visitor)` +- Visitor type: `sinsp_thread_manager::visitor_func_t` instead of `sinsp_threadinfo::visitor_func_t` + +**FilterCheck API** (from 0.22.0+): +- `extract_single(event, &len)` → `extract(event, vals)` vector-based API +- Add null guards for `filter_check` pointers (plugin-provided checks may not be initialized) + +**UDP test adjustments** (from 0.23.0+): +- UDP tests need 30-second timeouts (vs 5-10s for TCP) due to BPF event delivery pipeline latency +- `TestMultipleDestinations`: sendmmsg 
message count × server count must not exceed `MAX_SENDMMSG_RECVMMSG_SIZE` (16) +- File: `integration-tests/suites/udp_networkflow.go` + +## Step 7: Known Gotchas + +### Exepath Resolution (CRITICAL) + +Modern drivers (0.21.0+) **no longer send execve enter events** (marked `EF_OLD_VERSION`). The exepath is supposed to come from the `trusted_exepath` parameter (param 28) in the exit event, which uses the kernel's `d_path()`. + +However, the StackRox fork **disables trusted_exepath** (`USE_TRUSTED_EXEPATH=false`) because it resolves symlinks — giving `/bin/busybox` instead of `/bin/ls` in busybox containers, breaking ACS policies. + +**Without either source, `m_exepath` inherits the parent's value on clone** (e.g., `/usr/bin/podman`), causing all container processes to show the container runtime's path. + +**Fix**: Add a fallback in `parse_execve_exit` (parsers.cpp) that uses **Parameter 31** (`filename`, which is `bprm->filename`) from the exit event. This contains the first argument to execve as provided by the caller — same behavior as the old enter event reconstruction: + +```cpp +// After the retrieve_enter_event() block, add: +if(!exepath_set) { + /* Parameter 31: filename (type: PT_FSPATH) */ + if(const auto filename_param = evt.get_param(30); !filename_param->empty()) { + std::string_view filename = filename_param->as(); + if(filename != "") { + std::string fullpath = sinsp_utils::concatenate_paths( + evt.get_tinfo()->get_cwd(), filename); + evt.get_tinfo()->set_exepath(std::move(fullpath)); + } + } +} +``` + +**How to detect this bug**: Integration test `TestProcessViz` fails with all processes showing the container runtime binary (e.g., `/usr/bin/podman`) as their ExePath. 
+ +**Key event parameters** (PPME_SYSCALL_EXECVE_19_X, 0-indexed): +- 1: exe (argv[0]), 6: cwd, 13: comm (always correct) +- 27: trusted_exepath (kernel d_path, resolves symlinks — disabled) +- 30: filename (bprm->filename, first arg to execve — use this) + +### CMake Dependency Cycle + +Upstream has a cyclic dependency: `events_dimensions_generator → scap_event_schema → scap → pman → ProbeSkeleton → EventsDimensions → generator`. Upstream doesn't hit it because their CI uses CMake 3.22; our builder uses 3.31+ which enforces cycle detection. + +**Fix**: Compile the 3 required driver source files (`event_table.c`, `flags_table.c`, `dynamic_params_table.c`) directly into the generator instead of linking `scap_event_schema`. This fix lives in `driver/modern_bpf/CMakeLists.txt` and must be carried forward each rebase. + +### ASSERT_TO_LOG Circular Dependency + +Collector compiles with `-DASSERT_TO_LOG` so assertions log instead of aborting. The old approach using `libsinsp_logger()` causes circular includes because `logger.h` includes `sinsp_public.h`. + +**Fix**: Use `falcosecurity_log_fn` callback from `scap_log.h` (same pattern as `scap_assert.h`). This is a tiny header with no dependencies. The callback is set by sinsp when it opens the scap handle. + +### Container Plugin Not Used + +The upstream container plugin (`libcontainer.so`) is NOT used by collector. Container IDs are extracted directly from thread cgroups via `ExtractContainerIDFromCgroup()` in `Utility.cpp`. This avoids the Go worker dependency, CGO bridge, container runtime dependency, startup race conditions, and silent event-dropping failure modes of the plugin. The sinsp filter uses `proc.pid != proc.vpid` (built-in) instead of `container.id != 'host'` (plugin-provided). If a future falcosecurity-libs update changes cgroup format or thread API, update `ExtractContainerIDFromCgroup()` and `GetContainerID()` in Utility.cpp. 
+ +### BPF Verifier Compatibility + +BPF verifier behavior varies significantly across: +- **Kernel versions**: Older kernels have stricter limits (see kernel matrix below) +- **Clang versions**: clang > 19 can produce code that exceeds instruction counts +- **Platform kernels**: RHEL SAP, Google COS have custom verifiers or clang-compiled kernels with different BTF attributes + +**The most insidious class of bug**: Clang inlines `__always_inline` BPF helper functions and optimizes away null checks that the BPF verifier requires. This happens when: +1. Multiple inlined functions each call `bpf_map_lookup_elem()` on the same map +2. The compiler deduces from the first successful lookup that subsequent lookups can't return NULL +3. It removes the null check, but the verifier tracks each lookup independently as `map_value_or_null` +4. Result: `R0 invalid mem access 'map_value_or_null'` on older kernels + +**Example** (found in 0.23.1): `syscall_exit.bpf.c:sampling_logic_exit()` called `maps__get_dropping_mode()` then `maps__get_sampling_ratio()` — both inlined functions that do `bpf_map_lookup_elem(&capture_settings, &key)`. Clang kept the null check for the first but dropped it for the second. Fix: do a single `maps__get_capture_settings()` call and access fields directly. + +**Fix applied**: Refactored `sys_exit` BPF program to do one `maps__get_capture_settings()` lookup in the caller, pass the pointer to `sampling_logic_exit()`, and reuse it for `drop_failed` check. No redundant map lookups = no optimized-away null checks. 
+ +Common fix patterns: +- **Single lookup + direct field access**: Call the map lookup once, pass the pointer, access fields directly (preferred) +- **`volatile` qualifier**: Mark map lookup result as `volatile` to prevent optimization +- **Compiler barriers**: `asm volatile("")` after null check +- **Reduce loop bounds**: e.g., `MAX_IOVCNT` 32 → 16 +- **`#pragma unroll`**: For loops the verifier can't bound +- **Break pointer chains**: Read through intermediate variables with null checks (e.g., `task->cred` on COS where kernel is clang-compiled with RCU attributes) +- **`const` qualifiers**: On credential struct pointers + +### CI Kernel Compatibility Matrix + +The BPF probe must load on all CI platforms. After each update, verify against this matrix: + +| Platform | Kernel | Notes | +|---|---|---| +| Fedora CoreOS | 6.18+ | Newest kernel, most permissive verifier | +| Ubuntu 24.04 | 6.17+ | GCP VM, works with modern BPF | +| Ubuntu 22.04 | 6.8 | GCP VM, stricter verifier — **common failure point** | +| COS stable | 6.6 | Google kernel, clang-compiled — RCU/BTF differences | +| RHEL 9 | 5.14 | Oldest supported kernel — **most restrictive verifier** | +| RHEL SAP | 5.14 | Same kernel as RHEL 9 but different config | +| Flatcar | varies | Container Linux | +| ARM64 variants | varies | rhcos-arm64, cos-arm64, ubuntu-arm, fcarm | +| s390x | varies | rhel-s390x | +| ppc64le | varies | rhel-ppc64le | + +**ubuntu-os** CI job runs on BOTH Ubuntu 22.04 AND 24.04 VMs. A failure on either fails the whole job. + +**How to diagnose BPF loading failures from CI**: +1. Download the logs artifact (e.g., `ubuntu-os-logs`) from the GitHub Actions run +2. Find `collector.log` under `container-logs//core-bpf//` +3. Search for `failed to load` — the line before it shows the verifier error +4. The verifier log shows exact instruction and register state at the point of rejection +5. 
Compare against master's CI run to confirm it's a regression + +### Network Signal Handler: UDP send/recv Socket State (CRITICAL) + +**Problem**: `sinsp::parse_rw_exit()` marks socket fd as "failed" (`set_socket_failed()`) when ANY send/recv syscall returns negative (e.g., EAGAIN from timeout). Unlike `connect()`, the success path for send/recv does NOT call `set_socket_connected()` to clear the flag. Result: once a UDP socket gets a single EAGAIN (common with `SO_RCVTIMEO`), all subsequent events on that fd are rejected by `GetConnection()`. + +**Fix applied** in `collector/lib/NetworkSignalHandler.cpp`: Skip `is_socket_failed()` / `is_socket_pending()` checks for send/recv events (identified by `strncmp(evt_name, "send", 4)` or `strncmp(evt_name, "recv", 4)`). These checks are only relevant for TCP connection establishment (connect/accept/getsockopt). + +**How to detect**: UDP network flow tests fail — connections from containers using `recvfrom`/`recvmsg`/`recvmmsg` with `SO_RCVTIMEO` are never reported. The server's receive call times out → EAGAIN → fd marked failed → all subsequent successful receives ignored. + +## Step 8: Validate Each Stage + +### Build Commands + +```bash +# Build collector image (from repo root, NOT from collector/ subdirectory) +make image + +# Run unit tests +make unittest + +# Run specific integration test (from integration-tests/ directory) +cd integration-tests +DOCKER_HOST=unix:///run/podman/podman.sock COLLECTOR_LOG_LEVEL=debug make TestProcessNetwork +DOCKER_HOST=unix:///run/podman/podman.sock COLLECTOR_LOG_LEVEL=debug make TestUdpNetworkFlow +``` + +**Important**: Use `make image` from the repo root. Do NOT use `make -C collector image` — there is no `image` target in the collector subdirectory Makefile. 
+ +### Validation Checklist + +Run this checklist after each stage: + +- [ ] `falcosecurity-libs` builds via cmake `add_subdirectory` +- [ ] Each surviving patch verified: diff against original to ensure no content loss +- [ ] `make image` succeeds on amd64 (builds collector binary + container image) +- [ ] `make unittest` passes (all test suites, especially ProcessSignalFormatterTest) +- [ ] Integration tests pass (see key tests below) +- [ ] Multi-arch compilation: arm64, ppc64le, s390x +- [ ] Container ID attribution works via cgroup extraction (not all showing empty or host) +- [ ] Process exepaths are correct (not showing container runtime binary like `/usr/bin/podman`) +- [ ] Network signal handler receives correct container IDs +- [ ] Runtime self-checks pass +- [ ] BPF probe loads on older kernels (check CI results for RHEL 9, Ubuntu 22.04, COS) + +### Key Integration Tests + +- **TestProcessNetwork** (TestProcessViz + TestNetworkFlows + TestProcessLineageInfo): + - Verifies process ExePath, Name, Args for container processes + - Catches exepath regression (all paths show container runtime) + - Verifies network connections attributed to correct containers + - Verifies parent process lineage chains stop at container boundaries +- **TestUdpNetworkFlow**: Verifies UDP connection tracking across all send/recv syscall combinations: + - Tests: sendto, sendmsg, sendmmsg × recvfrom, recvmsg, recvmmsg (9 combinations) + - TestMultipleDestinations: one client → multiple servers (watch `MAX_SENDMMSG_RECVMMSG_SIZE=16`) + - TestMultipleSources: multiple clients → one server + - Uses 30-second timeouts (UDP BPF event pipeline is slower than TCP) + - **If `recvfrom` tests fail but `recvmsg` passes**: check `is_socket_failed()` handling in NetworkSignalHandler +- **TestConnectionsAndEndpointsUDPNormal**: UDP endpoint detection without send/recv tracking +- **TestCollectorStartup**: Basic smoke test — catches BPF loading failures immediately + +### Diagnosing CI Failures + 
+
+1. Check if the failure is a **BPF loading crash** (exit code 139, `scap_init` error) vs a **test logic failure**
+2. Compare against master's CI run — if master passes on the same platform, it's a regression
+3. Download log artifacts: `gh api repos/stackrox/collector/actions/artifacts/<artifact-id>/zip > logs.zip`
+4. The `collector.log` file in the artifact contains full libbpf output including verifier errors
+5. The test framework only shows the last few lines of collector logs in the CI output — always check the full artifact
+
+## Step 9: Final Update
+
+```sh
+cd <collector-repo-root>
+cd falcosecurity-libs && git checkout <version>-stackrox
+cd .. && git add falcosecurity-libs
+```
+
+Update `docs/falco-update.md` with:
+- Version transition (e.g., "0.23.1 → 0.25.0")
+- Any new upstream API changes requiring collector-side fixes
+- New StackRox patches added, patches dropped (upstreamed)
+- Known issues or workarounds
+
+## PR Strategy
+
+Each stage should produce **two PRs**:
+1. **Fork PR** targeting `upstream-main` in `stackrox/falcosecurity-libs` (the rebased branch)
+2. **Collector PR** updating the submodule pointer and making collector-side code changes
+
+## Quick Reference: Event Architecture
+
+### How Process Events Flow
+
+1. **Kernel BPF** captures syscall events → writes to ring buffer
+2. **libscap** reads ring buffer → produces `scap_evt` structs
+3. **libsinsp parsers** (`parsers.cpp`) process events:
+   - `reset()`: looks up/creates thread info, validates enter/exit event matching
+   - `parse_clone_exit_caller/child()`: creates child thread info, inherits parent fields
+   - `parse_execve_exit()`: updates thread info with new process details
+4. 
**Collector** (`ProcessSignalFormatter`) reads thread info fields via `EventExtractor` + +### Key Thread Info Fields + +| Field | Source | Notes | +|-------|--------|-------| +| `m_comm` | Exit event param 13 | Always correct (kernel task_struct->comm) | +| `m_exe` | Exit event param 1 | argv[0], may be relative | +| `m_exepath` | Enter event reconstruction OR param 27/30 | See "Exepath Resolution" gotcha | +| `m_pid` | Exit event param 4 | | +| `m_uid`/`m_gid` | Exit event param 26/29 | Was `m_user.uid()`/`m_group.gid()` before 0.20.0 | +| container_id | Extracted from thread cgroups via `GetContainerID()` | Was `m_container_id` before 0.21.0; plugin not used | + +### Enter Event Deprecation + +Upstream removed enter events to reduce ~50% of kernel/userspace overhead (proposal: `proposals/20240901-disable-support-for-syscall-enter-events.md`). All parameters moved to exit events. A scap converter handles old capture files. Any code depending on `retrieve_enter_event()` will silently fail with modern drivers — check for fallbacks using exit event parameters. 
+ +## Step 10: Update This Skill + +**This step is mandatory.** After completing an update, review and update this skill file (`.claude/commands/update-falco-libs.md`) with anything learned during the process: + +- **New API breakpoints**: Add entries to "Known historical API breakpoints" (Step 4) for any new breaking changes encountered +- **New StackRox patches**: Update the "Current StackRox Patches" list (Step 2) — add new patches, remove ones that were upstreamed +- **New gotchas**: Add to "Step 7: Known Gotchas" if you discovered new pitfalls (BPF verifier issues, parser bugs, build problems) +- **Outdated steps**: Remove or correct any steps that no longer apply (e.g., if an API listed as "changed in 0.22.0" is now the only way and doesn't need a migration note) +- **CI matrix updates**: Update the kernel compatibility matrix if CI platforms changed (new VM images, new kernel versions, platforms added/removed) +- **Fix patterns**: Add new "Common patterns of change" (Step 6) for any collector-side adaptations that future updates will likely need +- **Build/test changes**: Update build commands or test expectations if they changed + +The goal is that the next person (or AI) performing an update has all the context from previous updates available, without needing to rediscover issues that were already solved. diff --git a/.gitignore b/.gitignore index 25842a918b..7796894073 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,7 @@ ansible/ci/inventory_ibmcloud.yml # vcpkg vcpkg_installed/ vcpkg-manifest-install.log + +# Dolt database files (added by bd init) +.dolt/ +*.db diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..c951c0757c --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,150 @@ +# Agent Instructions + +This project uses **bd** (beads) for issue tracking. Run `bd onboard` to get started. 
+
+## Quick Reference
+
+```bash
+bd ready # Find available work
+bd show <issue-id> # View issue details
+bd update <issue-id> --claim # Claim work atomically
+bd close <issue-id> # Complete work
+bd sync # Sync with git
+```
+
+## Non-Interactive Shell Commands
+
+**ALWAYS use non-interactive flags** with file operations to avoid hanging on confirmation prompts.
+
+Shell commands like `cp`, `mv`, and `rm` may be aliased to include `-i` (interactive) mode on some systems, causing the agent to hang indefinitely waiting for y/n input.
+
+**Use these forms instead:**
+```bash
+# Force overwrite without prompting
+cp -f source dest # NOT: cp source dest
+mv -f source dest # NOT: mv source dest
+rm -f file # NOT: rm file
+
+# For recursive operations
+rm -rf directory # NOT: rm -r directory
+cp -rf source dest # NOT: cp -r source dest
+```
+
+**Other commands that may prompt:**
+- `scp` - use `-o BatchMode=yes` for non-interactive
+- `ssh` - use `-o BatchMode=yes` to fail instead of prompting
+- `apt-get` - use `-y` flag
+- `brew` - use `HOMEBREW_NO_AUTO_UPDATE=1` env var
+
+
+## Issue Tracking with bd (beads)
+
+**IMPORTANT**: This project uses **bd (beads)** for ALL issue tracking. Do NOT use markdown TODOs, task lists, or other tracking methods.
+
+### Why bd? 
+
+- Dependency-aware: Track blockers and relationships between issues
+- Version-controlled: Built on Dolt with cell-level merge
+- Agent-optimized: JSON output, ready work detection, discovered-from links
+- Prevents duplicate tracking systems and confusion
+
+### Quick Start
+
+**Check for ready work:**
+
+```bash
+bd ready --json
+```
+
+**Create new issues:**
+
+```bash
+bd create "Issue title" --description="Detailed context" -t bug|feature|task -p 0-4 --json
+bd create "Issue title" --description="What this issue is about" -p 1 --deps discovered-from:bd-123 --json
+```
+
+**Claim and update:**
+
+```bash
+bd update <issue-id> --claim --json
+bd update bd-42 --priority 1 --json
+```
+
+**Complete work:**
+
+```bash
+bd close bd-42 --reason "Completed" --json
+```
+
+### Issue Types
+
+- `bug` - Something broken
+- `feature` - New functionality
+- `task` - Work item (tests, docs, refactoring)
+- `epic` - Large feature with subtasks
+- `chore` - Maintenance (dependencies, tooling)
+
+### Priorities
+
+- `0` - Critical (security, data loss, broken builds)
+- `1` - High (major features, important bugs)
+- `2` - Medium (default, nice-to-have)
+- `3` - Low (polish, optimization)
+- `4` - Backlog (future ideas)
+
+### Workflow for AI Agents
+
+1. **Check ready work**: `bd ready` shows unblocked issues
+2. **Claim your task atomically**: `bd update <issue-id> --claim`
+3. **Work on it**: Implement, test, document
+4. **Discover new work?** Create linked issue:
+   - `bd create "Found bug" --description="Details about what was found" -p 1 --deps discovered-from:<parent-id>`
+5. **Complete**: `bd close <issue-id> --reason "Done"`
+
+### Auto-Sync
+
+bd automatically syncs with git:
+
+- Exports to `.beads/issues.jsonl` after changes (5s debounce)
+- Imports from JSONL when newer (e.g., after `git pull`)
+- No manual export/import needed! 
+ +### Important Rules + +- ✅ Use bd for ALL task tracking +- ✅ Always use `--json` flag for programmatic use +- ✅ Link discovered work with `discovered-from` dependencies +- ✅ Check `bd ready` before asking "what should I work on?" +- ❌ Do NOT create markdown TODO lists +- ❌ Do NOT use external issue trackers +- ❌ Do NOT duplicate tracking systems + +For more details, see README.md and docs/QUICKSTART.md. + +## Landing the Plane (Session Completion) + +**When ending a work session**, you MUST complete ALL steps below. Work is NOT complete until `git push` succeeds. + +**MANDATORY WORKFLOW:** + +1. **File issues for remaining work** - Create issues for anything that needs follow-up +2. **Run quality gates** (if code changed) - Tests, linters, builds +3. **Update issue status** - Close finished work, update in-progress items +4. **PUSH TO REMOTE** - This is MANDATORY: + ```bash + git pull --rebase + bd sync + git push + git status # MUST show "up to date with origin" + ``` +5. **Clean up** - Clear stashes, prune remote branches +6. **Verify** - All changes committed AND pushed +7. 
**Hand off** - Provide context for next session + +**CRITICAL RULES:** +- Work is NOT complete until `git push` succeeds +- NEVER stop before pushing - that leaves work stranded locally +- NEVER say "ready to push when you are" - YOU must push +- If push fails, resolve and retry until it succeeds + + diff --git a/collector/CMakeLists.txt b/collector/CMakeLists.txt index 2d0a6a2152..4b1ec9a2e1 100644 --- a/collector/CMakeLists.txt +++ b/collector/CMakeLists.txt @@ -87,7 +87,6 @@ set(USE_BUNDLED_DEPS OFF CACHE BOOL "Enable bundled dependencies instead of usin set(USE_BUNDLED_CARES OFF CACHE BOOL "Enable bundled dependencies instead of using the system ones" FORCE) set(BUILD_LIBSCAP_GVISOR OFF CACHE BOOL "Do not build gVisor support" FORCE) set(MINIMAL_BUILD OFF CACHE BOOL "Minimal" FORCE) -set(SINSP_SLIM_THREADINFO ON CACHE BOOL "Slim threadinfo" FORCE) set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build position independent libraries and executables" FORCE) set(LIBELF_LIB_SUFFIX ".so" CACHE STRING "Use libelf.so" FORCE) @@ -101,6 +100,6 @@ set(SCAP_HOST_ROOT_ENV_VAR_NAME "COLLECTOR_HOST_ROOT" CACHE STRING "Host root en set(BUILD_LIBSCAP_MODERN_BPF ON CACHE BOOL "Enable modern bpf engine" FORCE) set(MODERN_BPF_DEBUG_MODE ${BPF_DEBUG_MODE} CACHE BOOL "Enable BPF debug prints" FORCE) -set(MODERN_BPF_EXCLUDE_PROGS "^(openat2|ppoll|setsockopt|io_uring_setup|nanosleep)$" CACHE STRING "Set of syscalls to exclude from modern bpf engine " FORCE) +set(MODERN_BPF_EXCLUDE_PROGS "^(openat2|ppoll|setsockopt|io_uring_setup|nanosleep|pread64|preadv|pwritev|read|readv|writev|recv|process_vm_readv|process_vm_writev)$" CACHE STRING "Set of syscalls to exclude from modern bpf engine " FORCE) add_subdirectory(${FALCO_DIR} falco) diff --git a/collector/lib/CollectorService.cpp b/collector/lib/CollectorService.cpp index 71b6508cee..1d6243d591 100644 --- a/collector/lib/CollectorService.cpp +++ b/collector/lib/CollectorService.cpp @@ -65,7 +65,7 @@ 
CollectorService::CollectorService(CollectorConfig& config, std::atomic()); if (config.IsIntrospectionEnabled()) { - civet_endpoints_.emplace_back(std::make_unique(system_inspector_.GetContainerMetadataInspector())); + civet_endpoints_.emplace_back(std::make_unique()); civet_endpoints_.emplace_back(std::make_unique(conn_tracker_)); civet_endpoints_.emplace_back(std::make_unique(config_)); } diff --git a/collector/lib/ContainerEngine.h b/collector/lib/ContainerEngine.h deleted file mode 100644 index 63978528c9..0000000000 --- a/collector/lib/ContainerEngine.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include "container_engine/container_cache_interface.h" -#include "container_engine/container_engine_base.h" -#include "threadinfo.h" - -namespace collector { -class ContainerEngine : public libsinsp::container_engine::container_engine_base { - public: - ContainerEngine(libsinsp::container_engine::container_cache_interface& cache) : libsinsp::container_engine::container_engine_base(cache) {} - - bool resolve(sinsp_threadinfo* tinfo, bool query_os_for_missing_info) override { - for (const auto& cgroup : tinfo->cgroups()) { - auto container_id = ExtractContainerIDFromCgroup(cgroup.second); - - if (container_id) { - tinfo->m_container_id = *container_id; - return true; - } - } - - return false; - } -}; -} // namespace collector diff --git a/collector/lib/ContainerInfoInspector.cpp b/collector/lib/ContainerInfoInspector.cpp index 5210dd81c8..ee61d83198 100644 --- a/collector/lib/ContainerInfoInspector.cpp +++ b/collector/lib/ContainerInfoInspector.cpp @@ -23,7 +23,7 @@ bool ContainerInfoInspector::handleGet(CivetServer* server, struct mg_connection Json::Value root; root["container_id"] = container_id; - root["namespace"] = std::string(container_metadata_inspector_->GetNamespace(container_id)); + root["namespace"] = ""; mg_printf(conn, "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n"); mg_printf(conn, "%s\r\n", 
writer_.write(root).c_str()); diff --git a/collector/lib/ContainerInfoInspector.h b/collector/lib/ContainerInfoInspector.h index f4ae3b745c..aa9056045d 100644 --- a/collector/lib/ContainerInfoInspector.h +++ b/collector/lib/ContainerInfoInspector.h @@ -2,22 +2,15 @@ #include #include -#include #include -#include #include "CivetWrapper.h" -#include "ContainerMetadata.h" #include "json/writer.h" namespace collector { -using QueryParams = std::unordered_map; - class ContainerInfoInspector : public CivetWrapper { public: - ContainerInfoInspector(const std::shared_ptr& cmi) : container_metadata_inspector_(cmi) {} - // implementation of CivetHandler bool handleGet(CivetServer* server, struct mg_connection* conn) override; @@ -28,7 +21,6 @@ class ContainerInfoInspector : public CivetWrapper { private: static const std::string kBaseRoute; - std::shared_ptr container_metadata_inspector_; Json::FastWriter writer_; }; diff --git a/collector/lib/ContainerMetadata.cpp b/collector/lib/ContainerMetadata.cpp index 343e9c6a5a..a34de2d65a 100644 --- a/collector/lib/ContainerMetadata.cpp +++ b/collector/lib/ContainerMetadata.cpp @@ -2,6 +2,7 @@ #include +#include "Logging.h" #include "system-inspector/EventExtractor.h" namespace collector { @@ -11,8 +12,7 @@ ContainerMetadata::ContainerMetadata(sinsp* inspector) : event_extractor_(std::m } std::string ContainerMetadata::GetNamespace(sinsp_evt* event) { - const char* ns = event_extractor_->get_k8s_namespace(event); - return ns != nullptr ? 
ns : ""; + return ""; } std::string ContainerMetadata::GetNamespace(const std::string& container_id) { @@ -20,19 +20,11 @@ std::string ContainerMetadata::GetNamespace(const std::string& container_id) { } std::string ContainerMetadata::GetContainerLabel(const std::string& container_id, const std::string& label) { - auto containers = inspector_->m_container_manager.get_containers(); - const auto& container = containers->find(container_id); - if (container == containers->end()) { - return ""; - } - - const auto& labels = container->second->m_labels; - const auto& label_it = labels.find(label); - if (label_it == labels.end()) { - return ""; - } - - return label_it->second; + // Container labels are not available through the sinsp API. + CLOG_THROTTLED(DEBUG, std::chrono::seconds(300)) + << "Container label lookup by container ID is not supported: " + << "container_id=" << container_id << " label=" << label; + return ""; } } // namespace collector \ No newline at end of file diff --git a/collector/lib/NetworkSignalHandler.cpp b/collector/lib/NetworkSignalHandler.cpp index df457d5ef5..a78c29477f 100644 --- a/collector/lib/NetworkSignalHandler.cpp +++ b/collector/lib/NetworkSignalHandler.cpp @@ -6,6 +6,7 @@ #include #include "EventMap.h" +#include "Utility.h" #include "system-inspector/EventExtractor.h" namespace collector { @@ -68,6 +69,7 @@ NetworkSignalHandler::~NetworkSignalHandler() = default; * result: nil */ std::optional NetworkSignalHandler::GetConnection(sinsp_evt* evt) { + const char* evt_name = evt->get_name(); auto* fd_info = evt->get_fd_info(); if (!fd_info) { @@ -75,17 +77,22 @@ std::optional NetworkSignalHandler::GetConnection(sinsp_evt* evt) { } // With collect_connection_status_ set, we can prevent reporting of asynchronous - // connections which fail. + // connections which fail. This check is only relevant for connection + // establishment events (connect, accept, getsockopt). 
For send/recv events, + // a previous failed operation on the same fd can leave the socket marked as + // "failed" even when subsequent operations succeed, because the sinsp parser + // (parse_rw_exit) does not clear the failed flag on successful send/recv. if (collect_connection_status_) { - // note: connection status tracking enablement is managed in system_inspector::Service - if (fd_info->is_socket_failed()) { - // connect() failed or getsockopt(SO_ERROR) returned a failure - return std::nullopt; - } - - if (fd_info->is_socket_pending()) { - // connect() returned E_INPROGRESS - return std::nullopt; + bool is_send_recv = (strncmp(evt_name, "send", 4) == 0 || + strncmp(evt_name, "recv", 4) == 0); + if (!is_send_recv) { + if (fd_info->is_socket_failed()) { + return std::nullopt; + } + + if (fd_info->is_socket_pending()) { + return std::nullopt; + } } } @@ -133,11 +140,12 @@ std::optional NetworkSignalHandler::GetConnection(sinsp_evt* evt) { const Endpoint* local = is_server ? &server : &client; const Endpoint* remote = is_server ? 
&client : &server; - const std::string* container_id = event_extractor_->get_container_id(evt); - if (!container_id) { + auto container_id = GetContainerID(evt); + if (container_id.empty()) { return std::nullopt; } - return {Connection(*container_id, *local, *remote, l4proto, is_server)}; + + return {Connection(container_id, *local, *remote, l4proto, is_server)}; } SignalHandler::Result NetworkSignalHandler::HandleSignal(sinsp_evt* evt) { diff --git a/collector/lib/Process.cpp b/collector/lib/Process.cpp index 632d824a03..7054257f02 100644 --- a/collector/lib/Process.cpp +++ b/collector/lib/Process.cpp @@ -5,6 +5,7 @@ #include #include "CollectorStats.h" +#include "Utility.h" #include "system-inspector/Service.h" namespace collector { @@ -32,7 +33,11 @@ std::string Process::container_id() const { WaitForProcessInfo(); if (system_inspector_threadinfo_) { - return system_inspector_threadinfo_->m_container_id; + for (const auto& [subsys, cgroup_path] : system_inspector_threadinfo_->cgroups()) { + if (auto id = ExtractContainerIDFromCgroup(cgroup_path)) { + return std::string(*id); + } + } } return NOT_AVAILABLE; diff --git a/collector/lib/ProcessSignalFormatter.cpp b/collector/lib/ProcessSignalFormatter.cpp index a588d75bd6..6420c8da3e 100644 --- a/collector/lib/ProcessSignalFormatter.cpp +++ b/collector/lib/ProcessSignalFormatter.cpp @@ -2,6 +2,8 @@ #include +#include + #include #include "internalapi/sensor/signal_iservice.pb.h" @@ -59,8 +61,8 @@ std::string extract_proc_args(sinsp_threadinfo* tinfo) { ProcessSignalFormatter::ProcessSignalFormatter( sinsp* inspector, const CollectorConfig& config) : event_names_(EventNames::GetInstance()), + inspector_(inspector), event_extractor_(std::make_unique()), - container_metadata_(inspector), config_(config) { event_extractor_->Init(inspector); } @@ -176,8 +178,9 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_evt* event) { signal->set_allocated_time(timestamp); // set container_id - if (const 
std::string* container_id = event_extractor_->get_container_id(event)) { - signal->set_container_id(*container_id); + auto container_id = GetContainerID(event); + if (!container_id.empty()) { + signal->set_container_id(container_id); } // set process lineage @@ -190,7 +193,7 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_evt* event) { } CLOG(DEBUG) << "Process (" << signal->container_id() << ": " << signal->pid() << "): " - << signal->name() << "[" << container_metadata_.GetNamespace(event) << "] " + << signal->name() << " (" << signal->exec_file_path() << ")" << " " << signal->args(); @@ -232,8 +235,8 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_threadinfo* tin signal->set_pid(tinfo->m_pid); // set user and group id credentials - signal->set_uid(tinfo->m_user.uid()); - signal->set_gid(tinfo->m_group.gid()); + signal->set_uid(tinfo->m_uid); + signal->set_gid(tinfo->m_gid); // set time auto timestamp = Allocate(); @@ -241,7 +244,7 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_threadinfo* tin signal->set_allocated_time(timestamp); // set container_id - signal->set_container_id(tinfo->m_container_id); + signal->set_container_id(GetContainerID(*tinfo)); // set process lineage std::vector lineage; @@ -265,11 +268,11 @@ std::string ProcessSignalFormatter::ProcessDetails(sinsp_evt* event) { std::stringstream ss; const std::string* path = event_extractor_->get_exepath(event); const std::string* name = event_extractor_->get_comm(event); - const std::string* container_id = event_extractor_->get_container_id(event); + auto container_id = GetContainerID(event); const char* args = event_extractor_->get_proc_args(event); const int64_t* pid = event_extractor_->get_pid(event); - ss << "Container: " << (container_id ? *container_id : "null") + ss << "Container: " << (container_id.empty() ? "null" : container_id) << ", Name: " << (name ? *name : "null") << ", PID: " << (pid ? *pid : -1) << ", Path: " << (path ? 
*path : "null") @@ -327,7 +330,7 @@ void ProcessSignalFormatter::GetProcessLineage(sinsp_threadinfo* tinfo, return; } } - sinsp_threadinfo::visitor_func_t visitor = [this, &lineage](sinsp_threadinfo* pt) { + sinsp_thread_manager::visitor_func_t visitor = [this, &lineage](sinsp_threadinfo* pt) { if (pt == NULL) { return false; } @@ -341,13 +344,13 @@ void ProcessSignalFormatter::GetProcessLineage(sinsp_threadinfo* tinfo, // // In back-ported eBPF probes, `m_vpid` will not be set for containers // running when collector comes online because /proc/{pid}/status does - // not contain namespace information, so `m_container_id` is checked - // instead. `m_container_id` is not enough on its own to identify + // not contain namespace information, so the container ID is checked + // instead. The container ID is not enough on its own to identify // containerized processes, because it is not guaranteed to be set on // all platforms. // if (pt->m_vpid == 0) { - if (pt->m_container_id.empty()) { + if (GetContainerID(*pt).empty()) { return false; } } else if (pt->m_pid == pt->m_vpid) { @@ -361,7 +364,7 @@ void ProcessSignalFormatter::GetProcessLineage(sinsp_threadinfo* tinfo, // Collapse parent child processes that have the same path if (lineage.empty() || (lineage.back().parent_exec_file_path() != pt->m_exepath)) { LineageInfo info; - info.set_parent_uid(pt->m_user.uid()); + info.set_parent_uid(pt->m_uid); info.set_parent_exec_file_path(pt->m_exepath); lineage.push_back(info); } @@ -373,7 +376,7 @@ void ProcessSignalFormatter::GetProcessLineage(sinsp_threadinfo* tinfo, return true; }; - mt->traverse_parent_state(visitor); + inspector_->m_thread_manager->traverse_parent_state(*mt, visitor); CountLineage(lineage); } diff --git a/collector/lib/ProcessSignalFormatter.h b/collector/lib/ProcessSignalFormatter.h index 8c57011c5b..ceeeb98dea 100644 --- a/collector/lib/ProcessSignalFormatter.h +++ b/collector/lib/ProcessSignalFormatter.h @@ -10,7 +10,6 @@ #include "CollectorConfig.h" 
#include "CollectorStats.h" -#include "ContainerMetadata.h" #include "EventNames.h" #include "ProtoSignalFormatter.h" @@ -55,8 +54,8 @@ class ProcessSignalFormatter : public ProtoSignalFormatter& lineage); const EventNames& event_names_; + sinsp* inspector_; std::unique_ptr event_extractor_; - ContainerMetadata container_metadata_; const CollectorConfig& config_; }; diff --git a/collector/lib/Utility.cpp b/collector/lib/Utility.cpp index 26832eada8..f1c74f09d0 100644 --- a/collector/lib/Utility.cpp +++ b/collector/lib/Utility.cpp @@ -57,9 +57,29 @@ const char* SignalName(int signum) { } } +std::string GetContainerID(sinsp_threadinfo& tinfo) { + for (const auto& [subsys, cgroup_path] : tinfo.cgroups()) { + if (auto id = ExtractContainerIDFromCgroup(cgroup_path)) { + return std::string(*id); + } + } + return {}; +} + +std::string GetContainerID(sinsp_evt* event) { + if (!event) { + return {}; + } + sinsp_threadinfo* tinfo = event->get_thread_info(); + if (!tinfo) { + return {}; + } + return GetContainerID(*tinfo); +} + std::ostream& operator<<(std::ostream& os, const sinsp_threadinfo* t) { if (t) { - os << "Container: \"" << t->m_container_id << "\", Name: " << t->m_comm << ", PID: " << t->m_pid << ", Args: " << t->m_exe; + os << "Name: " << t->m_comm << ", PID: " << t->m_pid << ", Args: " << t->m_exe; } else { os << "NULL\n"; } @@ -203,7 +223,7 @@ std::optional ExtractContainerIDFromCgroup(std::string_view cg } auto container_id_part = cgroup.substr(cgroup.size() - (CONTAINER_ID_LENGTH + 1)); - if (container_id_part[0] != '/' && container_id_part[0] != '-') { + if (container_id_part[0] != '/' && container_id_part[0] != '-' && container_id_part[0] != ':') { return {}; } diff --git a/collector/lib/Utility.h b/collector/lib/Utility.h index 04be8cd480..e15869b31b 100644 --- a/collector/lib/Utility.h +++ b/collector/lib/Utility.h @@ -14,6 +14,7 @@ // forward declarations class sinsp_threadinfo; +class sinsp_evt; namespace collector { @@ -65,6 +66,14 @@ std::string 
Str(Args&&... args) { std::ostream& operator<<(std::ostream& os, const sinsp_threadinfo* t); +// Extract container ID from a threadinfo's cgroups. +// Returns an empty string if no container ID found. +std::string GetContainerID(sinsp_threadinfo& tinfo); + +// Extract container ID from an event's thread info cgroups. +// Returns an empty string if no container ID found. +std::string GetContainerID(sinsp_evt* event); + // UUIDStr returns UUID in string format. const char* UUIDStr(); diff --git a/collector/lib/system-inspector/EventExtractor.cpp b/collector/lib/system-inspector/EventExtractor.cpp index a72c87e329..82e1ea94ca 100644 --- a/collector/lib/system-inspector/EventExtractor.cpp +++ b/collector/lib/system-inspector/EventExtractor.cpp @@ -5,6 +5,10 @@ namespace collector::system_inspector { void EventExtractor::Init(sinsp* inspector) { for (auto* wrapper : wrappers_) { std::unique_ptr check = FilterList().new_filter_check_from_fldname(wrapper->event_name, inspector, true); + if (!check) { + CLOG(WARNING) << "Filter check not available for field: " << wrapper->event_name; + continue; + } check->parse_field_name(wrapper->event_name, true, false); wrapper->filter_check.reset(check.release()); } diff --git a/collector/lib/system-inspector/EventExtractor.h b/collector/lib/system-inspector/EventExtractor.h index 94d129befc..ef58d899fb 100644 --- a/collector/lib/system-inspector/EventExtractor.h +++ b/collector/lib/system-inspector/EventExtractor.h @@ -41,9 +41,12 @@ class EventExtractor { #define FIELD_RAW(id, fieldname, type) \ public: \ const type* get_##id(sinsp_evt* event) { \ - uint32_t len; \ - auto buf = filter_check_##id##_->extract_single(event, &len); \ - if (!buf) return nullptr; \ + if (!filter_check_##id##_.filter_check) return nullptr; \ + std::vector vals_##id; \ + if (!filter_check_##id##_->extract(event, vals_##id)) return nullptr; \ + if (vals_##id.empty()) return nullptr; \ + auto len = vals_##id[0].len; \ + auto buf = vals_##id[0].ptr; \ if (len 
!= sizeof(type)) { \ CLOG_THROTTLED(WARNING, std::chrono::seconds(30)) \ << "Failed to extract value for field " << fieldname << ": expected type " << #type << " (size " \ @@ -63,9 +66,12 @@ class EventExtractor { const std::optional get_##id(sinsp_evt* event) { \ static_assert(std::is_trivially_copyable_v, \ "Attempted to create FIELD_RAW_SAFE on non trivial type"); \ - uint32_t len; \ - auto buf = filter_check_##id##_->extract_single(event, &len); \ - if (!buf) return {}; \ + if (!filter_check_##id##_.filter_check) return {}; \ + std::vector vals_##id; \ + if (!filter_check_##id##_->extract(event, vals_##id)) return {}; \ + if (vals_##id.empty()) return {}; \ + auto len = vals_##id[0].len; \ + auto buf = vals_##id[0].ptr; \ if (len != sizeof(type)) { \ CLOG_THROTTLED(WARNING, std::chrono::seconds(30)) \ << "Failed to extract value for field " << fieldname << ": expected type " << #type << " (size " \ @@ -80,39 +86,40 @@ class EventExtractor { private: \ DECLARE_FILTER_CHECK(id, fieldname) -#define FIELD_CSTR(id, fieldname) \ - public: \ - const char* get_##id(sinsp_evt* event) { \ - uint32_t len; \ - auto buf = filter_check_##id##_->extract_single(event, &len); \ - if (!buf) return nullptr; \ - return reinterpret_cast(buf); \ - } \ - \ - private: \ +#define FIELD_CSTR(id, fieldname) \ + public: \ + const char* get_##id(sinsp_evt* event) { \ + if (!filter_check_##id##_.filter_check) return nullptr; \ + std::vector vals_##id; \ + if (!filter_check_##id##_->extract(event, vals_##id)) return nullptr; \ + if (vals_##id.empty()) return nullptr; \ + return reinterpret_cast(vals_##id[0].ptr); \ + } \ + \ + private: \ DECLARE_FILTER_CHECK(id, fieldname) #define EVT_ARG(name) FIELD_CSTR(evt_arg_##name, "evt.arg." #name) #define EVT_ARG_RAW(name, type) FIELD_RAW(evt_arg_##name, "evt.rawarg." 
#name, type) -#define TINFO_FIELD_RAW(id, fieldname, type) \ - public: \ - const type* get_##id(sinsp_evt* event) { \ - if (!event) return nullptr; \ - sinsp_threadinfo* tinfo = event->get_thread_info(true); \ - if (!tinfo) return nullptr; \ - return &tinfo->fieldname; \ +#define TINFO_FIELD_RAW(id, fieldname, type) \ + public: \ + const type* get_##id(sinsp_evt* event) { \ + if (!event) return nullptr; \ + sinsp_threadinfo* tinfo = event->get_thread_info(); \ + if (!tinfo) return nullptr; \ + return &tinfo->fieldname; \ } -#define TINFO_FIELD_RAW_GETTER(id, getter, type) \ - public: \ - type internal_##id; \ - const type* get_##id(sinsp_evt* event) { \ - if (!event) return nullptr; \ - sinsp_threadinfo* tinfo = event->get_thread_info(true); \ - if (!tinfo) return nullptr; \ - internal_##id = tinfo->getter(); \ - return &internal_##id; \ +#define TINFO_FIELD_RAW_GETTER(id, getter, type) \ + public: \ + type internal_##id; \ + const type* get_##id(sinsp_evt* event) { \ + if (!event) return nullptr; \ + sinsp_threadinfo* tinfo = event->get_thread_info(); \ + if (!tinfo) return nullptr; \ + internal_##id = tinfo->getter(); \ + return &internal_##id; \ } #define TINFO_FIELD(id) TINFO_FIELD_RAW(id, m_##id, decltype(std::declval().m_##id)) @@ -129,16 +136,13 @@ class EventExtractor { // // ADD ANY NEW FIELDS BELOW THIS LINE - // Container related fields - TINFO_FIELD(container_id); - // Process related fields TINFO_FIELD(comm); TINFO_FIELD(exe); TINFO_FIELD(exepath); TINFO_FIELD(pid); - TINFO_FIELD_RAW_GETTER(uid, m_user.uid, uint32_t); - TINFO_FIELD_RAW_GETTER(gid, m_group.gid, uint32_t); + TINFO_FIELD_RAW(uid, m_uid, uint32_t); + TINFO_FIELD_RAW(gid, m_gid, uint32_t); FIELD_CSTR(proc_args, "proc.args"); // General event information @@ -148,9 +152,6 @@ class EventExtractor { FIELD_RAW_SAFE(client_port, "fd.cport", uint16_t); FIELD_RAW_SAFE(server_port, "fd.sport", uint16_t); - // k8s metadata - FIELD_CSTR(k8s_namespace, "k8s.ns.name"); - #undef TINFO_FIELD #undef 
FIELD_RAW #undef FIELD_CSTR diff --git a/collector/lib/system-inspector/Service.cpp b/collector/lib/system-inspector/Service.cpp index 95c0394416..de9d59ba7b 100644 --- a/collector/lib/system-inspector/Service.cpp +++ b/collector/lib/system-inspector/Service.cpp @@ -6,7 +6,7 @@ #include -#include "libsinsp/container_engine/sinsp_container_type.h" +#include "libsinsp/filter.h" #include "libsinsp/parsers.h" #include "libsinsp/sinsp.h" @@ -15,8 +15,6 @@ #include "CollectionMethod.h" #include "CollectorException.h" #include "CollectorStats.h" -#include "ContainerEngine.h" -#include "ContainerMetadata.h" #include "EventExtractor.h" #include "EventNames.h" #include "HostInfo.h" @@ -35,12 +33,7 @@ namespace collector::system_inspector { Service::~Service() = default; Service::Service(const CollectorConfig& config) - : inspector_(std::make_unique(true)), - container_metadata_inspector_(std::make_unique(inspector_.get())), - default_formatter_(std::make_unique( - inspector_.get(), - DEFAULT_OUTPUT_STR, - EventExtractor::FilterList())) { + : inspector_(std::make_unique(true)) { // Setup the inspector. 
// peeking into arguments has a big overhead, so we prevent it from happening inspector_->set_snaplen(0); @@ -50,7 +43,7 @@ Service::Service(const CollectorConfig& config) inspector_->disable_log_timestamps(); inspector_->set_log_callback(logging::InspectorLogCallback); - inspector_->set_import_users(config.ImportUsers(), false); + inspector_->set_import_users(config.ImportUsers()); inspector_->set_thread_timeout_s(30); inspector_->set_auto_threads_purging_interval_s(60); inspector_->m_thread_manager->set_max_thread_table_size(config.GetSinspThreadCacheSize()); @@ -62,32 +55,22 @@ Service::Service(const CollectorConfig& config) inspector_->get_parser()->set_track_connection_status(true); } - if (config.EnableRuntimeConfig()) { - uint64_t mask = 1 << CT_CRI | - 1 << CT_CRIO | - 1 << CT_CONTAINERD; - - if (config.UseDockerCe()) { - mask |= 1 << CT_DOCKER; - } - - if (config.UsePodmanCe()) { - mask |= 1 << CT_PODMAN; - } - - inspector_->set_container_engine_mask(mask); - - // k8s naming conventions specify that max length be 253 characters - // (the extra 2 are just for a nice 0xFF). - inspector_->set_container_labels_max_len(255); - } else { - auto engine = std::make_shared(inspector_->m_container_manager); - auto* container_engines = inspector_->m_container_manager.get_container_engines(); - container_engines->push_back(engine); + default_formatter_ = std::make_unique( + inspector_.get(), DEFAULT_OUTPUT_STR, EventExtractor::FilterList()); + + // Filter out host processes. In containers, pid != vpid due to PID + // namespacing. This is a built-in sinsp field that doesn't require + // any plugin. + try { + auto factory = std::make_shared( + inspector_.get(), EventExtractor::FilterList()); + sinsp_filter_compiler compiler(factory, "proc.pid != proc.vpid"); + inspector_->set_filter(compiler.compile(), "proc.pid != proc.vpid"); + } catch (const sinsp_exception& e) { + CLOG(WARNING) << "Could not set container filter: " << e.what() + << ". 
Container filtering will not be active."; } - inspector_->set_filter("container.id != 'host'"); - // The self-check handlers should only operate during start up, // so they are added to the handler list first, so they have access // to self-check events before the network and process handlers have @@ -296,7 +279,7 @@ bool Service::SendExistingProcesses(SignalHandler* handler) { } return threads->loop([&](sinsp_threadinfo& tinfo) { - if (!tinfo.m_container_id.empty() && tinfo.is_main_thread()) { + if (!GetContainerID(tinfo).empty() && tinfo.is_main_thread()) { auto result = handler->HandleExistingProcess(&tinfo); if (result == SignalHandler::ERROR || result == SignalHandler::NEEDS_REFRESH) { CLOG(WARNING) << "Failed to write existing process signal: " << &tinfo; @@ -398,7 +381,7 @@ void Service::ServePendingProcessRequests() { auto callback = request.second.lock(); if (callback) { - (*callback)(inspector_->get_thread_ref(pid, true)); + (*callback)(inspector_->m_thread_manager->get_thread(pid)); } pending_process_requests_.pop_front(); diff --git a/collector/lib/system-inspector/Service.h b/collector/lib/system-inspector/Service.h index 651e7ff7cb..1f2398c648 100644 --- a/collector/lib/system-inspector/Service.h +++ b/collector/lib/system-inspector/Service.h @@ -8,7 +8,6 @@ #include #include "ConnTracker.h" -#include "ContainerMetadata.h" #include "Control.h" #include "SignalHandler.h" #include "SignalServiceClient.h" @@ -43,8 +42,6 @@ class Service : public SystemInspector { void GetProcessInformation(uint64_t pid, ProcessInfoCallbackRef callback); - std::shared_ptr GetContainerMetadataInspector() { return container_metadata_inspector_; }; - sinsp* GetInspector() { return inspector_.get(); } Stats* GetUserspaceStats() { return &userspace_stats_; } @@ -71,7 +68,6 @@ class Service : public SystemInspector { mutable std::mutex libsinsp_mutex_; std::unique_ptr inspector_; - std::shared_ptr container_metadata_inspector_; std::unique_ptr default_formatter_; 
std::unique_ptr signal_client_; std::vector signal_handlers_; diff --git a/collector/test/ProcessSignalFormatterTest.cpp b/collector/test/ProcessSignalFormatterTest.cpp index 68e1fcb9c7..233c3bfed3 100644 --- a/collector/test/ProcessSignalFormatterTest.cpp +++ b/collector/test/ProcessSignalFormatterTest.cpp @@ -54,18 +54,18 @@ TEST(ProcessSignalFormatterTest, ProcessWithoutParentTest) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 0; tinfo->m_tid = 0; tinfo->m_ptid = -1; tinfo->m_vpid = 2; - tinfo->m_user.set_uid(7); + tinfo->m_uid = 7; tinfo->m_exepath = "qwerty"; - inspector->add_thread(std::move(tinfo)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(0).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(0, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -89,25 +89,25 @@ TEST(ProcessSignalFormatterTest, ProcessWithParentTest) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 1; - tinfo->m_user.set_uid(42); + tinfo->m_uid = 42; tinfo->m_exepath = "asdf"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 3; tinfo2->m_vpid = 2; - tinfo2->m_user.set_uid(7); + tinfo2->m_uid = 7; tinfo2->m_exepath = "qwerty"; - inspector->add_thread(std::move(tinfo)); - inspector->add_thread(std::move(tinfo2)); + 
inspector->m_thread_manager->add_thread(std::move(tinfo), false); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(1).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(1, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -134,23 +134,23 @@ TEST(ProcessSignalFormatterTest, ProcessWithParentWithPid0Test) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 0; tinfo->m_tid = 0; tinfo->m_ptid = -1; tinfo->m_vpid = 1; tinfo->m_exepath = "asdf"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 0; tinfo2->m_vpid = 2; tinfo2->m_exepath = "qwerty"; - inspector->add_thread(std::move(tinfo)); - inspector->add_thread(std::move(tinfo2)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(1).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(1, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -174,25 +174,25 @@ TEST(ProcessSignalFormatterTest, ProcessWithParentWithSameNameTest) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; 
tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 1; - tinfo->m_user.set_uid(43); + tinfo->m_uid = 43; tinfo->m_exepath = "asdf"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 3; tinfo2->m_vpid = 2; - tinfo2->m_user.set_uid(42); + tinfo2->m_uid = 42; tinfo2->m_exepath = "asdf"; - inspector->add_thread(std::move(tinfo)); - inspector->add_thread(std::move(tinfo2)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(1).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(1, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -219,36 +219,36 @@ TEST(ProcessSignalFormatterTest, ProcessWithTwoParentsTest) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 1; - tinfo->m_user.set_uid(42); + tinfo->m_uid = 42; tinfo->m_exepath = "asdf"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 3; tinfo2->m_vpid = 2; - tinfo2->m_user.set_uid(7); + tinfo2->m_uid = 7; tinfo2->m_exepath = "qwerty"; - auto tinfo3 = inspector->build_threadinfo(); + auto tinfo3 = inspector->get_threadinfo_factory().create(); tinfo3->m_pid = 4; tinfo3->m_tid = 4; tinfo3->m_ptid = 1; tinfo3->m_vpid = 9; - tinfo3->m_user.set_uid(8); + tinfo3->m_uid = 8; tinfo3->m_exepath = "uiop"; - inspector->add_thread(std::move(tinfo)); - 
inspector->add_thread(std::move(tinfo2)); - inspector->add_thread(std::move(tinfo3)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); + inspector->m_thread_manager->add_thread(std::move(tinfo3), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(4).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(4, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -278,36 +278,36 @@ TEST(ProcessSignalFormatterTest, ProcessWithTwoParentsWithTheSameNameTest) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 1; - tinfo->m_user.set_uid(42); + tinfo->m_uid = 42; tinfo->m_exepath = "asdf"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 3; tinfo2->m_vpid = 2; - tinfo2->m_user.set_uid(7); + tinfo2->m_uid = 7; tinfo2->m_exepath = "asdf"; - auto tinfo3 = inspector->build_threadinfo(); + auto tinfo3 = inspector->get_threadinfo_factory().create(); tinfo3->m_pid = 4; tinfo3->m_tid = 4; tinfo3->m_ptid = 1; tinfo3->m_vpid = 9; - tinfo3->m_user.set_uid(8); + tinfo3->m_uid = 8; tinfo3->m_exepath = "asdf"; - inspector->add_thread(std::move(tinfo)); - inspector->add_thread(std::move(tinfo2)); - inspector->add_thread(std::move(tinfo3)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); + inspector->m_thread_manager->add_thread(std::move(tinfo3), false); std::vector lineage; - 
processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(4).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(4, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -334,45 +334,45 @@ TEST(ProcessSignalFormatterTest, ProcessCollapseParentChildWithSameNameTest) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 1; - tinfo->m_user.set_uid(42); + tinfo->m_uid = 42; tinfo->m_exepath = "asdf"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 3; tinfo2->m_vpid = 2; - tinfo2->m_user.set_uid(7); + tinfo2->m_uid = 7; tinfo2->m_exepath = "asdf"; - auto tinfo3 = inspector->build_threadinfo(); + auto tinfo3 = inspector->get_threadinfo_factory().create(); tinfo3->m_pid = 4; tinfo3->m_tid = 4; tinfo3->m_ptid = 1; tinfo3->m_vpid = 9; - tinfo3->m_user.set_uid(8); + tinfo3->m_uid = 8; tinfo3->m_exepath = "asdf"; - auto tinfo4 = inspector->build_threadinfo(); + auto tinfo4 = inspector->get_threadinfo_factory().create(); tinfo4->m_pid = 5; tinfo4->m_tid = 5; tinfo4->m_ptid = 4; tinfo4->m_vpid = 10; - tinfo4->m_user.set_uid(9); + tinfo4->m_uid = 9; tinfo4->m_exepath = "qwerty"; - inspector->add_thread(std::move(tinfo)); - inspector->add_thread(std::move(tinfo2)); - inspector->add_thread(std::move(tinfo3)); - inspector->add_thread(std::move(tinfo4)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); + inspector->m_thread_manager->add_thread(std::move(tinfo3), false); + 
inspector->m_thread_manager->add_thread(std::move(tinfo4), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(5).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(5, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -399,45 +399,45 @@ TEST(ProcessSignalFormatterTest, ProcessCollapseParentChildWithSameName2Test) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 1; - tinfo->m_user.set_uid(42); + tinfo->m_uid = 42; tinfo->m_exepath = "qwerty"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 3; tinfo2->m_vpid = 2; - tinfo2->m_user.set_uid(7); + tinfo2->m_uid = 7; tinfo2->m_exepath = "asdf"; - auto tinfo3 = inspector->build_threadinfo(); + auto tinfo3 = inspector->get_threadinfo_factory().create(); tinfo3->m_pid = 4; tinfo3->m_tid = 4; tinfo3->m_ptid = 1; tinfo3->m_vpid = 9; - tinfo3->m_user.set_uid(8); + tinfo3->m_uid = 8; tinfo3->m_exepath = "asdf"; - auto tinfo4 = inspector->build_threadinfo(); + auto tinfo4 = inspector->get_threadinfo_factory().create(); tinfo4->m_pid = 5; tinfo4->m_tid = 5; tinfo4->m_ptid = 4; tinfo4->m_vpid = 10; - tinfo4->m_user.set_uid(9); + tinfo4->m_uid = 9; tinfo4->m_exepath = "asdf"; - inspector->add_thread(std::move(tinfo)); - inspector->add_thread(std::move(tinfo2)); - inspector->add_thread(std::move(tinfo3)); - inspector->add_thread(std::move(tinfo4)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); + 
inspector->m_thread_manager->add_thread(std::move(tinfo3), false); + inspector->m_thread_manager->add_thread(std::move(tinfo4), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(5).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(5, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -467,45 +467,45 @@ TEST(ProcessSignalFormatterTest, ProcessWithUnrelatedProcessTest) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 1; - tinfo->m_user.set_uid(42); + tinfo->m_uid = 42; tinfo->m_exepath = "qwerty"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 3; tinfo2->m_vpid = 2; - tinfo2->m_user.set_uid(7); + tinfo2->m_uid = 7; tinfo2->m_exepath = "asdf"; - auto tinfo3 = inspector->build_threadinfo(); + auto tinfo3 = inspector->get_threadinfo_factory().create(); tinfo3->m_pid = 4; tinfo3->m_tid = 4; tinfo3->m_ptid = 1; tinfo3->m_vpid = 9; - tinfo3->m_user.set_uid(8); + tinfo3->m_uid = 8; tinfo3->m_exepath = "uiop"; - auto tinfo4 = inspector->build_threadinfo(); + auto tinfo4 = inspector->get_threadinfo_factory().create(); tinfo4->m_pid = 5; tinfo4->m_tid = 5; tinfo4->m_ptid = 555; tinfo4->m_vpid = 10; - tinfo4->m_user.set_uid(9); + tinfo4->m_uid = 9; tinfo4->m_exepath = "jkl;"; - inspector->add_thread(std::move(tinfo)); - inspector->add_thread(std::move(tinfo2)); - inspector->add_thread(std::move(tinfo3)); - inspector->add_thread(std::move(tinfo4)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); + 
inspector->m_thread_manager->add_thread(std::move(tinfo2), false); + inspector->m_thread_manager->add_thread(std::move(tinfo3), false); + inspector->m_thread_manager->add_thread(std::move(tinfo4), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(4).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(4, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -535,31 +535,31 @@ TEST(ProcessSignalFormatterTest, CountTwoCounterCallsTest) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 1; tinfo->m_tid = 1; tinfo->m_ptid = 555; tinfo->m_vpid = 10; - tinfo->m_user.set_uid(9); + tinfo->m_uid = 9; tinfo->m_exepath = "jkl;"; - inspector->add_thread(std::move(tinfo)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(1).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(1, true).get(), lineage); - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 2; tinfo2->m_tid = 2; tinfo2->m_ptid = 555; tinfo2->m_vpid = 10; - tinfo2->m_user.set_uid(9); + tinfo2->m_uid = 9; tinfo2->m_exepath = "jkl;"; - inspector->add_thread(std::move(tinfo2)); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); std::vector lineage2; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(2).get(), lineage2); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(2, true).get(), lineage2); int count = 
collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -577,45 +577,45 @@ TEST(ProcessSignalFormatterTest, CountTwoCounterCallsTest) { } TEST(ProcessSignalFormatterTest, Rox3377ProcessLineageWithNoVPidTest) { + // This test verifies lineage traversal stops at the container boundary. + // Originally tested vpid=0 + container_id fallback (ROX-3377). + // Now tests boundary detection via pid==vpid (namespace init process). std::unique_ptr inspector(new sinsp()); CollectorStats& collector_stats = CollectorStats::GetOrCreate(); CollectorConfig config; ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; - tinfo->m_vpid = 0; - tinfo->m_user.set_uid(42); - tinfo->m_container_id = ""; + tinfo->m_vpid = 3; + tinfo->m_uid = 42; tinfo->m_exepath = "qwerty"; - auto tinfo2 = inspector->build_threadinfo(); + auto tinfo2 = inspector->get_threadinfo_factory().create(); tinfo2->m_pid = 1; tinfo2->m_tid = 1; tinfo2->m_ptid = 3; - tinfo2->m_vpid = 0; - tinfo2->m_user.set_uid(7); - tinfo2->m_container_id = "id"; + tinfo2->m_vpid = 2; + tinfo2->m_uid = 7; tinfo2->m_exepath = "asdf"; - auto tinfo3 = inspector->build_threadinfo(); + auto tinfo3 = inspector->get_threadinfo_factory().create(); tinfo3->m_pid = 4; tinfo3->m_tid = 4; tinfo3->m_ptid = 1; - tinfo3->m_vpid = 0; - tinfo3->m_user.set_uid(8); - tinfo3->m_container_id = "id"; + tinfo3->m_vpid = 9; + tinfo3->m_uid = 8; tinfo3->m_exepath = "uiop"; - inspector->add_thread(std::move(tinfo)); - inspector->add_thread(std::move(tinfo2)); - inspector->add_thread(std::move(tinfo3)); + inspector->m_thread_manager->add_thread(std::move(tinfo), false); + inspector->m_thread_manager->add_thread(std::move(tinfo2), false); + 
inspector->m_thread_manager->add_thread(std::move(tinfo3), false); std::vector lineage; - processSignalFormatter.GetProcessLineage(inspector->get_thread_ref(4).get(), lineage); + processSignalFormatter.GetProcessLineage(inspector->m_thread_manager->find_thread(4, true).get(), lineage); int count = collector_stats.GetCounter(CollectorStats::process_lineage_counts); int total = collector_stats.GetCounter(CollectorStats::process_lineage_total); @@ -641,13 +641,12 @@ TEST(ProcessSignalFormatterTest, ProcessArguments) { ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 0; - tinfo->m_user.set_uid(42); - tinfo->m_container_id = ""; + tinfo->m_uid = 42; tinfo->m_exepath = "qwerty"; std::vector args = {std::string("args")}; @@ -671,13 +670,12 @@ TEST(ProcessSignalFormatterTest, NoProcessArguments) { config.SetDisableProcessArguments(true); ProcessSignalFormatter processSignalFormatter(inspector.get(), config); - auto tinfo = inspector->build_threadinfo(); + auto tinfo = inspector->get_threadinfo_factory().create(); tinfo->m_pid = 3; tinfo->m_tid = 3; tinfo->m_ptid = -1; tinfo->m_vpid = 0; - tinfo->m_user.set_uid(42); - tinfo->m_container_id = ""; + tinfo->m_uid = 42; tinfo->m_exepath = "qwerty"; std::vector args = {std::string("args")}; diff --git a/collector/test/SystemInspectorServiceTest.cpp b/collector/test/SystemInspectorServiceTest.cpp index a6ed01e2e1..a02ccab23c 100644 --- a/collector/test/SystemInspectorServiceTest.cpp +++ b/collector/test/SystemInspectorServiceTest.cpp @@ -7,32 +7,33 @@ namespace collector::system_inspector { TEST(SystemInspectorServiceTest, FilterEvent) { std::unique_ptr inspector(new sinsp()); + const auto& factory = inspector->get_threadinfo_factory(); - sinsp_threadinfo regular_process(inspector.get()); - regular_process.m_exepath = 
"/bin/busybox"; - regular_process.m_comm = "sleep"; + auto regular_process = factory.create(); + regular_process->m_exepath = "/bin/busybox"; + regular_process->m_comm = "sleep"; - sinsp_threadinfo runc_process(inspector.get()); - runc_process.m_exepath = "runc"; - runc_process.m_comm = "6"; + auto runc_process = factory.create(); + runc_process->m_exepath = "runc"; + runc_process->m_comm = "6"; - sinsp_threadinfo proc_self_process(inspector.get()); - proc_self_process.m_exepath = "/proc/self/exe"; - proc_self_process.m_comm = "6"; + auto proc_self_process = factory.create(); + proc_self_process->m_exepath = "/proc/self/exe"; + proc_self_process->m_comm = "6"; - sinsp_threadinfo memfd_process(inspector.get()); - memfd_process.m_exepath = "memfd:runc_cloned:/proc/self/exe"; - memfd_process.m_comm = "6"; + auto memfd_process = factory.create(); + memfd_process->m_exepath = "memfd:runc_cloned:/proc/self/exe"; + memfd_process->m_comm = "6"; struct test_t { const sinsp_threadinfo* tinfo; bool expected; }; std::vector tests{ - {®ular_process, true}, - {&runc_process, false}, - {&proc_self_process, false}, - {&memfd_process, false}, + {regular_process.get(), true}, + {runc_process.get(), false}, + {proc_self_process.get(), false}, + {memfd_process.get(), false}, }; for (const auto& t : tests) { diff --git a/collector/test/UtilityTest.cpp b/collector/test/UtilityTest.cpp index f5dee2b865..14df61f69c 100644 --- a/collector/test/UtilityTest.cpp +++ b/collector/test/UtilityTest.cpp @@ -98,6 +98,11 @@ TEST(ExtractContainerIDFromCgroupTest, TestExtractContainerIDFromCgroup) { "/machine.slice/libpod-cbdfa0f1f08763b1963c30d98e11e1f052cb67f1e9b7c0ab8a6ca6c70cbcad69.scope/container/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod6eab3b7b_f0a6_4bb8_bff2_d5bc9017c04b.slice/cri-containerd-5ebf11e02dbde102cda4b76bc0e3849a65f9edac7a12bdabfd34db01b9556101.scope", "5ebf11e02dbd", }, + // containerd without SystemdCgroup (uses : 
separator) + { + "/kubepods-burstable-podbd12dd3393227d950605a2444b13c27a.slice:cri-containerd:d52db56a9c80d536a91354c0951c061187ca46249e64865a12703003d8f42366", + "d52db56a9c80", + }, // conmon { "/machine.slice/libpod-conmon-b6ce30d02945df4bbf8e8b7193b2c56ebb3cd10227dd7e59d7f7cdc2cfa2a307.scope", diff --git a/falcosecurity-libs b/falcosecurity-libs index af2b6161c6..0a04768135 160000 --- a/falcosecurity-libs +++ b/falcosecurity-libs @@ -1 +1 @@ -Subproject commit af2b6161c6060ff47b843d9ff129b9de2ed03a35 +Subproject commit 0a047681350020ae6b29307aebc4342908082f58 diff --git a/integration-tests/suites/k8s/namespace.go b/integration-tests/suites/k8s/namespace.go index 5b94df3a5e..7e0433dd6b 100644 --- a/integration-tests/suites/k8s/namespace.go +++ b/integration-tests/suites/k8s/namespace.go @@ -47,7 +47,7 @@ func (k *K8sNamespaceTestSuite) SetupSuite() { k.tests = append(k.tests, NamespaceTest{ containerID: k.Collector().ContainerID(), - expectecNamespace: collector.TEST_NAMESPACE, + expectecNamespace: "", }) k.createTargetNamespace() @@ -55,7 +55,7 @@ func (k *K8sNamespaceTestSuite) SetupSuite() { k.Require().Len(nginxID, 12) k.tests = append(k.tests, NamespaceTest{ containerID: nginxID, - expectecNamespace: NAMESPACE, + expectecNamespace: "", }) } diff --git a/integration-tests/suites/udp_networkflow.go b/integration-tests/suites/udp_networkflow.go index 9045e7466f..1aefea184a 100644 --- a/integration-tests/suites/udp_networkflow.go +++ b/integration-tests/suites/udp_networkflow.go @@ -138,8 +138,8 @@ func (s *UdpNetworkFlow) runTest(image, recv, send string, port uint32) { CloseTimestamp: nil, } - s.Sensor().ExpectConnections(s.T(), client.id, 5*time.Second, clientConnection) - s.Sensor().ExpectConnections(s.T(), server.id, 5*time.Second, serverConnection) + s.Sensor().ExpectConnections(s.T(), client.id, 30*time.Second, clientConnection) + s.Sensor().ExpectConnections(s.T(), server.id, 30*time.Second, serverConnection) } func (s *UdpNetworkFlow) 
TestMultipleDestinations() { @@ -164,7 +164,7 @@ func (s *UdpNetworkFlow) TestMultipleDestinations() { client := s.runClient(config.ContainerStartConfig{ Name: UDP_CLIENT, Image: image, - Command: newClientCmd("sendmmsg", "300", "8", servers...), + Command: newClientCmd("sendmmsg", "300", "4", servers...), Entrypoint: []string{"udp-client"}, }) log.Info("Client: %s\n", client.String()) @@ -192,9 +192,9 @@ func (s *UdpNetworkFlow) TestMultipleDestinations() { ContainerId: server.id, CloseTimestamp: nil, } - s.Sensor().ExpectConnections(s.T(), server.id, 5*time.Second, serverConnection) + s.Sensor().ExpectConnections(s.T(), server.id, 30*time.Second, serverConnection) } - s.Sensor().ExpectConnections(s.T(), client.id, 5*time.Second, clientConnections...) + s.Sensor().ExpectConnections(s.T(), client.id, 30*time.Second, clientConnections...) } func (s *UdpNetworkFlow) TestMultipleSources() { @@ -243,9 +243,9 @@ func (s *UdpNetworkFlow) TestMultipleSources() { } for i, client := range clients { - s.Sensor().ExpectConnections(s.T(), client.id, 5*time.Second, clientConnections[i]) + s.Sensor().ExpectConnections(s.T(), client.id, 30*time.Second, clientConnections[i]) } - s.Sensor().ExpectConnections(s.T(), server.id, 5*time.Second, serverConnections...) + s.Sensor().ExpectConnections(s.T(), server.id, 30*time.Second, serverConnections...) } func newServerCmd(recv string, port uint32) []string {