diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e7eae12..17443ce 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,6 +2,7 @@ name: Lint on: push env: RUSTFLAGS: -D warnings + RUSTDOCFLAGS: -D warnings CARGO_TERM_COLOR: always jobs: @@ -55,7 +56,7 @@ jobs: - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@master with: - toolchain: stable + toolchain: "1.89" components: clippy - name: Setup Rust cache uses: swatinem/rust-cache@v2 @@ -63,6 +64,8 @@ jobs: run: cargo build --all-targets --all-features - name: Clippy run: cargo clippy --no-deps --all-targets --all-features + - name: Docs + run: cargo doc --no-deps --document-private-items test: runs-on: ubuntu-latest @@ -75,13 +78,27 @@ jobs: components: llvm-tools-preview - name: Setup Rust cache uses: swatinem/rust-cache@v2 + - name: Setup vcan + id: vcan + run: | + # Ubuntu 24.04 restricts unprivileged user namespaces via AppArmor. + # The vcan-fixture crate needs user namespaces to create isolated vcan + # interfaces without root. 
+ sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 2>/dev/null || true + sudo apt-get install -y linux-modules-extra-"$(uname -r)" && \ + sudo modprobe vcan && \ + echo "available=true" >> "$GITHUB_OUTPUT" || true - name: Setup nextest uses: taiki-e/install-action@v2 with: tool: cargo-nextest,cargo-llvm-cov - name: Test + run: cargo llvm-cov --no-report nextest --all-features --no-tests=warn + - name: Test (vcan) + if: steps.vcan.outputs.available == 'true' + run: cargo llvm-cov --no-report nextest --all-features --run-ignored ignored-only + - name: Coverage report run: | - cargo llvm-cov --no-report nextest --all-features --no-tests=warn cargo llvm-cov report --cobertura --output-path coverage.xml head coverage.xml RATE="$(grep -o -m 1 -P '(?<=line-rate=").*?(?=")' coverage.xml | head -1)" @@ -89,6 +106,9 @@ jobs: PERCENT="$(echo "($RATE * 100)/1" | bc)" echo "PERCENT=$PERCENT" echo "COVERAGE_PERCENT=$PERCENT" >> $GITHUB_ENV + # Run clippy twice - once with the 1.89 MSRV, and once with the latest stable toolchain + - name: Clippy + run: cargo clippy --no-deps --all-targets --all-features - name: Update coverage badge uses: schneegans/dynamic-badges-action@v1.7.0 if: github.ref_name == github.event.repository.default_branch @@ -105,3 +125,16 @@ jobs: valColorRange: ${{ env.COVERAGE_PERCENT }} minColorRange: 40 maxColorRange: 65 + + # Canary job: verifies vcan is available on the runner. Shows yellow when the + # linux-modules-extra package drifts from the runner kernel version, which means the socketcan + # tests in the test job are being silently skipped. 
+ vcan-available: + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Setup vcan + run: | + sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 2>/dev/null || true + sudo apt-get install -y linux-modules-extra-"$(uname -r)" + sudo modprobe vcan diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3d86834..b4d9d54 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,9 +16,11 @@ jobs: - name: Extract Release Metadata shell: bash + env: + GH_TOKEN: ${{ secrets.CAN_UTILS_RS_RELEASE_TOKEN }} run: | VERSION="$(.github/parse-manifest-key.sh Cargo.toml version)" - DESCRIPTION="$(.github/parse-manifest-key.sh Cargo.toml description)" + DESCRIPTION="$(gh repo view --json description | jq -r .description)" echo "VERSION=$VERSION" >> "$GITHUB_ENV" echo "DESCRIPTION=$DESCRIPTION" >> "$GITHUB_ENV" diff --git a/Cargo.toml b/Cargo.toml index 3c930ae..2c07e1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ resolver = "3" members = [ "candumpr", "cangenr", + "vcan-fixture", ] [workspace.package] @@ -10,3 +11,10 @@ version = "0.1.0-rc0" edition = "2024" license = "MIT" rust-version = "1.89" +description = "Opinionated rewrites of can-utils in Rust" + +[workspace.dependencies] +ctor = "0.6" +eyre = "0.6" +libc = "0.2" +neli = "0.7" diff --git a/README.md b/README.md index e16a751..6a8f652 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # can-utils-rs +![lint workflow](https://github.com/Notgnoshi/can-utils-rs/actions/workflows/lint.yml/badge.svg?event=push) +![release workflow](https://github.com/Notgnoshi/can-utils-rs/actions/workflows/release.yml/badge.svg?event=push) +![code coverage](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/Notgnoshi/55f3f6cae2abdc5d011d907624dfb883/raw/can-utils-rs-coverage.json) + Opinionated rewrites of can-utils in Rust ## Purpose diff --git a/candumpr/Cargo.toml b/candumpr/Cargo.toml index b67fc75..91e5bd3 100644 --- 
a/candumpr/Cargo.toml +++ b/candumpr/Cargo.toml @@ -6,4 +6,12 @@ license.workspace = true rust-version.workspace = true description = "Log CAN traffic from multiple networks" +[features] +ci = [] + [dependencies] + +[dev-dependencies] +ctor.workspace = true +libc.workspace = true +vcan-fixture = { path = "../vcan-fixture" } diff --git a/candumpr/tests/smoke.rs b/candumpr/tests/smoke.rs new file mode 100644 index 0000000..66230b9 --- /dev/null +++ b/candumpr/tests/smoke.rs @@ -0,0 +1,96 @@ +use std::ffi::CString; +use std::{io, mem, ptr}; + +use vcan_fixture::VcanHarness; + +#[ctor::ctor] +fn setup() { + vcan_fixture::enter_namespace(); +} + +/// Verify that we can receive CAN frames from multiple vcan interfaces. This is the basic +/// operation that candumpr needs to perform. +#[test] +#[cfg_attr(feature = "ci", ignore = "requires vcan")] +fn recv_frames_from_multiple_interfaces() { + let vcans = VcanHarness::new(2).unwrap(); + + #[repr(C)] + struct CanFrame { + can_id: u32, + len: u8, + _pad: u8, + _res0: u8, + _len8_dlc: u8, + data: [u8; 8], + } + + // Send one frame on each interface and verify we can receive it on a separate socket. + for (i, iface) in vcans.names().iter().enumerate() { + let name_c = CString::new(iface.as_str()).unwrap(); + let ifindex = unsafe { libc::if_nametoindex(name_c.as_ptr()) }; + assert!(ifindex > 0, "interface {iface} not found"); + + let mut addr: libc::sockaddr_can = unsafe { mem::zeroed() }; + addr.can_family = libc::AF_CAN as u16; + addr.can_ifindex = ifindex as i32; + + // Separate tx and rx sockets. vcan delivers frames to all sockets bound to the + // interface except the sender. 
+ let tx = unsafe { libc::socket(libc::PF_CAN, libc::SOCK_RAW, libc::CAN_RAW) }; + let rx = unsafe { libc::socket(libc::PF_CAN, libc::SOCK_RAW, libc::CAN_RAW) }; + assert!(tx >= 0, "tx socket: {}", io::Error::last_os_error()); + assert!(rx >= 0, "rx socket: {}", io::Error::last_os_error()); + + for fd in [tx, rx] { + let ret = unsafe { + libc::bind( + fd, + ptr::from_ref(&addr).cast::<libc::sockaddr>(), + mem::size_of::<libc::sockaddr_can>() as u32, + ) + }; + assert_eq!(ret, 0, "bind to {iface}: {}", io::Error::last_os_error()); + } + + // Use a different source address per interface so we can verify which frame we got. + let sa = i as u8; + let frame = CanFrame { + can_id: 0x18FECA00 | (sa as u32) | libc::CAN_EFF_FLAG, + len: 3, + _pad: 0, + _res0: 0, + _len8_dlc: 0, + data: [0xAA, 0xBB, sa, 0, 0, 0, 0, 0], + }; + + let written = unsafe { + libc::write( + tx, + ptr::from_ref(&frame).cast::<libc::c_void>(), + mem::size_of::<CanFrame>(), + ) + }; + assert_eq!(written as usize, mem::size_of::<CanFrame>()); + + let mut recv_frame: CanFrame = unsafe { mem::zeroed() }; + let read = unsafe { + libc::read( + rx, + ptr::from_mut(&mut recv_frame).cast::<libc::c_void>(), + mem::size_of::<CanFrame>(), + ) + }; + assert_eq!(read as usize, mem::size_of::<CanFrame>()); + assert_eq!( + recv_frame.can_id, + 0x18FECA00 | (sa as u32) | libc::CAN_EFF_FLAG + ); + assert_eq!(&recv_frame.data[..3], &[0xAA, 0xBB, sa]); + + unsafe { + libc::close(tx); + libc::close(rx); + } + } +} diff --git a/docs/candumpr-configuration.md b/docs/candumpr-configuration.md new file mode 100644 index 0000000..316d409 --- /dev/null +++ b/docs/candumpr-configuration.md @@ -0,0 +1,13 @@ +# candumpr configuration + +**TODO** + +This document describes the TOML configuration file format for candumpr, covering: + +* File structure: top-level settings, `[defaults]`, and `[interfaces.<name>]` sections +* All available configuration keys and their types, defaults, and valid values +* Precedence rules: CLI flags, per-interface overrides, defaults, built-in defaults +* List-valued options: replace (not merge) semantics +* 
Interface discovery: CLI and TOML union, `any` constraints +* Validation rules and error conditions +* Example configurations for common deployment scenarios diff --git a/docs/design/01-candumpr-ux.md b/docs/design/01-candumpr-ux.md new file mode 100644 index 0000000..db1e937 --- /dev/null +++ b/docs/design/01-candumpr-ux.md @@ -0,0 +1,386 @@ +# candumpr UX + +## Status + +**DRAFT** + +## Scope + +This document defines the user-facing features, CLI interface, and configuration file format for +candumpr, a CAN bus logging tool. It does not cover internal implementation details. + +candumpr is an opinionated replacement for can-utils `candump`, focused on J1939 networks. It +prioritizes performance and multi-network support at the cost of broader CAN compatibility. + +A primary design goal is lossless capture: candumpr should never drop a CAN frame under normal +operating conditions, including during log file rotation. Every frame that the kernel delivers to +the socket should appear in the output. + +An additional convenience is to optionally send a J1939 address claim PGN request to ensure that the +CAN logs include address claims for every control function near the beginning of every log. + +## Features + +### Frame support + +* Only supports CAN with 29-bit extended (J1939) identifiers. +* CAN FD and CAN XL are not supported. +* Error frames are supported and logged alongside data frames. + +### Multi-interface logging + +* Supports logging from an arbitrary number of CAN interfaces simultaneously. +* Each interface can be independently configured with its own filters and settings. +* Interfaces can be specified on the CLI, in a TOML config file, or both. + +### Filtering + +Two filtering mechanisms are supported. Both can be used together. 
+ +**candump-compatible mask filters** are specified per-interface using the same syntax as candump: + +* `id:mask` -- positive match (accept when `received_id & mask == id & mask`) +* `id~mask` -- inverse match (accept when `received_id & mask != id & mask`) +* `#error_mask` -- error frame class filter (see `linux/can/error.h`) + +All values are hexadecimal. Multiple filters are comma-separated after the interface name. Appending +`j` or `J` to the filter list switches that interface from OR to AND semantics (same as candump). + +**Convenience filters** provide a more ergonomic way to filter J1939 traffic. These are specified in +the TOML config file: + +* Filter by PGN (Parameter Group Number) +* Filter by source address +* Future work: filter by ISONAME + mask +* Toggle error frame capture on or off + +Convenience filters are compiled to socket-level `id:mask` filters internally. + +When no filters are specified, all traffic is accepted. + +#### Filter combination semantics + +When multiple filters are specified on the same interface (whether candump-style masks, convenience +filters, or both), they are combined with OR by default: a frame is accepted if it matches any +filter. + +To switch to AND semantics (a frame must match all filters): + +* On the CLI, append `j` to the candump-style filter list (e.g., `can0,...,j`) +* In the TOML config, set `filter_join = "and"` on the interface or in `[defaults]` + +Both map to the `CAN_RAW_JOIN_FILTERS` socket option. + +### Output formats + +candumpr supports multiple output formats, configurable per-interface: + +* **candump** (`.log`) -- default -- the can-utils `candump -L` log file format: + `(1345212884.318850) can0 18FECA00#0011223344556677` +* **candump-tty** (`.log`) -- the can-utils `candump` console format: + `can0 18FECA00 [8] 00 11 22 33 44 55 66 77` +* **ASC** (`.asc`) -- Vector ASCII logging format, compatible with CANalyzer/CANoe and other tools + that import ASC files. 
+* **PCAP** (`.pcap`) -- packet capture format, compatible with Wireshark and tcpdump. + +When compressed, an additional `.zst` suffix is appended (e.g., `.log.zst`, `.asc.zst`). + +### Timestamps + +Timestamp mode controls how frame timestamps are displayed in candump and candump-tty output +formats. ASC and PCAP use their native timestamp conventions and ignore this setting. + +* **absolute** -- seconds since epoch with fractional seconds +* **delta** -- time elapsed since the previous received frame +* **zero** -- time elapsed since the first received frame + +Hardware timestamps from the CAN controller are used automatically when available, falling back to +kernel software timestamps with a diagnostic warning. This requires no configuration. + +### Clock correctness + +candumpr is designed to start early in the boot cycle on IoT devices that may lack a persistent RTC. +On these devices, `CLOCK_REALTIME` can be invalid (near epoch) until NTP or another time source +synchronizes it. + +candumpr will provide options to control how it detects an invalid clock and what it does with +frames captured before the clock becomes valid. Detection methods include a heuristic (is the clock +before a reasonable threshold?) and waiting for a clock step event. Behaviors may include dropping +frames, queueing them in memory, using zero-based timestamps, inserting a marker, or rotating the +log file when the clock becomes valid. The available behaviors may depend on the output format. + +One strategy for clock correctness is to give each log file a strictly monotonically increasing +index. Then at least you can tell the order of the files. candumpr should also attempt to detect and +log clock jumps to stderr so that they're less surprising if you have to reverse-engineer what the +clock did by looking only at the logs. + +This feature requires dedicated detailed design and is not fully specified here. 
+ +### File logging and rotation + +When logging to files, each monitored interface writes to its own log file. This applies even when +using the `any` interface binding; frames are separated by their source interface, and `{interface}` +resolves to the actual interface name (e.g., `can0`), not `any`. + +* Log filenames are controlled by a format string with placeholders: + * `{interface}` -- the source interface name (e.g., `can0`) + * `{start-unix}` -- Unix seconds when the log file was opened (e.g., `1741868400`) + * `{start-iso}` -- ISO 8601 timestamp when the log file was opened, without colons (e.g., + `2026-03-13T120000Z`), since colons break rsync and some filesystems. + * Default format: `candumpr-{interface}-{start-unix}` (plus the appropriate file extension). +* The log directory path supports the same `{interface}` placeholder, allowing per-interface + directory organization (e.g., `/var/log/candumpr/{interface}/`). +* If the resolved file path (directory + name + extension) would be identical for two or more + interfaces, candumpr exits with a configuration error. Disambiguation can be achieved by including + `{interface}` in the filename or directory path, or by setting different `log_dir` values + per-interface. +* File rotation can be triggered by: + * A time interval (e.g., `1h`, `30m`) + * A file size threshold (e.g., `50MB`, `1GB`) + * The value is unambiguous: size units (`B`, `KB`, `MB`, `GB`) and time units (`s`, `m`, `h`, `d`) + do not overlap. Bare integers without a unit suffix are rejected. + * SIGHUP is always available for manual rotation regardless of the configured method. +* During rotation, no frames are lost. Buffered frames are flushed to the old file before the new + file begins. +* Completed log files are never partially written. Files are written to a temporary name and renamed + atomically on completion. +* ZSTD streaming compression is optionally applied during writing. 
+* Buffered output is flushed to disk periodically (configurable interval) to limit data loss on + unexpected power loss or crash. + +When not logging to files, output goes to stdout. + +### Log retention + +When logging to files, candumpr can automatically prune old log files to prevent unbounded disk +usage. + +* **max_total_size** -- maximum total size of all completed log files across all interfaces (e.g., + `10GB`). When exceeded, the oldest completed log files are deleted regardless of which interface + produced them. Retention is checked after each log rotation. + +### J1939 address claim + +On startup and after each log rotation, candumpr can optionally broadcast a J1939 Address Claim PGN +request. This causes all devices on the bus to re-announce their addresses, ensuring each log file +contains a complete picture of which source addresses are in use. + +### Statistics + +Per-interface statistics counters are maintained and can be reported: + +* Frame count (total and per-second) +* Byte count and estimated bitrate +* Dropped frame count (frames lost due to socket buffer overflow) + +Dropped frame monitoring is always enabled. + +### Socket configuration + +* The socket receive buffer size can be configured per-interface. The tool attempts `SO_RCVBUFFORCE` + first (requires `CAP_NET_ADMIN`) and falls back to `SO_RCVBUF`. + +### Device resilience + +* If a monitored CAN interface goes down, candumpr continues running and resumes logging when the + interface comes back up. This is the default and only behavior (unlike candump, which exits by + default). 
+ +### Signal handling + +* **SIGHUP** -- trigger log file rotation +* **SIGTERM / SIGINT** -- graceful shutdown (flush buffers, finalize current log file) + +### Diagnostic logging + +Operational events are logged to stderr via `tracing`: + +* Dropped frames (socket buffer overflow) +* Bus-off state changes and recovery +* Network interface up/down events +* Startup and shutdown status +* Log file rotation events + +This keeps CAN data output (stdout or log files) clean, while ensuring operational issues are +visible. The log level can be set via `--log-level` on the CLI, `log_level` in the TOML config, or +the `CANDUMPR_LOG` environment variable (in `EnvFilter` format). The environment variable takes +precedence when set. + +### Display options (stdout only) + +When outputting to a TTY: + +* Color mode (`--color`): + * `never` -- no color or styling + * `network` -- each interface gets a distinct color applied to the entire line, to visually + distinguish traffic from different networks + * `highlight` -- use color and weight to improve readability: the interface name and timestamp are + colored, and data bytes alternate between bold and normal weight to make it easier to visually + parse byte boundaries +* TX/RX direction is always shown for each frame. + +## CLI interface + +``` +candumpr [OPTIONS] [INTERFACE[,FILTER]...] +``` + +### Positional arguments + +Interfaces are specified as positional arguments, optionally followed by comma-separated +candump-compatible filters. 
The special name `any` receives from all CAN interfaces (same as +candump): + +```sh +# Listen on all CAN interfaces that are up +candumpr any + +# No filters (accept all traffic on both interfaces) +candumpr can0 can1 + +# candump-compatible mask filters +candumpr can0,18FECA00:1FFFFFFF can1,18FEE500:1FFFFFFF + +# Inverse match +candumpr can0,18FECA00~1FFFFFFF + +# Error frame filter +candumpr can0,#FFFFFFFF + +# Join filters with AND semantics (must match all) +candumpr can0,18FECA00:1FFF0000,00000017:000000FF,j +``` + +### Options + +#### Configuration + +| Flag | Description | +| --------------------- | -------------------------------------------- | +| `-C, --config <path>` | Path to a TOML configuration file | +| `--log-level <level>` | Diagnostic log level (e.g., `info`, `debug`) | + +CLI flags apply globally to every interface. Per-interface configuration, filtering, file logging +options (directory, naming, rotation, compression, retention), and socket tuning require a TOML +config file. Interfaces specified on the CLI are merged with interfaces in the config file. 
+ +#### Output format + +| Flag | Description | +| ------------------------ | ------------------------------------------------------------------ | +| `-f, --format <format>` | Output format: `candump`, `candump-tty`, `asc`, `pcap` | +| `-t, --timestamp <mode>` | Timestamp mode: `absolute`, `delta`, `zero` (candump formats only) | +| `-c, --color <mode>` | Color mode: `never`, `network`, `highlight` | + +#### File logging + +| Flag | Description | +| ---- | ------------------------------------------------------- | +| `-l` | Log to files in the current directory (default: stdout) | + +#### J1939 + +| Flag | Description | +| ----------------------- | -------------------------------------------------------- | +| `-A`, `--address-claim` | Send address claim request on startup and after rotation | + +#### Termination + +| Flag | Description | +| -------------------- | -------------------------------------------------------- | +| `-n, --count <n>` | Exit after receiving n frames | +| `-T, --timeout <ms>` | Exit if no frames received within this many milliseconds | + +## TOML configuration file + +The `[defaults]` section provides default values for all interface settings. Individual +`[interfaces.<name>]` sections can override any default. All fields are optional at every level. 
+ +```toml +log_level = "info" # diagnostic log level for stderr output +# All logs together must stay below this limit +max_total_size = "10GB" + +[defaults] +# Output +format = "candump" # "candump" | "candump-tty" | "asc" | "pcap" +timestamp = "absolute" # "absolute" | "delta" | "zero" (candump formats only) +color = "highlight" # "never" | "network" | "highlight" + +# File logging +log_dir = "/var/log/candumpr" # supports {interface} placeholder +log_name = "candumpr-{interface}-{start-unix}" # placeholders: {interface}, {start-unix}, {start-iso} +rotate = "1h" # time or size based rotation +compress = "none" # "zstd" | "none" +zstd_level = 3 +flush_interval = "5s" + +# Filtering +error_frames = true +pgns = [] +source_addresses = [] +filter_join = "or" # "and" | "or" + +# Socket +recv_buffer = "2MB" + +# J1939 +address_claim = true + +# --- Per-interface overrides --- + +# Inherits all [defaults], overrides nothing: +[interfaces.can0] +# Overrides specific settings: +[interfaces.can1] +error_frames = false +pgns = [0xFECA, 0xFEE5] + +[interfaces.can2] +address_claim = false +source_addresses = [0x00, 0x17] +log_dir = "/var/log/candumpr/can2" + +# candump-compatible raw filters: +[interfaces.can3] +filters = ["18FECA00:1FFFFFFF", "18FEE500~1FFFF00"] + +# AND semantics for all filters on this interface: +[interfaces.can4] +pgns = [0xFECA] +source_addresses = [0x17] +filter_join = "and" +``` + +### Precedence + +Settings are resolved in this order, highest priority first: + +1. CLI flags +2. TOML `[interfaces.<name>]` +3. TOML `[defaults]` +4. Built-in defaults + +For settings available on the CLI, CLI flags apply globally and override all other sources, +including per-interface TOML settings. For example, `--format pcap` forces that format on every +interface. Most settings are only available through the TOML config file. + +List-valued options (`pgns`, `source_addresses`, `filters`) are replaced wholesale at each +precedence level, not merged. 
For example, if `[defaults]` sets `pgns = [0xFECA, 0xFEE5]` and +`[interfaces.can0]` sets `pgns = [0xFECA]`, then `can0` uses only `[0xFECA]`. + +### Interface discovery + +Interfaces to monitor are the union of: + +* Interfaces named on the CLI +* Interfaces listed in `[interfaces]` in the config file + +The special name `any` is specified on the CLI only (`candumpr any`). It binds to all CAN +interfaces, including interfaces that come up after candumpr has started. Using `any` and named +interfaces together is a configuration error, since the `any` binding would duplicate frames from +explicitly-bound interfaces. When using `any`, settings come from `[defaults]` (and CLI flags). + +Even when using `any`, log files are written per source interface (not a single combined file). + +At least one interface must be specified. diff --git a/docs/design/02-candumpr-filters.md b/docs/design/02-candumpr-filters.md new file mode 100644 index 0000000..264cafe --- /dev/null +++ b/docs/design/02-candumpr-filters.md @@ -0,0 +1,16 @@ +# candumpr filter syntax and semantics + +## Status + +**TODO** + +## Scope + +This document specifies the filter syntax and semantics for candumpr, covering: + +* candump-compatible `id:mask` and `id~mask` filter syntax +* Error frame class filters (`#error_mask`) +* Convenience filters (PGN, source address) +* How convenience filters compile to kernel-level `CAN_RAW_FILTER` entries +* Filter combination semantics (OR vs AND, `CAN_RAW_JOIN_FILTERS`) +* Interaction between candump-style and convenience filters on the same interface diff --git a/docs/design/03-candumpr-clock-correctness.md b/docs/design/03-candumpr-clock-correctness.md new file mode 100644 index 0000000..1144fb4 --- /dev/null +++ b/docs/design/03-candumpr-clock-correctness.md @@ -0,0 +1,16 @@ +# candumpr clock correctness + +## Status + +**TODO** + +## Scope + +This document specifies how candumpr handles unreliable system clocks, covering: + +* Detection of an invalid 
`CLOCK_REALTIME` (heuristic threshold, clock step events) +* Behavior for frames captured before the clock becomes valid +* Monotonic file indexing to preserve ordering independent of wall clock +* Clock jump detection and diagnostic logging +* Interaction with log file rotation and timestamps +* Interaction with output formats (candump, ASC, PCAP) that embed timestamps diff --git a/docs/design/04-candumpr-architecture.md b/docs/design/04-candumpr-architecture.md new file mode 100644 index 0000000..4b19a06 --- /dev/null +++ b/docs/design/04-candumpr-architecture.md @@ -0,0 +1,59 @@ +# candumpr architecture + +## Status + +**TODO** + +## Scope + +This document specifies the internal architecture of candumpr, covering the threading model, I/O +strategy, and the mechanisms used to achieve lossless capture. It does not cover user-facing +features or CLI/config design (see [01-candumpr-ux](01-candumpr-ux.md)). + +## Target environment + +A modern-ish Linux with io_uring and socketcan available. A ~4 core ~1 GHz arm64 CPU with 1 GB +memory and 4+ J1939 CAN networks. + +## Design goal: never drop a frame + +TODO: Define what "never drop" means precisely. Kernel socket buffer overflow is the primary +mechanism for frame loss. Describe the end-to-end path from kernel socket buffer to flushed bytes on +disk, and identify every point where frames could be lost or delayed. + +## Option 1: dedicated thread pairs + +One recv thread and one write thread per interface. The recv thread reads frames from the socket and +passes them to its paired write thread over a channel. The write thread handles formatting, +compression, and file I/O. + +TODO: Describe how io_uring fits in (recv side, write side, or both). Describe the channel type and +backpressure strategy. Describe how log rotation and SIGHUP are coordinated between the two threads. + +## Option 2: shared threads + +A small number of shared recv threads and shared write threads, rather than a dedicated pair per +interface. 
This may be a better fit for the target environment of 4 ARM cores with 4+ interfaces, +where dedicating 2 threads per interface would oversubscribe the CPU. + +TODO: Describe the multiplexing strategy (io_uring multishot recv, epoll, etc.). Describe how write +work is distributed. Describe how this interacts with per-interface file handles, rotation, and +compression state. + +## Back-of-the-napkin math + +TODO: Estimate the frame rate per interface (J1939 250 kbit/s, 8 byte payloads, 29-bit IDs). +Estimate the CPU cost per frame for recv, formatting, and write. Estimate the throughput ceiling for +each option on the target hardware. Identify whether the bottleneck is CPU, memory bandwidth, or +I/O. + +## Benchmarking strategy + +TODO: Define how to benchmark the two options against each other. Describe the test setup (vcan, +cangen, real hardware). Define the metrics to collect (frame loss, latency, CPU usage, memory +usage). Define the workload (number of interfaces, frame rate, payload size). Define the acceptance +criteria. + +## Open questions + +TODO diff --git a/docs/design/05-testing-strategy.md b/docs/design/05-testing-strategy.md new file mode 100644 index 0000000..6d2f09b --- /dev/null +++ b/docs/design/05-testing-strategy.md @@ -0,0 +1,87 @@ +# Testing strategy + +## Status + +**DRAFT** + +## Scope + +This document specifies how candumpr (and other tools in this workspace) are tested, given that they +depend on Linux socketcan interfaces that require either real hardware or elevated permissions to +create. + +## Problem + +candumpr interacts directly with CAN sockets. Testing requires CAN interfaces, but: + +* Real CAN hardware is not available in CI. +* Virtual CAN (vcan) interfaces require `CAP_NET_ADMIN` to create. +* vcan interfaces are system-global resources, so parallel tests using shared interfaces cause + interference. +* Tests must run in CI (GitHub Actions) and locally without requiring root. 
+ +## Solution: user + network namespaces + +Each test process enters its own isolated Linux network namespace using +`unshare(CLONE_NEWUSER | CLONE_NEWNET)`. Inside the namespace, the process has `CAP_NET_ADMIN` +without real root privileges, vcan interfaces are private and isolated, and everything is cleaned up +when the process exits. See the [vcan-fixture](../../vcan-fixture/) crate for the implementation. + +Constraint: `unshare(CLONE_NEWUSER)` requires a single-threaded process. The Rust test harness is +multi-threaded, so namespace entry happens in a `ctor` constructor before `main()`. + +## Test tiers + +### Unit tests + +No sockets, no namespaces. Config parsing, filter compilation, output formatting, filename template +expansion, duration/size parsing. + +### Integration tests + +Run inside user + network namespaces with vcan interfaces. Socket binding, filter application, +multi-interface capture, file rotation, ZSTD streaming, address claim, device resilience. + +### End-to-end tests + +Run the actual binary inside a network namespace. Launch candumpr, send frames with cangenr, verify +output files, signal handling, config file loading. + +## CI + +Tests that require vcan use `#[cfg_attr(feature = "ci", ignore = "requires vcan")]`. In CI, +`--all-features` enables the `ci` feature, making them `#[ignore]`. They are then run as a separate +step gated on whether vcan setup succeeded. + +A separate canary job (`vcan-available`) with `continue-on-error: true` shows yellow when the vcan +module is unavailable on the runner, rather than silently skipping the tests. + +See [lint.yml](/.github/workflows/lint.yml) for the implementation. + +## Benchmarking + +Benchmarks compare candumpr against candump on 4 vcan interfaces with J1939 traffic. + +### Metrics + +* **Frame loss** (primary): frames sent vs. 
frames in output +* **Throughput ceiling**: send rate at which frames start dropping +* **CPU usage**: total CPU time (user + system) +* **Memory usage**: peak RSS + +### Simulating the target environment + +The target is a ~4 core ~1 GHz ARM CPU. Use `taskset` to pin benchmarks to 4 cores: + +```sh +taskset -c 0-3 cargo bench +``` + +Core count is the important variable for comparing architecture options (dedicated thread pairs vs. +shared threads). Clock speed matters less for relative comparison. Final validation must happen on +real target hardware. + +### Acceptance criteria + +candumpr must not drop frames at the realistic J1939 rate (2000 frames/s per interface, 8000 +frames/s aggregate). At higher rates, candumpr should drop fewer frames than candump. diff --git a/vcan-fixture/Cargo.toml b/vcan-fixture/Cargo.toml new file mode 100644 index 0000000..cf843b0 --- /dev/null +++ b/vcan-fixture/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "vcan-fixture" +version.workspace = true +edition.workspace = true +license.workspace = true +rust-version.workspace = true +description = "Build vcan interfaces in isolated network namespaces" + +[features] +ci = [] + +[dependencies] +ctor.workspace = true +eyre.workspace = true +libc.workspace = true +neli.workspace = true diff --git a/vcan-fixture/src/lib.rs b/vcan-fixture/src/lib.rs new file mode 100644 index 0000000..97655cc --- /dev/null +++ b/vcan-fixture/src/lib.rs @@ -0,0 +1,294 @@ +//! Test fixture for creating isolated vcan interfaces inside user + network namespaces. +//! +//! # Overview +//! +//! Tests that need CAN sockets require vcan interfaces, which normally need root to create. This +//! crate solves that by entering a user + network namespace via `unshare(2)`, which grants +//! `CAP_NET_ADMIN` without real root privileges. Each test process gets its own isolated namespace +//! with its own vcan interfaces. +//! +//! # Usage +//! +//! Namespace entry must happen before the test harness spawns threads. 
Use a `ctor` constructor: +//! +//! ```ignore +//! #[ctor::ctor] +//! fn setup() { +//! vcan_fixture::enter_namespace(); +//! } +//! +//! #[test] +//! fn my_can_test() { +//! let vcans = vcan_fixture::VcanHarness::new(2).unwrap(); +//! // vcans.names() -> ["vcan0", "vcan1"] +//! } +//! ``` +//! +//! # Prerequisites +//! +//! The `vcan` kernel module must be loaded on the host before tests run. +//! +//! ## Fedora 24+ +//! +//! ```ignore +//! sudo modprobe vcan +//! ``` +//! +//! ## Ubuntu 24.04 +//! +//! ```ignore +//! # The vcan module is in a separate package +//! sudo apt-get install -y linux-modules-extra-"$(uname -r)" +//! sudo modprobe vcan +//! +//! # Ubuntu 24.04 restricts unprivileged user namespaces via AppArmor +//! sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 +//! ``` +//! +//! In Ubuntu 25.10+ `linux-modules-extra` will get merged back into `linux-modules` and be +//! available by default. + +mod netlink; + +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::{fs, io}; + +static IN_NAMESPACE: AtomicBool = AtomicBool::new(false); +static NEXT_VCAN_ID: AtomicU32 = AtomicU32::new(0); + +/// Enter a new user + network namespace. +/// +/// Must be called while the process is single-threaded. The intended call site is a `ctor` +/// constructor that runs before `main()`. +/// +/// If namespace creation fails (unsupported kernel, AppArmor restrictions, etc.), this prints +/// a diagnostic to stderr and returns without entering a namespace. Callers should check +/// [in_namespace] before attempting to create interfaces. +pub fn enter_namespace() -> bool { + // Save UID/GID before unshare, since they become unmapped (65534) in the new namespace + // until we write the mappings. 
+ let uid = unsafe { libc::getuid() }; + let gid = unsafe { libc::getgid() }; + + let ret = unsafe { libc::unshare(libc::CLONE_NEWUSER | libc::CLONE_NEWNET) }; + if ret != 0 { + let err = io::Error::last_os_error(); + eprintln!("vcan-fixture: unshare(CLONE_NEWUSER | CLONE_NEWNET) failed: {err}"); + return false; + } + + if let Err(e) = write_id_mappings(uid, gid) { + eprintln!("vcan-fixture: failed to write id mappings: {e}"); + return false; + } + + IN_NAMESPACE.store(true, Ordering::Release); + true +} + +/// Returns true if the process has entered an isolated network namespace. +pub fn in_namespace() -> bool { + IN_NAMESPACE.load(Ordering::Acquire) +} + +/// Returns true if the vcan kernel module appears to be loaded on the host. +pub fn is_vcan_available() -> bool { + fs::read_to_string("/proc/modules") + .map(|s| s.lines().any(|line| line.starts_with("vcan "))) + .unwrap_or(false) +} + +/// Returns true if the test environment is fully set up: inside a namespace with the vcan +/// module loaded. +pub fn vcan_available() -> bool { + in_namespace() && is_vcan_available() +} + +/// A set of vcan interfaces created for a single test. +/// +/// Interface names use a global atomic counter for uniqueness (`vcan0`, `vcan1`, ...), so +/// parallel tests within the same process do not collide. Interfaces are deleted on drop. +pub struct VcanHarness { + names: Vec, +} + +impl VcanHarness { + /// Create `count` vcan interfaces. + /// + /// Requires [enter_namespace] to have succeeded. Each interface is created with a unique + /// name and brought up before this returns. + pub fn new(count: usize) -> eyre::Result { + let mut names = Vec::with_capacity(count); + for _ in 0..count { + let id = NEXT_VCAN_ID.fetch_add(1, Ordering::Relaxed); + let name = format!("vcan{id}"); + eprintln!("Creating link {name:?}"); + netlink::create_vcan(&name)?; + names.push(name); + } + Ok(VcanHarness { names }) + } + + /// The interface names (e.g., `["vcan0", "vcan1"]`). 
+ pub fn names(&self) -> &[String] { + &self.names + } + + /// Bring an interface up by name. + pub fn set_up(&self, name: &str) -> eyre::Result<()> { + eprintln!("Setting link {name:?} up"); + netlink::set_link_up(name) + } + + /// Bring an interface down by name. + pub fn set_down(&self, name: &str) -> eyre::Result<()> { + eprintln!("Setting link {name:?} down"); + netlink::set_link_down(name) + } +} + +impl Drop for VcanHarness { + fn drop(&mut self) { + for name in &self.names { + if let Err(e) = netlink::delete_vcan(name) { + eprintln!("Failed to delete vcan {name:?}: {e}"); + } + } + } +} + +/// Map UID/GID 0 inside the namespace to the real UID/GID outside. +fn write_id_mappings(uid: u32, gid: u32) -> eyre::Result<()> { + // Must deny setgroups before writing gid_map as an unprivileged user (since Linux 3.19). + fs::write("/proc/self/setgroups", "deny")?; + fs::write("/proc/self/uid_map", format!("0 {uid} 1"))?; + fs::write("/proc/self/gid_map", format!("0 {gid} 1"))?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use std::ffi::CString; + use std::{mem, ptr}; + + use super::*; + + #[ctor::ctor] + fn setup() { + enter_namespace(); + } + + #[test] + #[cfg_attr(feature = "ci", ignore = "requires vcan")] + fn create_vcan_in_namespace() { + let vcans = VcanHarness::new(2).unwrap(); + assert_eq!(vcans.names().len(), 2); + + for iface in vcans.names() { + let name_c = CString::new(iface.as_str()).unwrap(); + let ifindex = unsafe { libc::if_nametoindex(name_c.as_ptr()) }; + assert!(ifindex > 0, "interface {iface} not found"); + + // Verify we can open and bind a CAN socket. 
+ let fd = unsafe { + libc::socket( + libc::PF_CAN, + libc::SOCK_RAW | libc::SOCK_CLOEXEC, + libc::CAN_RAW, + ) + }; + assert!(fd >= 0, "socket: {}", io::Error::last_os_error()); + + let mut addr: libc::sockaddr_can = unsafe { mem::zeroed() }; + addr.can_family = libc::AF_CAN as u16; + addr.can_ifindex = ifindex as i32; + let ret = unsafe { + libc::bind( + fd, + ptr::from_ref(&addr).cast::(), + mem::size_of::() as u32, + ) + }; + assert_eq!(ret, 0, "bind to {iface}: {}", io::Error::last_os_error()); + unsafe { libc::close(fd) }; + } + } + + #[test] + #[cfg_attr(feature = "ci", ignore = "requires vcan")] + fn send_and_recv_frame() { + let vcans = VcanHarness::new(1).unwrap(); + let iface = &vcans.names()[0]; + let name_c = CString::new(iface.as_str()).unwrap(); + let ifindex = unsafe { libc::if_nametoindex(name_c.as_ptr()) }; + assert!(ifindex > 0); + + let tx = unsafe { libc::socket(libc::PF_CAN, libc::SOCK_RAW, libc::CAN_RAW) }; + let rx = unsafe { libc::socket(libc::PF_CAN, libc::SOCK_RAW, libc::CAN_RAW) }; + assert!(tx >= 0); + assert!(rx >= 0); + + let mut addr: libc::sockaddr_can = unsafe { mem::zeroed() }; + addr.can_family = libc::AF_CAN as u16; + addr.can_ifindex = ifindex as i32; + for fd in [tx, rx] { + let ret = unsafe { + libc::bind( + fd, + ptr::from_ref(&addr).cast::(), + mem::size_of::() as u32, + ) + }; + assert_eq!(ret, 0); + } + + #[repr(C)] + struct CanFrame { + can_id: u32, + len: u8, + __pad: u8, + __res0: u8, + __len8_dlc: u8, + data: [u8; 8], + } + + let frame = CanFrame { + can_id: 0x18FECA00 | libc::CAN_EFF_FLAG, + len: 8, + __pad: 0, + __res0: 0, + __len8_dlc: 0, + data: [0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77], + }; + + let written = unsafe { + libc::write( + tx, + ptr::from_ref(&frame).cast::(), + mem::size_of::(), + ) + }; + assert_eq!(written as usize, mem::size_of::()); + + let mut recv_frame: CanFrame = unsafe { mem::zeroed() }; + let read = unsafe { + libc::read( + rx, + ptr::from_mut(&mut recv_frame).cast::(), + 
mem::size_of::(), + ) + }; + assert_eq!(read as usize, mem::size_of::()); + assert_eq!(recv_frame.can_id, 0x18FECA00 | libc::CAN_EFF_FLAG); + assert_eq!(recv_frame.len, 8); + assert_eq!( + recv_frame.data, + [0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77] + ); + + unsafe { + libc::close(tx); + libc::close(rx); + } + } +} diff --git a/vcan-fixture/src/netlink.rs b/vcan-fixture/src/netlink.rs new file mode 100644 index 0000000..fcd9841 --- /dev/null +++ b/vcan-fixture/src/netlink.rs @@ -0,0 +1,115 @@ +//! Netlink helpers for managing vcan interfaces using the `neli` crate. + +use std::ffi::CString; + +use eyre::{WrapErr, bail}; +use neli::consts::nl::NlmF; +use neli::consts::rtnl::{Ifla, IflaInfo, RtAddrFamily, Rtm}; +use neli::consts::socket::NlFamily; +use neli::nl::NlPayload; +use neli::router::synchronous::NlRouter; +use neli::rtnl::{Ifinfomsg, IfinfomsgBuilder, RtattrBuilder}; +use neli::types::RtBuffer; +use neli::utils::Groups; + +/// Create a vcan interface and bring it up. +pub fn create_vcan(name: &str) -> eyre::Result<()> { + create_link(name).wrap_err_with(|| format!("creating vcan interface {name:?}"))?; + set_link_up(name).wrap_err_with(|| format!("bringing up interface {name:?}")) +} + +/// Delete a vcan interface. Silently succeeds if the interface is already gone. 
+pub fn delete_vcan(name: &str) -> eyre::Result<()> { + let name_c = CString::new(name)?; + let index = unsafe { libc::if_nametoindex(name_c.as_ptr()) }; + if index == 0 { + return Ok(()); + } + + let msg = IfinfomsgBuilder::default() + .ifi_family(RtAddrFamily::Unspecified) + .ifi_index(index as i32) + .build() + .map_err(|e| eyre::eyre!("{e}"))?; + + send(Rtm::Dellink, NlmF::ACK, msg).wrap_err_with(|| format!("deleting interface {name:?}")) +} + +fn create_link(name: &str) -> eyre::Result<()> { + let ifname_attr = RtattrBuilder::default() + .rta_type(Ifla::Ifname) + .rta_payload(name) + .build() + .map_err(|e| eyre::eyre!("{e}"))?; + + let kind_attr = RtattrBuilder::default() + .rta_type(IflaInfo::Kind) + .rta_payload("vcan") + .build() + .map_err(|e| eyre::eyre!("{e}"))?; + let linkinfo_attr = RtattrBuilder::default() + .rta_type(Ifla::Linkinfo) + .rta_payload(Vec::::new()) + .build() + .map_err(|e| eyre::eyre!("{e}"))? + .nest(&kind_attr)?; + + let mut attrs = RtBuffer::new(); + attrs.push(ifname_attr); + attrs.push(linkinfo_attr); + + let msg = IfinfomsgBuilder::default() + .ifi_family(RtAddrFamily::Unspecified) + .rtattrs(attrs) + .build() + .map_err(|e| eyre::eyre!("{e}"))?; + + send(Rtm::Newlink, NlmF::CREATE | NlmF::EXCL | NlmF::ACK, msg) +} + +/// Bring a network interface up. +pub fn set_link_up(name: &str) -> eyre::Result<()> { + let index = resolve_ifindex(name)?; + let msg = IfinfomsgBuilder::default() + .ifi_family(RtAddrFamily::Unspecified) + .ifi_index(index) + .up() + .build() + .map_err(|e| eyre::eyre!("{e}"))?; + send(Rtm::Newlink, NlmF::ACK, msg) +} + +/// Bring a network interface down. 
+pub fn set_link_down(name: &str) -> eyre::Result<()> { + let index = resolve_ifindex(name)?; + let msg = IfinfomsgBuilder::default() + .ifi_family(RtAddrFamily::Unspecified) + .ifi_index(index) + .down() + .build() + .map_err(|e| eyre::eyre!("{e}"))?; + send(Rtm::Newlink, NlmF::ACK, msg) +} + +fn resolve_ifindex(name: &str) -> eyre::Result { + let name_c = CString::new(name)?; + let index = unsafe { libc::if_nametoindex(name_c.as_ptr()) }; + if index == 0 { + bail!("interface not found: {name:?}"); + } + Ok(index as i32) +} + +/// Send a netlink message and drain the response. +fn send(msg_type: Rtm, flags: NlmF, msg: Ifinfomsg) -> eyre::Result<()> { + let (rtnl, _) = NlRouter::connect(NlFamily::Route, None, Groups::empty())?; + let recv: Vec<_> = rtnl + .send::<_, _, Rtm, Ifinfomsg>(msg_type, flags, NlPayload::Payload(msg))? + .collect(); + // NlRouter returns errors as Err items in the iterator. Collecting drains them, and the + // send() call itself returns Err if the kernel responds with a netlink error. + for res in recv { + res?; + } + Ok(()) +}