diff --git a/.cargo/audit.toml b/.cargo/audit.toml index 1ddc9033595..c922c98cbf8 100644 --- a/.cargo/audit.toml +++ b/.cargo/audit.toml @@ -1,3 +1,5 @@ [advisories] -# TODO Remove it from here -ignore = [ "RUSTSEC-2020-0071"] # advisory IDs to ignore e.g. ["RUSTSEC-2019-0001", ...] +# Advisory IDs to ignore, e.g. ["RUSTSEC-2019-0001", ...]. Each entry +# must point at a live advisory in the resolved graph and carry a dated +# rationale; an entry matching nothing trains reviewers to skim the list. +ignore = [] diff --git a/.gitignore b/.gitignore index 983cad56c38..b243acea36e 100644 --- a/.gitignore +++ b/.gitignore @@ -103,3 +103,6 @@ __pycache__/ # Security audit reports (local-only, not committed) audits/ + +# Review scratch (grumpy-review / triage output, local-only) +.review-*/ diff --git a/Cargo.lock b/Cargo.lock index e296c3aebdb..888b657ccc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,6 +179,7 @@ dependencies = [ "blake2", "cpufeatures 0.2.17", "password-hash", + "zeroize", ] [[package]] @@ -1617,8 +1618,10 @@ dependencies = [ name = "dash-async" version = "4.0.0-rc.2" dependencies = [ + "futures", "thiserror 2.0.18", "tokio", + "tokio-util", "tracing", ] @@ -5141,6 +5144,7 @@ dependencies = [ "async-trait", "bimap", "bs58", + "dash-async", "dash-sdk", "dash-spv", "dashcore", @@ -5228,6 +5232,7 @@ dependencies = [ "refinery", "region", "rusqlite", + "schemars 1.2.1", "serde", "serde_json", "serial_test", diff --git a/packages/rs-dash-async/Cargo.toml b/packages/rs-dash-async/Cargo.toml index 26e2c8fdeb9..a567cc60ae5 100644 --- a/packages/rs-dash-async/Cargo.toml +++ b/packages/rs-dash-async/Cargo.toml @@ -7,12 +7,20 @@ authors = ["Dash Core Team"] license = "MIT" description = "Async-sync bridging utilities for Dash Platform" +[features] +# Exposes cross-crate test seams (e.g. `ThreadRegistry::park_orphan_for_test`) +# so downstream crates can drive registry regression tests without shipping +# the seam in their production builds. 
+test-util = [] + [dependencies] thiserror = "2.0" tracing = "0.1.41" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] tokio = { version = "1.40", features = ["rt", "rt-multi-thread", "time", "net"] } +tokio-util = { version = "0.7.12" } +futures = { version = "0.3.30" } [dev-dependencies] -tokio = { version = "1.40", features = ["macros", "rt-multi-thread", "sync"] } +tokio = { version = "1.40", features = ["macros", "rt-multi-thread", "sync", "time"] } diff --git a/packages/rs-dash-async/src/atomic.rs b/packages/rs-dash-async/src/atomic.rs new file mode 100644 index 00000000000..5a98ba7d7f3 --- /dev/null +++ b/packages/rs-dash-async/src/atomic.rs @@ -0,0 +1,138 @@ +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + +/// RAII guard that clears an [`AtomicBool`] flag to `false` on drop. +/// +/// Callers set the flag to `true` before constructing the guard (typically +/// via a `compare_exchange`); the guard resets it on every exit path, +/// including panics, so a panicked holder can never leave the flag wedged. +/// +/// **Panic-strategy caveat:** the clear-on-panic guarantee relies on +/// destructors running while the stack unwinds, so it holds under +/// `panic = "unwind"` (the default). Under `panic = "abort"` — e.g. the +/// iOS release profiles — a panic aborts the process immediately and no +/// `Drop` runs; there is simply no "after" left for the flag to gate. +/// When the binary is built with `panic = "abort"`, constructing a +/// [`ThreadRegistry`](crate::ThreadRegistry) emits a one-shot +/// `tracing::warn!` so operators can audit the risk. +#[must_use = "AtomicFlagGuard clears the flag on drop; binding to `_` or using as a statement drops it immediately"] +pub struct AtomicFlagGuard<'a>(&'a AtomicBool); + +impl<'a> AtomicFlagGuard<'a> { + /// Wrap `flag`. Does **not** set it to `true` — the caller is + /// responsible for doing that before constructing the guard. 
+ pub fn new(flag: &'a AtomicBool) -> Self { + Self(flag) + } +} + +impl Drop for AtomicFlagGuard<'_> { + fn drop(&mut self) { + self.0.store(false, Ordering::Release); + } +} + +/// RAII guard that refcounts a "raised" flag held in an [`AtomicUsize`]. +/// Construction increments; Drop decrements. The flag is "raised" while +/// the count is > 0. Composes safely: multiple holders may raise the gate +/// independently, and Drop never lowers it past another holder's contribution. +/// +/// Where [`AtomicFlagGuard`] is correct only when one party owns the flag, +/// this guard is the analog of the registry's `ClearingGuard` refcount, +/// for cases where two coordinated teardown paths (a public `quiesce()` +/// and an inner-flow `hold_quiescing_gate`) must compose without one +/// path's Drop lowering the other path's barrier. +#[must_use = "RefcountedFlagGuard decrements the count on drop; binding to `_` or using as a statement drops it immediately"] +pub struct RefcountedFlagGuard<'a>(&'a AtomicUsize); + +impl<'a> RefcountedFlagGuard<'a> { + /// Increment the refcount; the flag is observed "raised" while > 0. + pub fn raise(counter: &'a AtomicUsize) -> Self { + // SeqCst: composes into the same handshake `begin_pass` reads the + // gate under; see the wallet-side `quiescing` doc. + counter.fetch_add(1, Ordering::SeqCst); + Self(counter) + } +} + +impl Drop for RefcountedFlagGuard<'_> { + fn drop(&mut self) { + self.0.fetch_sub(1, Ordering::SeqCst); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::panic::{catch_unwind, AssertUnwindSafe}; + + /// A guard constructed over a `true` flag holds it while in scope and + /// clears it to `false` on a normal scope exit. 
+ #[test] + fn clears_flag_on_normal_drop() { + let flag = AtomicBool::new(true); + { + let _guard = AtomicFlagGuard::new(&flag); + assert!(flag.load(Ordering::Acquire), "flag stays set while held"); + } + assert!(!flag.load(Ordering::Acquire), "flag cleared on drop"); + } + + /// The clear also runs while unwinding a panic — the load-bearing + /// property the sync coordinators lean on so a panicked pass can't + /// leave `is_syncing` latched and wedge `quiesce()`'s drain. + #[test] + fn clears_flag_while_unwinding_panic() { + let flag = AtomicBool::new(true); + let result = catch_unwind(AssertUnwindSafe(|| { + let _guard = AtomicFlagGuard::new(&flag); + panic!("boom while holding the guard"); + })); + assert!(result.is_err(), "the panic propagated out of catch_unwind"); + assert!( + !flag.load(Ordering::Acquire), + "Drop ran during unwinding and cleared the flag" + ); + } + + /// Two holders compose: raising twice yields count 2; dropping one + /// leaves the gate raised at 1 (still observed > 0); dropping the + /// second returns to 0. Mirrors the production composition where a + /// public `quiesce()` and an inner-flow `hold_quiescing_gate` both + /// raise the same gate independently. + #[test] + fn composes_holders() { + let counter = AtomicUsize::new(0); + let g1 = RefcountedFlagGuard::raise(&counter); + assert_eq!(counter.load(Ordering::Acquire), 1); + let g2 = RefcountedFlagGuard::raise(&counter); + assert_eq!(counter.load(Ordering::Acquire), 2); + drop(g1); + assert_eq!( + counter.load(Ordering::Acquire), + 1, + "dropping one holder must not lower the gate past the surviving holder's contribution" + ); + drop(g2); + assert_eq!(counter.load(Ordering::Acquire), 0); + } + + /// The decrement also runs while unwinding a panic, so a panicked + /// holder cannot leave the refcount permanently inflated. 
+ #[test] + fn decrements_while_unwinding_panic() { + let counter = AtomicUsize::new(0); + let _outer = RefcountedFlagGuard::raise(&counter); + assert_eq!(counter.load(Ordering::Acquire), 1); + let result = catch_unwind(AssertUnwindSafe(|| { + let _guard = RefcountedFlagGuard::raise(&counter); + assert_eq!(counter.load(Ordering::Acquire), 2); + panic!("boom while holding the refcount"); + })); + assert!(result.is_err(), "the panic propagated out of catch_unwind"); + assert_eq!( + counter.load(Ordering::Acquire), + 1, + "Drop ran during unwinding and decremented the refcount" + ); + } +} diff --git a/packages/rs-dash-async/src/lib.rs b/packages/rs-dash-async/src/lib.rs index 0ef7785253b..31977c41a53 100644 --- a/packages/rs-dash-async/src/lib.rs +++ b/packages/rs-dash-async/src/lib.rs @@ -2,7 +2,20 @@ //! //! Provides [`block_on`] -- a function that bridges async futures into sync code, //! handling multiple tokio runtime flavors (no runtime, current-thread, multi-thread). +//! +//! Also provides [`AtomicFlagGuard`] — a RAII guard for panic-safe `AtomicBool` flag resets, +//! and [`ThreadRegistry`] — a shared lifecycle engine for background OS-thread / tokio-task +//! workers (start, cancel, weight-ordered quiesce + join, orphan reap). +mod atomic; mod block_on; +#[cfg(not(target_arch = "wasm32"))] +mod registry; +pub use atomic::{AtomicFlagGuard, RefcountedFlagGuard}; pub use block_on::{block_on, AsyncError}; +#[cfg(not(target_arch = "wasm32"))] +pub use registry::{ + ClearingGuard, DrainHook, RegistryKey, ShutdownReport, ShutdownWeight, ThreadRegistry, + WorkerConfig, WorkerStatus, DEFAULT_JOIN_BUDGET, DEFAULT_REAP_BACKSTOP, +}; diff --git a/packages/rs-dash-async/src/registry.rs b/packages/rs-dash-async/src/registry.rs new file mode 100644 index 00000000000..4bd46d7c031 --- /dev/null +++ b/packages/rs-dash-async/src/registry.rs @@ -0,0 +1,2325 @@ +//! Shared lifecycle engine for background workers (`ThreadRegistry`). +//! +//! 
Centralizes the dangerous 80% of a background worker's lifecycle — +//! the generation-match exit epilogue, the +//! reap-or-park of a restarted worker's prior thread, and the orphan +//! drain — into one tested place, while deliberately leaving the +//! domain-specific 20% (the "is a pass in flight?" drain barrier) to the +//! consumer as a [`DrainHook`]. +//! +//! Two worker kinds are supported: +//! - [`start_thread`](ThreadRegistry::start_thread) — a dedicated OS +//! thread, for loops that `block_on` `!Send` futures internally (the +//! `!Send` value never crosses the spawn boundary; the body itself is +//! `Send`). +//! - [`start_task`](ThreadRegistry::start_task) — a tokio task, for +//! `Send` futures. +//! +//! # Safety invariants +//! +//! - **A timed-out or dropped quiesce never detaches a live thread.** +//! Every join path takes `&self`; the live join handle stays owned by +//! the slot and is never moved into a cancellable future's frame. A +//! dropped/timed-out [`quiesce`](ThreadRegistry::quiesce) therefore +//! cannot drop-and-detach the handle — on timeout (or on an external +//! drop) the handle is deterministically re-parked into the orphan +//! list, and the slot reports [`WorkerStatus::Timeout`], never a clean +//! `NotRunning`. +//! - **A store wipe cannot race a parked prior-generation thread.** +//! Orphans live in the registry and +//! [`any_alive_for`](ThreadRegistry::any_alive_for) is the key-scoped +//! liveness gate spanning a key's live slot **and** its parked orphans +//! (with [`any_alive`](ThreadRegistry::any_alive) the registry-wide +//! variant). A store-wiping path scoped to one worker consults the +//! key-scoped gate, so a parked still-live thread blocks the wipe of its +//! own worker's store without an unrelated worker blocking it. 
+ +use std::collections::BTreeMap; +use std::future::Future; +use std::num::NonZeroUsize; +use std::pin::Pin; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +use futures::future::FutureExt; +use tokio::runtime::RuntimeFlavor; +use tokio_util::sync::CancellationToken; + +// --------------------------------------------------------------------- +// Key & weight +// --------------------------------------------------------------------- + +/// Worker identity. A wallet supplies a fixed enum; rs-dapi a generated +/// id. Blanket-implemented — consumers just derive the listed bounds on +/// their own key type. +pub trait RegistryKey: Copy + Ord + Eq + std::fmt::Debug + Send + Sync + 'static {} +impl RegistryKey for T {} + +/// Teardown order. Lower weights drain first; equal weights drain +/// concurrently within a tier. Default `0`. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default)] +pub struct ShutdownWeight(pub i32); + +// --------------------------------------------------------------------- +// Status +// --------------------------------------------------------------------- + +/// Terminal status of one worker as classified by the registry at the end +/// of [`quiesce`](ThreadRegistry::quiesce) or the orphan reap. +/// +/// Consumers may re-export this directly on their public surface; the +/// variants distinguish clean exits (`Ok`, `NotRunning`) from every +/// non-clean outcome a host UAF-safety check must observe — see +/// [`is_clean`](Self::is_clean). +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum WorkerStatus { + /// The loop exited and its thread/task joined cleanly. + Ok, + /// A tokio task ended for a non-panic, non-clean reason (cancelled / + /// aborted at the runtime level). Carries a reason when available. + /// Only the `Task` kind can produce this; an OS thread never does. + Stopped(Option), + /// The thread/task panicked; carries the best-effort panic message. 
+ Panicked(String), + /// The managed join exceeded this worker's `join_budget`. The live + /// handle was re-parked into the orphan list — UAF-safe, non-clean. + Timeout, + /// A parked orphan was still alive after the reap grace — UAF-safe, + /// non-clean. + Detached, + /// No thread/task was running to join — never started, or already + /// joined by a prior teardown. + NotRunning, + /// Infrastructural join failure that is neither a timeout nor a + /// panic (unreachable in normal operation). + Error(String), +} + +impl WorkerStatus { + /// `true` only for a fully clean outcome: joined normally (`Ok`) or + /// never ran (`NotRunning`). + pub fn is_clean(&self) -> bool { + matches!(self, Self::Ok | Self::NotRunning) + } +} + +/// Aggregate result of [`ThreadRegistry::shutdown`]. +#[derive(Clone, Debug, PartialEq, Eq)] +#[must_use = "inspect all_clean() before freeing host callback context / dropping the runtime: a non-clean status flags a still-live worker or orphan"] +pub struct ShutdownReport { + /// Per-worker terminal status, keyed by worker id. + pub per_worker: BTreeMap, + /// Number of parked orphans still alive at the reap deadline. + pub detached: usize, + /// Aggregate terminal status from the orphan reap. Reaped orphans are + /// not keyed in `per_worker`, so without this a panicked/errored orphan + /// that finished within the reap grace (`detached == 0`) would silently + /// pass `all_clean()`. First non-clean classification wins; `Ok` when + /// every reaped orphan was clean (or none were parked). + pub orphan_status: WorkerStatus, +} + +impl ShutdownReport { + /// `true` only when every per-worker status is clean, every reaped + /// orphan was clean, and no orphan survived the reap. 
+ pub fn all_clean(&self) -> bool { + self.detached == 0 + && self.orphan_status.is_clean() + && self.per_worker.values().all(WorkerStatus::is_clean) + } +} + +// --------------------------------------------------------------------- +// Per-worker registration options +// --------------------------------------------------------------------- + +/// Async drain hook the registry awaits **before** cancelling a worker, +/// in weight order. The domain barrier (raise a `quiescing` gate, wait +/// out an in-flight pass) lives here, supplied by the consumer — the +/// registry never owns domain semantics. +/// +/// The captured state must be `Send + Sync`; a `!Send` capture does not +/// compile as a `DrainHook`. The fence is anchored to `E0277` (unsatisfied +/// `Send` bound) so the test cannot pass vacuously on some unrelated +/// compile error: +/// +/// ```compile_fail,E0277 +/// use std::rc::Rc; +/// use std::sync::Arc; +/// use dash_async::DrainHook; +/// let rc = Rc::new(42u32); // !Send +/// let _hook: DrainHook = +/// Arc::new(move || { let r = Rc::clone(&rc); Box::pin(async move { let _ = &r; }) }); +/// ``` +pub type DrainHook = Arc Pin + Send>> + Send + Sync>; + +/// Default managed-join budget when a [`WorkerConfig`] does not override +/// it. Pinned so an accidental change surfaces in tests. +pub const DEFAULT_JOIN_BUDGET: Duration = Duration::from_secs(30); + +/// Default orphan reap backstop (start-time reap and shutdown grace). +pub const DEFAULT_REAP_BACKSTOP: Duration = Duration::from_secs(1); + +/// Threshold above which a per-worker drain hook is logged at WARN rather +/// than DEBUG by [`ThreadRegistry::quiesce`]. Not a hard timeout — the +/// caller still bounds the whole teardown — just a heuristic surface +/// for a hung drain. Sized as 1/3 of the default join budget so a drain +/// approaching the worker's join-budget ceiling is loud, while a normal +/// few-millisecond drain stays quiet. 
+pub const DRAIN_HOOK_WARN_THRESHOLD: Duration = Duration::from_secs(10); + +/// Per-worker registration options. +pub struct WorkerConfig { + /// Teardown tier; lower drains first, equal weights concurrently. + pub weight: ShutdownWeight, + /// Optional drain barrier awaited before cancellation. + pub drain: Option, + /// Managed-join timeout for this worker. + pub join_budget: Duration, +} + +impl Default for WorkerConfig { + fn default() -> Self { + Self { + weight: ShutdownWeight::default(), + drain: None, + join_budget: DEFAULT_JOIN_BUDGET, + } + } +} + +impl std::fmt::Debug for WorkerConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // `drain` is a boxed closure with no useful `Debug`; render its + // presence instead. + f.debug_struct("WorkerConfig") + .field("weight", &self.weight) + .field("drain", &self.drain.is_some()) + .field("join_budget", &self.join_budget) + .finish() + } +} + +// --------------------------------------------------------------------- +// Internal handle + slot state +// --------------------------------------------------------------------- + +/// A live worker's join handle. Kept owned by its slot so a cancellable +/// caller can never move it into a future frame and detach it on drop. +enum WorkerHandle { + OsThread(std::thread::JoinHandle<()>), + Task(tokio::task::JoinHandle<()>), +} + +impl WorkerHandle { + fn is_finished(&self) -> bool { + match self { + WorkerHandle::OsThread(h) => h.is_finished(), + WorkerHandle::Task(h) => h.is_finished(), + } + } + + /// Classify a **finished** handle. Kind-dispatched (R3): an OS thread + /// yields only `Ok` / `Panicked`; a task can also yield `Stopped` + /// (cancelled / aborted at the runtime level). 
+ fn classify(self) -> WorkerStatus { + match self { + WorkerHandle::OsThread(j) => match j.join() { + Ok(()) => WorkerStatus::Ok, + Err(payload) => WorkerStatus::Panicked(panic_message(payload)), + }, + WorkerHandle::Task(j) => match j.now_or_never() { + Some(Ok(())) => WorkerStatus::Ok, + Some(Err(e)) if e.is_panic() => { + WorkerStatus::Panicked(panic_message(e.into_panic())) + } + Some(Err(e)) => WorkerStatus::Stopped(Some(e.to_string())), + // Only ever called on a finished handle, so a finished + // task is always ready; this arm is defensive. + None => WorkerStatus::Error("task handle not ready at join".to_string()), + }, + } + } +} + +/// Best-effort extraction of a panic message (`&str` / `String` cases). +fn panic_message(payload: Box) -> String { + if let Some(s) = payload.downcast_ref::<&str>() { + (*s).to_string() + } else if let Some(s) = payload.downcast_ref::() { + s.clone() + } else { + "".to_string() + } +} + +/// One key's slot. The entry is created on first start and never removed, +/// so `generation` stays monotonic across the key's whole lifetime — a +/// parked prior-generation thread can therefore always tell that its +/// generation is stale. `cancel.is_some()` is the running indicator; +/// `handle` is the join handle, reaped by the next start or by quiesce. +struct SlotState { + generation: u64, + cancel: Option, + handle: Option, + weight: ShutdownWeight, + drain: Option, + join_budget: Duration, +} + +// Manual `Default` (not `#[derive(Default)]`): the derived impl would +// initialise `join_budget` to `Duration::ZERO`, but the dormant slot must +// carry [`DEFAULT_JOIN_BUDGET`] so a key created on first-touch (via +// `BTreeMap::entry().or_default()`) is still join-bounded. 
+impl Default for SlotState { + fn default() -> Self { + Self { + generation: 0, + cancel: None, + handle: None, + weight: ShutdownWeight::default(), + drain: None, + join_budget: DEFAULT_JOIN_BUDGET, + } + } +} + +impl SlotState { + /// Rotate the slot onto a new generation: take the prior handle, + /// install a fresh cancellation token, bump generation, and write + /// `cfg`'s teardown config. Returns `(prior_handle, new_token, + /// new_generation)`. + fn prepare(&mut self, cfg: WorkerConfig) -> (Option, CancellationToken, u64) { + let prior = self.handle.take(); + let token = CancellationToken::new(); + self.cancel = Some(token.clone()); + self.generation += 1; + let my_gen = self.generation; + self.weight = cfg.weight; + self.drain = cfg.drain; + self.join_budget = cfg.join_budget; + (prior, token, my_gen) + } +} + +// --------------------------------------------------------------------- +// The registry +// --------------------------------------------------------------------- + +/// Shared lifecycle engine for background workers. See the module docs. +/// +/// Parked orphans carry their originating key so a store-wiping path for +/// one worker can gate on [`any_alive_for`](Self::any_alive_for) without +/// being blocked by an unrelated worker still legitimately running. +pub struct ThreadRegistry { + slots: Mutex>, + orphans: Mutex>, + reap_backstop: Duration, + /// One-way teardown latch. [`shutdown`](Self::shutdown) sets it under + /// the slot lock before snapshotting tiers; `start_thread`/`start_task` + /// honour it under the same lock and refuse to register a new worker + /// once teardown has begun, so a start racing shutdown can never leave + /// an un-joined worker behind. + closing: AtomicBool, + /// Per-key clearing latch — refcounted live-holder count per key + /// whose owner is mid clear-then-wipe (e.g. shielded + /// `clear_shielded`). 
`start_thread`/`start_task` refuse a (re)start + /// for any key with `count > 0`, so a fresh worker cannot slip past + /// the "no new pass" barrier and re-persist into the store the + /// clear is about to wipe. Resettable (mirror of + /// [`closing`](Self::closing) but per-key and scoped to a + /// [`ClearingGuard`]); the guard's `Drop` decrements the count and + /// removes the entry only when the count reaches zero — so two + /// concurrent / nested holders for the same key both keep the latch + /// raised until the LAST guard drops. + clearing: Mutex>, + /// Test seam: when set, the next OS-thread spawn returns an injected + /// `io::Error` instead of really spawning, so the spawn-failure + /// rollback path can be exercised deterministically. + #[cfg(test)] + force_spawn_failure: AtomicBool, +} + +impl std::fmt::Debug for ThreadRegistry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ThreadRegistry") + .field("live_slots", &self.lock_slots().len()) + .field("orphans", &self.lock_orphans().len()) + .field("reap_backstop", &self.reap_backstop) + .field("closing", &self.closing.load(Ordering::Acquire)) + .field("clearing", &self.lock_clearing().len()) + .finish() + } +} + +/// Process-wide latch for the panic=abort startup warn. Hoisted out of the +/// constructor so the in-crate regression test can assert it tripped without +/// peeking at function-local state. +#[cfg(panic = "abort")] +static PANIC_ABORT_WARNED: std::sync::Once = std::sync::Once::new(); + +impl ThreadRegistry { + /// New registry with the default reap backstop ([`DEFAULT_REAP_BACKSTOP`]). + pub fn new() -> Arc { + Self::with_reap_backstop(DEFAULT_REAP_BACKSTOP) + } + + /// New registry with an explicit orphan reap backstop (the wallet + /// uses 1s — the same grace separates "finishing" from "wedged"). + /// + /// Under `panic = "abort"` builds (e.g. 
iOS release profiles) this + /// constructor emits a single startup-time `tracing::warn!` so + /// operators can audit the risk that an `EpilogueGuard` / + /// `AtomicFlagGuard` panic during teardown aborts the process before + /// `Drop` can release the orphan-liveness gate. The warn is fired at + /// most once per process via [`std::sync::Once`]. + pub fn with_reap_backstop(backstop: Duration) -> Arc { + // Stable Rust has no runtime API to query the active panic strategy, + // so the gate is compile-time. iOS release builds intentionally pick + // abort — this is observability, not a hard error. + #[cfg(panic = "abort")] + PANIC_ABORT_WARNED.call_once(|| { + tracing::warn!( + "dash-async registry built with panic=abort: an EpilogueGuard or \ + AtomicFlagGuard panic during teardown aborts the process instead \ + of unwinding, so the orphan-liveness gate may stay held — see \ + registry.rs / atomic.rs doc caveats. iOS release builds choose \ + abort intentionally; non-iOS targets should prefer panic=unwind." + ); + }); + Arc::new(Self { + slots: Mutex::new(BTreeMap::new()), + orphans: Mutex::new(Vec::new()), + reap_backstop: backstop, + closing: AtomicBool::new(false), + clearing: Mutex::new(BTreeMap::new()), + #[cfg(test)] + force_spawn_failure: AtomicBool::new(false), + }) + } + + /// Start an OS-thread worker for `!Send` loops. `body` runs on a + /// fresh `std::thread` and may build and `block_on` `!Send` futures + /// internally — the `!Send` value never crosses the spawn boundary + /// (`body` itself is `Send`). Starting a key that already has a live + /// worker is a no-op; a key whose prior thread has not been reaped is + /// reaped-or-parked first (the restart-reap path). After + /// [`shutdown`](Self::shutdown) has begun the call is also a no-op (the + /// one-way closing latch). + /// + /// **Requires a multi-thread runtime**: the worker drives its loop + /// via `Handle::block_on` and needs the shared timer/IO driver. 
+ /// + /// **Blocks the calling thread on restart-reap**: when restarting a + /// key whose prior OS thread is still finishing, this call SPINS + /// SYNCHRONOUSLY for up to the registry's `reap_backstop` (default + /// `DEFAULT_REAP_BACKSTOP` = 1 s) waiting for the prior to exit. Do + /// not call it directly from an async context — drive it via + /// `tokio::task::spawn_blocking` or a dedicated host thread. + /// + /// # Panics + /// + /// Panics if called outside a multi-thread Tokio runtime (see + /// [`shutdown`](Self::shutdown)). It does **not** panic on thread-spawn + /// failure: a failed spawn (e.g. the OS thread-count limit) is rolled + /// back — the prior handle is re-installed rather than detached and the + /// slot returns to not-running — and the call simply does not start a + /// worker. + pub fn start_thread(self: &Arc, key: K, cfg: WorkerConfig, body: F) + where + F: FnOnce(CancellationToken) + Send + 'static, + { + Self::assert_multi_thread("start_thread"); + let prior_tid = { + let mut slots = self.lock_slots(); + // One-way teardown latch: refuse new workers once shutdown has + // begun, under the same lock shutdown snapshots tiers with. + if self.closing.load(Ordering::Acquire) { + return; + } + // Per-key clearing latch: refuse a (re)start while this key's + // owner is mid clear-then-wipe, so a fresh worker cannot slip + // past the "no new pass" barrier and re-persist into the store + // the clear is about to wipe. + if self.lock_clearing().contains_key(&key) { + return; + } + let slot = slots.entry(key).or_default(); + if slot.cancel.is_some() { + return; + } + // Snapshot the slot's pre-start config so a spawn failure can roll + // the slot back to exactly its prior state: a re-installed prior + // worker must keep its OWN teardown config, not inherit the failed + // start's weight/drain/join_budget. 
Generation is rolled back too — + // the bump is only ever observed under this lock and a failed start + // spawns no thread to reference it, so the rollback is net-zero and + // the externally-visible generation stays monotonic. The drain hook + // is taken here so `prepare` below can install `cfg.drain` cleanly; + // a spawn failure restores it via `slot.drain = prev_drain`. + let prev_generation = slot.generation; + let prev_weight = slot.weight; + let prev_join_budget = slot.join_budget; + let prev_drain = slot.drain.take(); + // Rotate the slot atomically: take prior handle, install fresh + // cancellation token, bump generation, and write this start's + // teardown config — all under THIS slot lock so a prior thread's + // epilogue observes the post-swap generation. + let (prior, token, my_gen) = slot.prepare(cfg); + + let reg = Arc::clone(self); + let body_token = token; + // Build the epilogue drop-guard INSIDE the worker closure, not + // here: on a spawn failure the closure is dropped while we still + // hold the slot lock, and a guard constructed out here would run + // `run_epilogue` (which re-locks `slots`) on that drop and + // deadlock. Constructing it inside means it only exists once the + // thread is actually running. A panicking `body` then still + // clears this generation's running flag via the guard's Drop + // (under `panic = "unwind"`), and the panic keeps unwinding so + // the join handle still classifies as `Panicked`. + match self.spawn_os_thread(key, move || { + let _epilogue = EpilogueGuard { reg, key, my_gen }; + body(body_token); + }) { + Ok(join) => { + // Store the new handle, then park the prior into orphans — + // both still under THIS slot lock, so `shutdown`'s + // under-lock tier snapshot can never see the new slot + // without also seeing the prior accounted (R1: store handle + // -> park prior -> drop guard -> THEN bounded reap below). 
+ // See `park_prior_locked` for the lock-order rationale; the + // bounded join stays out of the lock in `reap_parked_prior`. + slot.handle = Some(WorkerHandle::OsThread(join)); + self.park_prior_locked(key, prior) + } + Err(e) => { + // Spawn failed (e.g. EAGAIN at the OS thread ceiling). Roll + // the slot back to exactly its pre-start state: clear the + // running flag, re-install the prior handle (never + // detached), and restore the prior teardown config + + // generation so nothing of the failed start lingers. The + // re-installed prior keeps its own weight/drain/join_budget + // for a later quiesce/shutdown, and generation returns to + // its pre-bump value (the bump was never observed outside + // this lock and spawned no thread). Nothing was parked, so + // there is no prior to reap below. + tracing::error!( + ?key, + error = %e, + "failed to spawn registry worker thread; rolling back \ + start (prior handle re-installed, not detached)" + ); + slot.cancel = None; + slot.handle = prior; + slot.generation = prev_generation; + slot.weight = prev_weight; + slot.drain = prev_drain; + slot.join_budget = prev_join_budget; + None + } + } + }; + + // The prior thread was cancellation-signalled by a preceding + // cancel(); with the slot lock released its epilogue completes + // promptly and the join lands in milliseconds — `reap_parked_prior` + // then removes it from orphans and joins it. The backstop fires only + // on a genuine wedge, in which case the still-live handle is left + // parked (not dropped) so teardown can account for it. + self.reap_parked_prior(key, prior_tid); + } + + /// Start a tokio-task worker for `Send` futures. A restarted key's prior + /// handle is parked for the async orphan reap (no synchronous reap as in + /// [`start_thread`](Self::start_thread)); needs no multi-thread runtime. + /// + // TODO(rs-dapi-adoption): runtime-flavor assert is asymmetric. 
+ // `start_thread` and `shutdown` assert a multi-thread runtime (the + // OS-thread `block_on` needs the shared reactor), but `start_task` does + // not — a task only needs a runtime handle. So a TASK-ONLY consumer + // (rs-dapi, no `start_thread`) can register and run workers on a + // `current_thread` runtime, then panic LATE when it finally calls + // `shutdown()`. The wallet (which always uses `start_thread`) trips the + // assert at start, so it is unaffected. Fix when rs-dapi adopts the + // registry: either drop the assert from `shutdown` for all-task + // registries (track whether any OS-thread worker was ever started) or + // assert in `start_task` too and require multi-thread everywhere. + /// + /// # Panics + /// + /// Panics if called outside a Tokio runtime context (`tokio::spawn`'s + /// own precondition). After [`shutdown`](Self::shutdown) has begun the + /// call is a no-op (the one-way closing latch). + pub fn start_task(self: &Arc, key: K, cfg: WorkerConfig, body: F) + where + F: FnOnce(CancellationToken) -> Fut + Send + 'static, + Fut: Future + Send + 'static, + { + { + let mut slots = self.lock_slots(); + // One-way teardown latch — see `start_thread`. + if self.closing.load(Ordering::Acquire) { + return; + } + // Per-key clearing latch — see `start_thread`. + if self.lock_clearing().contains_key(&key) { + return; + } + let slot = slots.entry(key).or_default(); + if slot.cancel.is_some() { + return; + } + // No spawn-failure rollback here: `tokio::spawn` panics rather + // than failing, so there is no Err arm to snapshot for. + let (prior, token, my_gen) = slot.prepare(cfg); + + let reg = Arc::clone(self); + let body_token = token; + // Drop-guard epilogue, same rationale as `start_thread`: a task + // whose future panics still clears its running flag via the + // guard's Drop during unwind. 
+ let join = tokio::spawn(async move { + let _epilogue = EpilogueGuard { reg, key, my_gen }; + body(body_token).await; + }); + slot.handle = Some(WorkerHandle::Task(join)); + // Park the prior UNDER this slot lock, same rationale as + // `start_thread`: it keeps `shutdown`'s under-lock tier snapshot + // from ever missing the prior. A task cannot be joined + // synchronously, so there is no bounded reap here — a live prior + // is parked for the async orphan reap (`reap_orphans` / + // `shutdown`) and a finished one is dropped. The returned thread + // id is unused: a task prior has none, and a (mixed-usage) + // OS-thread prior is likewise left to the async reap rather than + // spun on synchronously from this (possibly async) caller. + let _ = self.park_prior_locked(key, prior); + } + } + + /// Whether a worker is currently registered and running for `key`. + pub fn is_running(&self, key: K) -> bool { + self.lock_slots() + .get(&key) + .map(|s| s.cancel.is_some()) + .unwrap_or(false) + } + + /// Signal-only cancellation of one worker. + pub fn cancel(&self, key: K) { + if let Some(slot) = self.lock_slots().get_mut(&key) { + if let Some(token) = slot.cancel.take() { + token.cancel(); + } + } + } + + /// Signal-only cancellation of every registered worker. + pub fn cancel_all(&self) { + for slot in self.lock_slots().values_mut() { + if let Some(token) = slot.cancel.take() { + token.cancel(); + } + } + } + + /// Mark `key` as mid clear-then-wipe and refuse any + /// `start_thread`/`start_task` for it until the returned + /// [`ClearingGuard`] drops. Per-key (other keys are unaffected) and + /// resettable (subsequent clears can reacquire). The caller is + /// expected to hold the guard across the full quiesce → liveness → + /// wipe sequence, so a racing `(re)start` for the same key cannot + /// install a fresh worker that re-persists into the store mid-clear. 
+ /// + /// **Refcounted**: two concurrent / nested holders for the same key + /// both keep the latch raised until the LAST guard drops. Each call + /// increments a per-key counter; `Drop` decrements it and removes the + /// entry only when the count reaches zero. This composes safely with + /// re-entrant or concurrent clears on the same key (e.g. a host + /// driving two FFI `shielded_clear` invocations through a read-locked + /// handle). + /// + /// Returns a guard, not a `begin/end` pair, so the latch is released + /// on every drop path — including panic unwinding — and a caller + /// cannot leak it. + pub fn hold_clearing(self: &Arc, key: K) -> ClearingGuard { + let mut clearing = self.lock_clearing(); + let next = match clearing.get(&key) { + Some(n) => n + .checked_add(1) + .expect("ClearingGuard count overflowed usize::MAX"), + None => NonZeroUsize::new(1).expect("1 is non-zero"), + }; + clearing.insert(key, next); + drop(clearing); + ClearingGuard { + reg: Arc::clone(self), + key, + } + } + + /// Whether `key` is currently held under a [`ClearingGuard`] (count + /// ≥ 1). Exposed so a coordinator can observe the latch BEFORE + /// side-effects that would otherwise leak into the clear flow (e.g. + /// lowering a continuously-held quiescing gate) even when its + /// `start_thread`/`start_task` would be refused. + pub fn is_clearing(&self, key: K) -> bool { + self.lock_clearing().contains_key(&key) + } + + /// Await this worker's drain hook, cancel it, then join within its + /// budget. The live handle is owned by the slot and is **never** moved + /// into this future's frame, so a dropped/timed-out call cannot detach + /// it; on the managed timeout — or if this future is dropped + /// mid-poll — the handle is re-parked into the orphan list. + pub async fn quiesce(&self, key: K) -> WorkerStatus { + // Snapshot the drain hook + budget + generation, and bail early if + // nothing is registered for this key. 
The generation is the anchor + // for the supersede guard below. + // + // The inhabited check is raw (`cancel.is_some() || handle.is_some()`) + // rather than `slot_alive()`: a finished-but-unreaped handle must + // still be classified into its terminal status here, but + // `slot_alive()` treats `handle.is_finished()` as "not alive" and + // would short-circuit to `NotRunning` — incorrectly dropping the + // result on the floor. + let (drain, budget, my_gen) = { + let slots = self.lock_slots(); + match slots.get(&key) { + Some(s) if s.cancel.is_some() || s.handle.is_some() => { + (s.drain.clone(), s.join_budget, s.generation) + } + _ => return WorkerStatus::NotRunning, + } + }; + + // R2: gate-before-cancel — drain hook fully awaited before the + // cancel signal fires. Timed for observability; no hard timeout + // here (the caller bounds teardown). + if let Some(drain) = drain { + let drain_started = Instant::now(); + drain().await; + let drain_elapsed = drain_started.elapsed(); + if drain_elapsed >= DRAIN_HOOK_WARN_THRESHOLD { + tracing::warn!( + ?key, + elapsed_ms = drain_elapsed.as_millis() as u64, + threshold_ms = DRAIN_HOOK_WARN_THRESHOLD.as_millis() as u64, + "registry drain hook took longer than the warn threshold; \ + a slow drain hook delays the per-worker join budget" + ); + } else { + tracing::debug!( + ?key, + elapsed_ms = drain_elapsed.as_millis() as u64, + "registry drain hook completed", + ); + } + } + + // Signal-only cancel — but only if this is still the generation we + // snapshotted. A concurrent restart (which can proceed the instant + // we take `cancel` below) bumps the generation; taking the new + // token here would silently un-track the fresh worker. + if let Some(slot) = self.lock_slots().get_mut(&key) { + if slot.generation == my_gen { + if let Some(token) = slot.cancel.take() { + token.cancel(); + } + } + } + + // Poll-join within budget. 
The re-park guard moves the slot's + // still-live handle into orphans if this future is dropped before + // the loop finishes — the handle is never owned by this frame. Both + // the guard and the loop are generation-scoped, so a concurrent + // same-key restart's live handle is never parked or classified by + // the quiesce that cancelled the *prior* generation. + let _repark = Repark { + reg: self, + key, + my_gen, + }; + let deadline = Instant::now() + budget; + loop { + enum Step { + Classify(WorkerHandle), + Park(WorkerHandle), + NotRunning, + Superseded, + Wait, + } + let step = { + let mut slots = self.lock_slots(); + match slots.get_mut(&key) { + None => Step::NotRunning, + // A restart replaced the generation we were draining: + // the handle now in the slot belongs to a newer, live + // worker the restart owns. Leave it untouched. + Some(slot) if slot.generation != my_gen => Step::Superseded, + Some(slot) => match slot.handle.take_if(|h| h.is_finished()) { + Some(h) => Step::Classify(h), + None if slot.handle.is_none() => Step::NotRunning, + None if Instant::now() >= deadline => { + Step::Park(slot.handle.take().expect("handle present")) + } + None => Step::Wait, + }, + } + }; + match step { + Step::Classify(h) => return h.classify(), + Step::Park(h) => { + self.lock_orphans().push((key, h)); + return WorkerStatus::Timeout; + } + Step::NotRunning | Step::Superseded => return WorkerStatus::NotRunning, + Step::Wait => tokio::time::sleep(Duration::from_millis(5)).await, + } + } + } + + /// Is any registered worker **or** parked orphan still alive across + /// the whole registry? + pub fn any_alive(&self) -> bool { + { + let slots = self.lock_slots(); + for slot in slots.values() { + if slot_alive(slot) { + return true; + } + } + } + self.lock_orphans().iter().any(|(_, h)| !h.is_finished()) + } + + /// Is the worker for `key` — its live slot **or** any orphan parked + /// under that key — still alive? 
A store-wiping path scoped to one + /// worker must gate on this (rather than the registry-wide + /// [`any_alive`](Self::any_alive)) so an unrelated worker that is + /// legitimately running does not block the wipe. + pub fn any_alive_for(&self, key: K) -> bool { + if let Some(slot) = self.lock_slots().get(&key) { + if slot_alive(slot) { + return true; + } + } + self.lock_orphans() + .iter() + .any(|(k, h)| *k == key && !h.is_finished()) + } + + /// Reap parked orphans with a short grace; survivors are re-parked and + /// reported as [`WorkerStatus::Detached`] (idempotent retry). + pub async fn reap_orphans(&self, grace: Duration) -> WorkerStatus { + self.reap_orphans_impl(grace).await.0 + } + + /// Weight-ordered teardown: ascending tier by tier, each worker's + /// (drain-hook -> cancel -> join) run concurrently within a tier; + /// orphan reap runs last. **Requires a multi-thread runtime.** + /// + /// Latches the registry closed first (under the slot lock, before the + /// tier snapshot), so any `start_thread`/`start_task` racing teardown is + /// either already in the snapshot or refused outright — shutdown is a + /// one-way door and never leaves a worker un-joined. Idempotent. + /// + /// # Panics + /// + /// Panics if called outside a multi-thread Tokio runtime: an OS-thread + /// worker drives its loop via `Handle::block_on` and needs the shared + /// timer/IO driver, so a `current_thread` runtime would deadlock the + /// join. + pub async fn shutdown(&self) -> ShutdownReport { + // TODO(rs-dapi-adoption): see `start_task` — this assert is the late + // panic point for a task-only consumer on a current_thread runtime. + Self::assert_multi_thread("shutdown"); + + // Snapshot keys grouped by weight. A `BTreeMap` iterates tiers in + // ascending weight order, giving the lower-first drain. 
Latch the + // registry closed under the same lock and before the snapshot so a + // racing start is serialized: it either landed before this lock (and + // is in the snapshot) or sees `closing` and bails. + let tiers: BTreeMap> = { + let slots = self.lock_slots(); + self.closing.store(true, Ordering::Release); + let mut tiers: BTreeMap> = BTreeMap::new(); + for (key, slot) in slots.iter() { + tiers.entry(slot.weight).or_default().push(*key); + } + tiers + }; + + let mut per_worker = BTreeMap::new(); + for (_weight, keys) in tiers { + // Drain every worker in this tier concurrently: each + // quiesce() drives its own drain-hook -> cancel -> join, and + // `join_all` polls them on one task so their drain hooks + // interleave (equal-weight concurrency). + let drained = keys + .into_iter() + .map(|key| async move { (key, self.quiesce(key).await) }); + for (key, status) in futures::future::join_all(drained).await { + per_worker.insert(key, status); + } + } + + // Account for parked orphans last. The terminal status is folded + // into the report so a panicked/errored reaped orphan that finished + // within the grace (`detached == 0`) still flips `all_clean()`. 
+ let (orphan_status, detached) = self.reap_orphans_impl(self.reap_backstop).await; + ShutdownReport { + per_worker, + detached, + orphan_status, + } + } + + // ----------------------------------------------------------------- + // Internal helpers + // ----------------------------------------------------------------- + + fn lock_slots(&self) -> std::sync::MutexGuard<'_, BTreeMap> { + self.slots.lock().unwrap_or_else(|e| e.into_inner()) + } + + fn lock_orphans(&self) -> std::sync::MutexGuard<'_, Vec<(K, WorkerHandle)>> { + self.orphans.lock().unwrap_or_else(|e| e.into_inner()) + } + + fn lock_clearing(&self) -> std::sync::MutexGuard<'_, BTreeMap> { + self.clearing.lock().unwrap_or_else(|e| e.into_inner()) + } + + fn assert_multi_thread(ctx: &str) { + assert!( + matches!( + tokio::runtime::Handle::current().runtime_flavor(), + RuntimeFlavor::MultiThread + ), + "ThreadRegistry::{ctx}() requires a multi-thread Tokio runtime: an \ + OS-thread worker drives its loop via Handle::block_on and needs the \ + runtime's timer/IO driver, but a current_thread runtime can only \ + drive one block_on at a time" + ); + } + + /// Gen-gated exit epilogue, run on the worker after its body returns + /// (or unwinds): clear this slot's running flag only if a newer start + /// has not since installed a replacement. + fn run_epilogue(&self, key: K, my_gen: u64) { + if let Some(slot) = self.lock_slots().get_mut(&key) { + if slot.generation == my_gen { + slot.cancel = None; + } + } + } + + /// Spawn the named OS worker thread, surfacing a spawn failure as + /// `io::Result` instead of panicking so the caller can roll back. The + /// `#[cfg(test)]` seam forces a synthetic failure to exercise that path. 
+ fn spawn_os_thread(&self, key: K, closure: C) -> std::io::Result> + where + C: FnOnce() + Send + 'static, + { + #[cfg(test)] + if self.force_spawn_failure.load(Ordering::Acquire) { + return Err(std::io::Error::other("forced spawn failure (test seam)")); + } + std::thread::Builder::new() + .name(format!("tr-worker-{key:?}")) + .spawn(closure) + } + + /// Park a restarted key's prior handle into orphans. **Must be called + /// while the slot lock is held** — the resulting `slots`->`orphans` + /// nesting is the only such nesting in this module and is deadlock-free + /// (no path ever acquires `slots` while holding `orphans`, so there is no + /// cycle). Parking the prior here, rather than after the slot lock is + /// released, is what lets `shutdown`'s under-lock tier snapshot never + /// miss it: the take-prior and the park-prior are then atomic from + /// `shutdown`'s view. A finished task is dropped (detaching a finished + /// task is a no-op); a live task and any OS thread are parked. Returns + /// the parked OS thread's id so [`reap_parked_prior`](Self::reap_parked_prior) + /// can find and bounded-join it; tasks (reaped asynchronously) return + /// `None`. + fn park_prior_locked( + &self, + key: K, + prior: Option, + ) -> Option { + match prior { + Some(WorkerHandle::OsThread(h)) => { + let tid = h.thread().id(); + self.lock_orphans().push((key, WorkerHandle::OsThread(h))); + Some(tid) + } + Some(task) => { + if !task.is_finished() { + self.lock_orphans().push((key, task)); + } + None + } + None => None, + } + } + + /// Bounded reap of an OS-thread prior that [`park_prior_locked`](Self::park_prior_locked) + /// parked under `key` at restart. Must be called with no registry lock + /// held (it spins synchronously). The instant the parked thread finishes + /// it is removed from orphans and joined — the join itself stays OUT of + /// any lock (only the bookkeeping is taken under the orphans lock). 
A + /// genuine wedge past the reap backstop is left parked, so teardown can + /// still account for it. No-op when no OS thread was parked (`None`), or + /// when the orphan was already taken by a concurrent reaper / `shutdown` + /// (which then owns the join). + fn reap_parked_prior(&self, key: K, prior_tid: Option) { + let Some(tid) = prior_tid else { + return; + }; + let deadline = Instant::now() + self.reap_backstop; + loop { + // Bookkeeping under the orphans lock only: locate our parked + // prior by thread id and, once it has finished, take it out to + // join after the lock is released. Never hold the lock across the + // join. + let taken = { + let mut orphans = self.lock_orphans(); + let pos = orphans.iter().position(|(k, h)| { + *k == key && matches!(h, WorkerHandle::OsThread(t) if t.thread().id() == tid) + }); + match pos { + // Already taken by a concurrent reaper / shutdown: it owns + // the join now. + None => return, + Some(i) if orphans[i].1.is_finished() => Some(orphans.remove(i).1), + Some(_) if Instant::now() >= deadline => { + tracing::warn!( + ?key, + backstop = ?self.reap_backstop, + "prior worker thread did not finish within the reap \ + backstop after cancellation; leaving it parked as an \ + orphan for teardown to join rather than detaching it" + ); + return; + } + Some(_) => None, + } + }; + if let Some(WorkerHandle::OsThread(h)) = taken { + let _ = h.join(); + return; + } + std::thread::sleep(Duration::from_millis(5)); + } + } + + /// Drain the orphan list, polling until `grace`. Returns the terminal + /// status and the number of survivors re-parked for an idempotent + /// retry. 
+ async fn reap_orphans_impl(&self, grace: Duration) -> (WorkerStatus, usize) { + let mut pending: Vec<(K, WorkerHandle)> = { + let mut guard = self.lock_orphans(); + std::mem::take(&mut *guard) + }; + if pending.is_empty() { + return (WorkerStatus::Ok, 0); + } + + let deadline = Instant::now() + grace; + // Keep the first non-clean terminal status; a live survivor still + // takes precedence at the deadline. + let mut non_clean: Option = None; + loop { + let mut still_live = Vec::with_capacity(pending.len()); + for (key, handle) in pending.drain(..) { + if handle.is_finished() { + let status = handle.classify(); + if !status.is_clean() { + non_clean.get_or_insert(status); + } + } else { + still_live.push((key, handle)); + } + } + pending = still_live; + + if pending.is_empty() { + return (non_clean.unwrap_or(WorkerStatus::Ok), 0); + } + if Instant::now() >= deadline { + let survivors = pending.len(); + self.lock_orphans().extend(pending); + return (WorkerStatus::Detached, survivors); + } + tokio::time::sleep(Duration::from_millis(5)).await; + } + } + + /// Test-only seam: park a raw thread handle as an orphan under `key`. + /// Used by cross-crate regression tests (e.g. the wallet's F2 gate) + /// that must inject a wedged prior-generation thread without driving + /// the full restart-reap path. Feature-gated behind `test-util` so it + /// never ships in a production build of a downstream consumer. + #[cfg(any(test, feature = "test-util"))] + #[doc(hidden)] + pub fn park_orphan_for_test(&self, key: K, handle: std::thread::JoinHandle<()>) { + self.lock_orphans() + .push((key, WorkerHandle::OsThread(handle))); + } +} + +/// `true` if a slot is running or holds an unfinished handle. +fn slot_alive(slot: &SlotState) -> bool { + slot.cancel.is_some() || slot.handle.as_ref().is_some_and(|h| !h.is_finished()) +} + +/// Re-park guard for [`ThreadRegistry::quiesce`]. If the poll-join future +/// is dropped before it finishes (e.g. 
an outer timeout fires), this moves +/// the slot's still-live handle into the orphan list instead of letting it +/// be dropped-and-detached. On normal completion the handle has already +/// been taken from the slot, so this is a no-op. +/// +/// Generation-scoped: it only re-parks the handle if the slot still holds +/// the generation `quiesce` was draining. A concurrent same-key restart +/// bumps the generation and installs its own live handle; this guard leaves +/// that fresh handle alone. +struct Repark<'a, K: RegistryKey> { + reg: &'a ThreadRegistry, + key: K, + my_gen: u64, +} + +impl Drop for Repark<'_, K> { + fn drop(&mut self) { + // Take the handle under the slot lock, release it, then push to + // orphans. This path holds only one lock at a time; the single + // sanctioned nesting in the module is `slots`->`orphans` in + // `park_prior_locked`, and nothing ever takes `slots` while holding + // `orphans`, so the ordering stays acyclic. Skip if a restart + // superseded our generation (the handle is the new worker's, not + // ours). + let handle = self + .reg + .lock_slots() + .get_mut(&self.key) + .filter(|slot| slot.generation == self.my_gen) + .and_then(|slot| slot.handle.take()); + if let Some(handle) = handle { + self.reg.lock_orphans().push((self.key, handle)); + } + } +} + +/// Worker-side exit guard. Runs the generation-gated [`run_epilogue`] +/// from its `Drop`, so a worker whose `body` returns normally **or** +/// unwinds on panic still clears its running flag — `is_running()` then +/// reflects reality and `start()` can relaunch a crashed loop. +/// +/// Panic-strategy caveat (same as `AtomicFlagGuard`): the clear-on-panic +/// half relies on `Drop` running while the stack unwinds, so it holds under +/// `panic = "unwind"`. Under `panic = "abort"` a worker panic aborts the +/// process and there is no "after" to gate. 
When the binary is built with +/// `panic = "abort"`, [`ThreadRegistry::with_reap_backstop`] emits a +/// one-shot `tracing::warn!` so operators can audit the risk. +struct EpilogueGuard { + reg: Arc>, + key: K, + my_gen: u64, +} + +impl Drop for EpilogueGuard { + fn drop(&mut self) { + self.reg.run_epilogue(self.key, self.my_gen); + } +} + +/// RAII guard returned by [`ThreadRegistry::hold_clearing`]. While at +/// least one guard for a key is alive, the registry refuses any +/// `start_thread`/`start_task` for that key. Drop decrements the +/// per-key holder count and removes the entry only when the count +/// reaches zero — so nested or concurrent holders for the same key +/// compose, and the latch stays raised until the LAST guard drops. +/// Drop runs on every exit path, including panic unwinding, so a +/// caller cannot leak the latch by forgetting to call an end function. +pub struct ClearingGuard { + reg: Arc>, + key: K, +} + +impl Drop for ClearingGuard { + fn drop(&mut self) { + let mut clearing = self.reg.lock_clearing(); + match clearing.get(&self.key) { + // Decrement; remove the entry when the last holder drops. + Some(n) => match NonZeroUsize::new(n.get() - 1) { + Some(remaining) => { + clearing.insert(self.key, remaining); + } + None => { + clearing.remove(&self.key); + } + }, + // Defensive: a balanced hold/drop should always find the + // entry. A missing entry means someone removed it out of + // band — refuse to underflow. + None => { + debug_assert!( + false, + "ClearingGuard::drop saw no entry for its key — \ + someone removed the latch out of band" + ); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::panic::{catch_unwind, AssertUnwindSafe}; + use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::mpsc; + use tokio::runtime::{Builder, Handle}; + use tokio::sync::Barrier; + + type Reg = Arc>; + + /// Start an OS-thread worker that exits cleanly when cancelled. 
The + /// runtime handle is captured from the caller's context (the worker + /// thread is not itself a tokio worker, so it can't fetch its own). + fn start_clean(reg: &Reg, key: &'static str, cfg: WorkerConfig) { + let handle = Handle::current(); + reg.start_thread(key, cfg, move |cancel| { + handle.block_on(async move { cancel.cancelled().await }); + }); + } + + /// Body for a worker wedged in a non-yielding section: blocks on a + /// channel and ignores its cancellation token (stands in for a thread + /// stuck in a `Drop` that never observes cancel). + fn wedged_body(rx: mpsc::Receiver<()>) -> impl FnOnce(CancellationToken) + Send + 'static { + move |_cancel| { + let _ = rx.recv(); + } + } + + fn orphan_len(reg: &Reg) -> usize { + reg.lock_orphans().len() + } + + // ----- Group 1: F1 regression ------------------------------------- + + /// TC-001 — a `quiesce` whose outer future is dropped (a tiny enclosing + /// timeout) must re-park the live handle, never drop-and-detach it. The + /// slot is cleared (`is_running == false`) but the handle lives in + /// orphans and `any_alive()` stays true. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn tc001_quiesce_drop_reparks_handle_not_detach() { + let reg = ThreadRegistry::<&str>::new(); + let (release_tx, release_rx) = mpsc::channel::<()>(); + reg.start_thread("alpha", WorkerConfig::default(), wedged_body(release_rx)); + assert!(reg.is_running("alpha")); + + // The wedged worker never observes cancel, so the internal 30s + // budget can't fire here; the tiny outer timeout drops the quiesce + // future mid-poll. A naive by-value-into-future impl would detach + // the handle (orphans empty, any_alive false); the slot-owned + // handle is re-parked instead. 
+ let result = tokio::time::timeout(Duration::from_millis(100), reg.quiesce("alpha")).await; + assert!( + result.is_err(), + "outer timeout must fire on the wedged worker" + ); + + assert!(reg.any_alive(), "re-parked handle keeps any_alive true"); + assert!(!reg.is_running("alpha"), "slot cleared (cancel taken)"); + assert_eq!(orphan_len(®), 1, "handle was re-parked, not detached"); + assert!(!WorkerStatus::Timeout.is_clean()); + + // Release + reap: the orphan joins cleanly and liveness clears. + release_tx.send(()).unwrap(); + assert_eq!( + reg.reap_orphans(Duration::from_secs(2)).await, + WorkerStatus::Ok + ); + assert!(!reg.any_alive()); + } + + /// TC-001b — internal-budget variant: a wedged worker with a tiny + /// `join_budget` makes `quiesce` itself time out, re-park, and return + /// `Timeout` (no outer drop involved). + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn tc001b_quiesce_internal_budget_timeout_reparks() { + let reg = ThreadRegistry::<&str>::new(); + let (release_tx, release_rx) = mpsc::channel::<()>(); + let cfg = WorkerConfig { + join_budget: Duration::from_millis(50), + ..WorkerConfig::default() + }; + reg.start_thread("alpha", cfg, wedged_body(release_rx)); + + let status = reg.quiesce("alpha").await; + assert_eq!(status, WorkerStatus::Timeout); + assert_eq!(orphan_len(®), 1); + assert!(reg.any_alive()); + assert!(!reg.is_running("alpha")); + + release_tx.send(()).unwrap(); + assert_eq!( + reg.reap_orphans(Duration::from_secs(2)).await, + WorkerStatus::Ok + ); + assert!(!reg.any_alive()); + } + + /// GAP-006 — the F1 scenario via the `shutdown()` path: a wedged worker + /// with a tiny budget surfaces as `Timeout` in the report, its handle + /// is re-parked (`detached == 1`, `any_alive`), and the result is + /// non-clean — never a clean detach. 
+ #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn gap006_shutdown_path_reparks_wedged_worker() { + let reg = ThreadRegistry::<&str>::new(); + let (release_tx, release_rx) = mpsc::channel::<()>(); + let cfg = WorkerConfig { + join_budget: Duration::from_millis(50), + ..WorkerConfig::default() + }; + reg.start_thread("alpha", cfg, wedged_body(release_rx)); + + let report = tokio::time::timeout(Duration::from_secs(10), reg.shutdown()) + .await + .expect("shutdown must complete within bound"); + assert_eq!(report.per_worker.get("alpha"), Some(&WorkerStatus::Timeout)); + assert_eq!(report.detached, 1, "wedged handle re-parked, survived reap"); + assert!(!report.all_clean()); + assert!(reg.any_alive()); + + // Cleanup. + release_tx.send(()).unwrap(); + let _ = reg.reap_orphans(Duration::from_secs(5)).await; + assert!(!reg.any_alive()); + } + + // ----- Group 3: registry unit suite ------------------------------- + + /// TC-003 — a slow prior-generation thread's epilogue must NOT clear a + /// newer generation's token. Restarting reaps the prior generation + /// fully (its epilogue runs); the new generation stays tracked. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn tc003_generation_match_epilogue_preserves_new_token() { + let reg = ThreadRegistry::<&str>::new(); + start_clean(®, "beta", WorkerConfig::default()); // gen 1 + assert!(reg.is_running("beta")); + + // Cancel gen 1, then restart. start_thread's reap joins gen 1 + // (running its gen-gated epilogue) before returning, so this is + // deterministic: if the epilogue ignored generation it would have + // cleared gen 2's token during that join. 
+ reg.cancel("beta"); + start_clean(®, "beta", WorkerConfig::default()); // gen 2 + + assert!( + reg.is_running("beta"), + "gen-2 token must survive gen-1's epilogue" + ); + assert_eq!(reg.quiesce("beta").await, WorkerStatus::Ok); + } + + /// TC-004 — a naturally-finished prior thread is joined cleanly on + /// restart, with no parking. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn tc004_restart_reaps_finished_prior_without_parking() { + let reg = ThreadRegistry::<&str>::new(); + start_clean(®, "gamma", WorkerConfig::default()); + // Cancel so the prior exits, then restart: the reap must join it, + // not park it. + reg.cancel("gamma"); + start_clean(®, "gamma", WorkerConfig::default()); + assert_eq!(orphan_len(®), 0, "finished prior was joined, not parked"); + assert!(reg.is_running("gamma")); + assert_eq!(reg.quiesce("gamma").await, WorkerStatus::Ok); + } + + /// TC-005 — a prior thread wedged past the reap backstop is parked in + /// orphans (not dropped), then drained after release. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn tc005_restart_parks_wedged_prior() { + let reg = ThreadRegistry::with_reap_backstop(Duration::from_millis(100)); + let (release_tx, release_rx) = mpsc::channel::<()>(); + + // gen 1: wedged (ignores cancel). + reg.start_thread("delta", WorkerConfig::default(), wedged_body(release_rx)); + reg.cancel("delta"); + + // gen 2: clean. The restart reaps gen 1 — wedged past the 100ms + // backstop, so it is parked. Run off the runtime workers since the + // reap spins synchronously. 
+ let reg_for_start = Arc::clone(®); + let parent = Handle::current(); + tokio::task::spawn_blocking(move || { + let handle = parent.clone(); + reg_for_start.start_thread("delta", WorkerConfig::default(), move |cancel| { + handle.block_on(async move { cancel.cancelled().await }); + }); + }) + .await + .unwrap(); + + assert_eq!(orphan_len(®), 1, "wedged prior parked, not dropped"); + assert!(reg.any_alive()); + assert!(reg.is_running("delta"), "gen-2 loop started"); + + // Release the wedged prior; reap drains it. + release_tx.send(()).unwrap(); + assert_eq!( + reg.reap_orphans(Duration::from_secs(2)).await, + WorkerStatus::Ok + ); + assert_eq!(orphan_len(®), 0); + + // Cleanup gen 2. + assert_eq!(reg.quiesce("delta").await, WorkerStatus::Ok); + } + + /// TC-006 — orphan drain: a survivor at the grace deadline is reported + /// `Detached` and re-parked; once released it reaps `Ok`. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn tc006_orphan_drain_detached_then_ok() { + let reg = ThreadRegistry::<&str>::new(); + let (release_tx, release_rx) = mpsc::channel::<()>(); + let wedged = std::thread::spawn(move || { + let _ = release_rx.recv(); + }); + reg.park_orphan_for_test("orphan", wedged); + + assert_eq!( + reg.reap_orphans(Duration::from_millis(50)).await, + WorkerStatus::Detached + ); + assert_eq!(orphan_len(®), 1, "survivor re-parked for retry"); + assert!(reg.any_alive()); + + release_tx.send(()).unwrap(); + assert_eq!( + reg.reap_orphans(Duration::from_secs(2)).await, + WorkerStatus::Ok + ); + assert_eq!(orphan_len(®), 0); + assert!(!reg.any_alive()); + } + + /// A reaped orphan whose body PANICKED (finishes within the reap grace, + /// so `detached == 0`) must surface in `ShutdownReport::orphan_status` + /// and flip `all_clean()`. 
+ /// + /// Non-vacuous: `orphan_status` is the only place a panicked-but-reaped + /// orphan is observable — without it, an empty/clean `per_worker` plus + /// `detached == 0` would let `all_clean()` return true and swallow the + /// panic. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn shutdown_report_surfaces_panicked_reaped_orphan() { + let reg = ThreadRegistry::<&str>::new(); + // Park a panicking thread directly as an orphan via the test seam. + // The body panics immediately, so the thread is finished by the + // time `shutdown()` runs the reap and classifies as `Panicked`. + let panicker = std::thread::spawn(|| { + panic!("deliberate orphan-body panic"); + }); + reg.park_orphan_for_test("k", panicker); + + let report = reg.shutdown().await; + + assert_eq!( + report.detached, 0, + "panicked orphan finished within the reap grace" + ); + assert!( + matches!(report.orphan_status, WorkerStatus::Panicked(_)), + "reaped orphan's panic must surface in orphan_status, got {:?}", + report.orphan_status + ); + assert!( + !report.all_clean(), + "all_clean() must reflect the panicked reaped orphan, not pass it" + ); + assert!(!reg.any_alive()); + } + + /// Complement: a clean reaped orphan (`Ok`) leaves `all_clean()` true, + /// so the orphan-status fold doesn't over-trigger on the common case of + /// orphans that drained cleanly within the grace. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn shutdown_report_clean_reaped_orphan_is_clean() { + let reg = ThreadRegistry::<&str>::new(); + let clean = std::thread::spawn(|| { /* exits cleanly */ }); + reg.park_orphan_for_test("k", clean); + + let report = reg.shutdown().await; + assert_eq!(report.detached, 0); + assert_eq!(report.orphan_status, WorkerStatus::Ok); + assert!(report.all_clean()); + } + + /// TC-007 — weight-ordered shutdown drains a lower tier before a higher + /// one. 
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn tc007_weight_ordered_shutdown_drains_low_first() {
+        let reg = ThreadRegistry::<&str>::new();
+        let log = Arc::new(Mutex::new(Vec::<&'static str>::new()));
+
+        let mk_hook = |tag: &'static str, log: Arc<Mutex<Vec<&'static str>>>| -> DrainHook {
+            Arc::new(move || {
+                let log = Arc::clone(&log);
+                Box::pin(async move {
+                    log.lock().unwrap().push(tag);
+                })
+            })
+        };
+
+        start_clean(
+            &reg,
+            "w0",
+            WorkerConfig {
+                weight: ShutdownWeight(0),
+                drain: Some(mk_hook("w0", Arc::clone(&log))),
+                ..WorkerConfig::default()
+            },
+        );
+        start_clean(
+            &reg,
+            "w5",
+            WorkerConfig {
+                weight: ShutdownWeight(5),
+                drain: Some(mk_hook("w5", Arc::clone(&log))),
+                ..WorkerConfig::default()
+            },
+        );
+        start_clean(
+            &reg,
+            "w10",
+            WorkerConfig {
+                weight: ShutdownWeight(10),
+                drain: Some(mk_hook("w10", Arc::clone(&log))),
+                ..WorkerConfig::default()
+            },
+        );
+
+        let report = reg.shutdown().await;
+        assert!(report.all_clean());
+
+        let log = log.lock().unwrap();
+        let pos = |tag| log.iter().position(|t| *t == tag).unwrap();
+        assert!(pos("w0") < pos("w5"));
+        assert!(pos("w5") < pos("w10"));
+    }
+
+    /// TC-008 — equal-weight workers drain concurrently. A shared
+    /// `Barrier(2)` in both drain hooks would deadlock under sequential
+    /// draining (caught by the enclosing timeout); the event log proves
+    /// both arrived before either passed.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn tc008_equal_weight_drains_concurrently() {
+        let reg = ThreadRegistry::<&str>::new();
+        let log = Arc::new(Mutex::new(Vec::<&'static str>::new()));
+        let barrier = Arc::new(Barrier::new(2));
+
+        let mk_hook = |arrived: &'static str,
+                       passed: &'static str,
+                       log: Arc<Mutex<Vec<&'static str>>>,
+                       barrier: Arc<Barrier>|
+         -> DrainHook {
+            Arc::new(move || {
+                let log = Arc::clone(&log);
+                let barrier = Arc::clone(&barrier);
+                Box::pin(async move {
+                    log.lock().unwrap().push(arrived);
+                    barrier.wait().await;
+                    log.lock().unwrap().push(passed);
+                })
+            })
+        };
+
+        start_clean(
+            &reg,
+            "a",
+            WorkerConfig {
+                weight: ShutdownWeight(0),
+                drain: Some(mk_hook(
+                    "a_arrived",
+                    "a_passed",
+                    Arc::clone(&log),
+                    Arc::clone(&barrier),
+                )),
+                ..WorkerConfig::default()
+            },
+        );
+        start_clean(
+            &reg,
+            "b",
+            WorkerConfig {
+                weight: ShutdownWeight(0),
+                drain: Some(mk_hook(
+                    "b_arrived",
+                    "b_passed",
+                    Arc::clone(&log),
+                    Arc::clone(&barrier),
+                )),
+                ..WorkerConfig::default()
+            },
+        );
+
+        let report = tokio::time::timeout(Duration::from_secs(5), reg.shutdown())
+            .await
+            .expect("equal-weight drain must not deadlock (proves concurrency)");
+        assert!(report.all_clean());
+
+        let log = log.lock().unwrap();
+        let pos = |tag| log.iter().position(|t| *t == tag).unwrap();
+        let last_arrived = pos("a_arrived").max(pos("b_arrived"));
+        let first_passed = pos("a_passed").min(pos("b_passed"));
+        assert!(
+            last_arrived < first_passed,
+            "both hooks must reach the barrier before either passes: {log:?}"
+        );
+    }
+
+    /// TC-009 — `any_alive()` accounts for both live slots and orphans.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn tc009_any_alive_spans_slots_and_orphans() {
+        let reg = ThreadRegistry::<&str>::new();
+        start_clean(&reg, "alpha", WorkerConfig::default());
+        assert!(reg.any_alive());
+
+        let (release_tx, release_rx) = mpsc::channel::<()>();
+        let wedged = std::thread::spawn(move || {
+            let _ = release_rx.recv();
+        });
+        reg.park_orphan_for_test("orphan", wedged);
+        assert!(reg.any_alive());
+
+        assert_eq!(reg.quiesce("alpha").await, WorkerStatus::Ok);
+        assert!(
+            reg.any_alive(),
+            "orphan still contributes after slot drains"
+        );
+        assert!(!reg.is_running("alpha"));
+
+        release_tx.send(()).unwrap();
+        let _ = reg.reap_orphans(Duration::from_secs(2)).await;
+        assert!(!reg.any_alive());
+    }
+
+    /// `any_alive_for(key)` is scoped: an orphan parked under one key does
+    /// not make a different key look alive (the F2 gate must not be
+    /// blocked by unrelated workers).
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn any_alive_for_is_key_scoped() {
+        let reg = ThreadRegistry::<&str>::new();
+        let (release_tx, release_rx) = mpsc::channel::<()>();
+        let wedged = std::thread::spawn(move || {
+            let _ = release_rx.recv();
+        });
+        reg.park_orphan_for_test("shielded", wedged);
+
+        // A live, unrelated worker.
+        start_clean(&reg, "identity", WorkerConfig::default());
+
+        assert!(reg.any_alive(), "registry-wide liveness sees both");
+        assert!(reg.any_alive_for("shielded"), "shielded orphan is alive");
+        assert!(
+            !reg.any_alive_for("address"),
+            "an unrelated key with no slot/orphan is not alive"
+        );
+
+        // The running 'identity' worker must not make 'shielded' look alive
+        // beyond its own orphan, and vice versa.
+        assert!(reg.any_alive_for("identity"), "running identity is alive");
+
+        release_tx.send(()).unwrap();
+        let _ = reg.reap_orphans(Duration::from_secs(2)).await;
+        assert!(
+            !reg.any_alive_for("shielded"),
+            "shielded clear once its orphan is reaped"
+        );
+        assert_eq!(reg.quiesce("identity").await, WorkerStatus::Ok);
+    }
+
+    /// TC-010 — `shutdown()` panics with a documented message on a
+    /// current-thread runtime (R4, variant B).
+    #[test]
+    fn tc010_shutdown_asserts_multi_thread_runtime() {
+        let rt = Builder::new_current_thread().enable_all().build().unwrap();
+        let reg = ThreadRegistry::<&str>::new();
+        let result = catch_unwind(AssertUnwindSafe(|| {
+            // We're proving `shutdown()` panics — its return value is
+            // moot here, but `#[must_use]` requires an explicit drop.
+            let _ = rt.block_on(async { reg.shutdown().await });
+        }));
+        let payload = result.expect_err("shutdown must panic on current_thread");
+        let msg = payload
+            .downcast_ref::<String>()
+            .map(String::as_str)
+            .or_else(|| payload.downcast_ref::<&str>().copied())
+            .unwrap_or("");
+        assert!(
+            msg.contains("multi-thread"),
+            "panic must name the runtime constraint, got: {msg}"
+        );
+    }
+
+    // ----- Group 4: DrainHook ordering --------------------------------
+
+    /// TC-011 — the drain hook is fully awaited before the cancel signal is
+    /// observed by the worker.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn tc011_drain_hook_completes_before_cancel() {
+        let reg = ThreadRegistry::<&str>::new();
+        let log = Arc::new(Mutex::new(Vec::<&'static str>::new()));
+
+        let log_hook = Arc::clone(&log);
+        let drain: DrainHook = Arc::new(move || {
+            let log = Arc::clone(&log_hook);
+            Box::pin(async move {
+                log.lock().unwrap().push("drain_hook_start");
+                tokio::time::sleep(Duration::from_millis(10)).await;
+                log.lock().unwrap().push("drain_hook_complete");
+            })
+        });
+
+        let log_worker = Arc::clone(&log);
+        let handle = Handle::current();
+        reg.start_thread(
+            "epsilon",
+            WorkerConfig {
+                drain: Some(drain),
+                ..WorkerConfig::default()
+            },
+            move |cancel| {
+                handle.block_on(async move {
+                    cancel.cancelled().await;
+                    log_worker.lock().unwrap().push("cancel_observed");
+                });
+            },
+        );
+
+        assert_eq!(reg.quiesce("epsilon").await, WorkerStatus::Ok);
+        assert!(!reg.is_running("epsilon"));
+
+        let log = log.lock().unwrap();
+        let pos = |tag| log.iter().position(|t| *t == tag).unwrap();
+        assert!(pos("drain_hook_start") < pos("drain_hook_complete"));
+        assert!(pos("drain_hook_complete") < pos("cancel_observed"));
+    }
+
+    /// TC-012 — a `quiesce` blocks in the drain hook until an `is_syncing`
+    /// barrier the hook polls falls, and only then cancels + joins.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn tc012_drain_hook_observes_barrier_before_join() {
+        let reg = ThreadRegistry::<&str>::new();
+        let is_syncing = Arc::new(AtomicBool::new(true));
+
+        let gate = Arc::clone(&is_syncing);
+        let drain: DrainHook = Arc::new(move || {
+            let gate = Arc::clone(&gate);
+            Box::pin(async move {
+                while gate.load(Ordering::Acquire) {
+                    tokio::time::sleep(Duration::from_millis(5)).await;
+                }
+            })
+        });
+        start_clean(
+            &reg,
+            "zeta",
+            WorkerConfig {
+                drain: Some(drain),
+                ..WorkerConfig::default()
+            },
+        );
+
+        let quiesce_completed = Arc::new(AtomicBool::new(false));
+        let reg_q = Arc::clone(&reg);
+        let done = Arc::clone(&quiesce_completed);
+        let quiesce_task = tokio::spawn(async move {
+            let status = reg_q.quiesce("zeta").await;
+            done.store(true, Ordering::Release);
+            status
+        });
+
+        // While the barrier is held, quiesce must stay pending.
+        tokio::time::sleep(Duration::from_millis(50)).await;
+        assert!(
+            !quiesce_completed.load(Ordering::Acquire),
+            "quiesce must block while is_syncing is held"
+        );
+
+        // Release the barrier; quiesce drains, cancels, joins.
+        is_syncing.store(false, Ordering::Release);
+        let status = tokio::time::timeout(Duration::from_secs(2), quiesce_task)
+            .await
+            .expect("quiesce must complete once the barrier falls")
+            .unwrap();
+        assert_eq!(status, WorkerStatus::Ok);
+        assert!(quiesce_completed.load(Ordering::Acquire));
+    }
+
+    // ----- Group 5: status classification -----------------------------
+
+    /// TC-013 — only the `Task` kind can classify as `Stopped` (from a
+    /// runtime-level cancel/abort JoinError); a cooperatively token-
+    /// cancelled task exits normally as `Ok`. Verifies the kind-dispatch
+    /// at the classification boundary.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn tc013_task_kind_classifies_stopped_and_ok() {
+        // Stopped: an aborted task yields a cancelled JoinError.
+        let aborted = tokio::spawn(std::future::pending::<()>());
+        aborted.abort();
+        while !aborted.is_finished() {
+            tokio::time::sleep(Duration::from_millis(1)).await;
+        }
+        let status = WorkerHandle::Task(aborted).classify();
+        assert!(matches!(status, WorkerStatus::Stopped(_)), "got {status:?}");
+        assert!(!status.is_clean());
+
+        // Ok: a cooperatively token-cancelled task returns normally.
+        let reg = ThreadRegistry::<&str>::new();
+        reg.start_task("task_a", WorkerConfig::default(), |cancel| async move {
+            cancel.cancelled().await;
+        });
+        assert_eq!(reg.quiesce("task_a").await, WorkerStatus::Ok);
+        assert!(!reg.is_running("task_a"));
+    }
+
+    /// TC-014 — an `OsThread` worker yields `Ok` (clean) or `Panicked`
+    /// (`&str` and `String` payloads), never `Stopped`.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn tc014_os_thread_ok_and_panicked_never_stopped() {
+        let reg = ThreadRegistry::<&str>::new();
+        start_clean(&reg, "os_clean", WorkerConfig::default());
+        let ok = reg.quiesce("os_clean").await;
+        assert_eq!(ok, WorkerStatus::Ok);
+        assert!(ok.is_clean());
+
+        // &str panic payload.
+        reg.start_thread("os_panic_str", WorkerConfig::default(), |_cancel| {
+            panic!("deliberate test panic");
+        });
+        match reg.quiesce("os_panic_str").await {
+            WorkerStatus::Panicked(msg) => assert!(msg.contains("deliberate test panic")),
+            other => panic!("expected Panicked, got {other:?}"),
+        }
+
+        // String panic payload.
+        reg.start_thread("os_panic_string", WorkerConfig::default(), |_cancel| {
+            std::panic::panic_any(String::from("deliberate string panic"));
+        });
+        match reg.quiesce("os_panic_string").await {
+            WorkerStatus::Panicked(msg) => assert!(msg.contains("deliberate string panic")),
+            other => panic!("expected Panicked, got {other:?}"),
+        }
+    }
+
+    // ----- Gaps -------------------------------------------------------
+
+    /// GAP-003 — `shutdown()` is idempotent: a second call finds every slot
+    /// already joined and reports `NotRunning`, still clean.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn gap003_shutdown_is_idempotent() {
+        let reg = ThreadRegistry::<&str>::new();
+        start_clean(&reg, "alpha", WorkerConfig::default());
+
+        let first = reg.shutdown().await;
+        assert_eq!(first.per_worker.get("alpha"), Some(&WorkerStatus::Ok));
+        assert!(first.all_clean());
+
+        let second = reg.shutdown().await;
+        assert_eq!(
+            second.per_worker.get("alpha"),
+            Some(&WorkerStatus::NotRunning)
+        );
+        assert!(second.all_clean());
+    }
+
+    /// GAP-004 — `cancel(key)` is selective: cancelling A does not touch B.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn gap004_cancel_is_selective() {
+        let reg = ThreadRegistry::<&str>::new();
+        start_clean(&reg, "a", WorkerConfig::default());
+        start_clean(&reg, "b", WorkerConfig::default());
+
+        reg.cancel("a");
+        assert!(reg.is_running("b"), "cancel(a) must not cancel b");
+        assert_eq!(reg.quiesce("a").await, WorkerStatus::Ok);
+        assert!(reg.is_running("b"), "b still running after a drains");
+        assert_eq!(reg.quiesce("b").await, WorkerStatus::Ok);
+    }
+
+    /// `cancel_all()` cancels every registered worker in one call; a
+    /// subsequent `quiesce` per key drains each one cleanly. Covers the
+    /// public method that has no in-tree caller yet (the rs-dapi-client
+    /// adoption will use it).
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn cancel_all_signals_every_worker() {
+        let reg = ThreadRegistry::<&str>::new();
+        start_clean(&reg, "a", WorkerConfig::default());
+        start_clean(&reg, "b", WorkerConfig::default());
+        start_clean(&reg, "c", WorkerConfig::default());
+        assert!(reg.is_running("a") && reg.is_running("b") && reg.is_running("c"));
+
+        reg.cancel_all();
+        assert!(!reg.is_running("a"));
+        assert!(!reg.is_running("b"));
+        assert!(!reg.is_running("c"));
+
+        // All three drain cleanly — the cancel reached every worker.
+        assert_eq!(reg.quiesce("a").await, WorkerStatus::Ok);
+        assert_eq!(reg.quiesce("b").await, WorkerStatus::Ok);
+        assert_eq!(reg.quiesce("c").await, WorkerStatus::Ok);
+        assert!(!reg.any_alive());
+    }
+
+    /// GAP-005 — `WorkerConfig::default()` values are pinned.
+    #[test]
+    fn gap005_worker_config_defaults_pinned() {
+        let cfg = WorkerConfig::default();
+        assert_eq!(cfg.weight, ShutdownWeight(0));
+        assert!(cfg.drain.is_none());
+        assert_eq!(cfg.join_budget, DEFAULT_JOIN_BUDGET);
+    }
+
+    /// `hold_clearing(key)` refuses both `start_thread` and `start_task`
+    /// for that key, but ONLY for that key — other keys are unaffected.
+    /// After the guard drops the latch releases and starts succeed again.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn hold_clearing_blocks_starts_for_latched_key_only() {
+        let reg = ThreadRegistry::<&str>::new();
+        let _gate = reg.hold_clearing("shielded");
+
+        // start_thread on the latched key is a no-op.
+        start_clean(&reg, "shielded", WorkerConfig::default());
+        assert!(
+            !reg.is_running("shielded"),
+            "start_thread must be refused while the key is latched"
+        );
+
+        // start_task on the latched key is also a no-op.
+        reg.start_task("shielded", WorkerConfig::default(), |cancel| async move {
+            cancel.cancelled().await;
+        });
+        assert!(
+            !reg.is_running("shielded"),
+            "start_task must be refused while the key is latched"
+        );
+
+        // An unrelated key starts cleanly — the latch is per-key.
+        start_clean(&reg, "identity", WorkerConfig::default());
+        assert!(reg.is_running("identity"));
+
+        // Drop the latch; the same key now starts.
+        drop(_gate);
+        start_clean(&reg, "shielded", WorkerConfig::default());
+        assert!(
+            reg.is_running("shielded"),
+            "latch release allows the key to start again"
+        );
+
+        // Cleanup.
+        assert_eq!(reg.quiesce("shielded").await, WorkerStatus::Ok);
+        assert_eq!(reg.quiesce("identity").await, WorkerStatus::Ok);
+    }
+
+    /// Refcounted nesting: two concurrent / nested holders for the same
+    /// key keep the latch raised until the LAST guard drops. The inner
+    /// guard's drop must NOT release the latch the outer still holds —
+    /// otherwise a re-entrant or concurrent caller silently lapses the
+    /// invariant.
+    ///
+    /// Non-vacuous: against the pre-refactor set-membership backing,
+    /// the inner guard's drop removed the entry outright, so the
+    /// `start_clean` between `drop(inner)` and `drop(outer)` would
+    /// succeed and `is_clearing` would return false — both asserted to
+    /// fail below.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn hold_clearing_inner_drop_does_not_lapse_outer_protection() {
+        let reg = ThreadRegistry::<&str>::new();
+        let outer = reg.hold_clearing("shielded");
+        let inner = reg.hold_clearing("shielded");
+
+        // While both guards are live, starts are refused and the latch
+        // reports clearing.
+        start_clean(&reg, "shielded", WorkerConfig::default());
+        assert!(!reg.is_running("shielded"));
+        assert!(reg.is_clearing("shielded"));
+
+        // Drop the INNER guard while the outer is still alive.
+        drop(inner);
+
+        // The outer protection MUST survive: the latch is still raised
+        // and a fresh start is still refused. Pre-refactor (set
+        // semantics) this assertion would fail — the entry was removed
+        // by the inner's drop and the outer's protection lapsed.
+        assert!(
+            reg.is_clearing("shielded"),
+            "outer ClearingGuard must keep the latch raised after the inner drops"
+        );
+        start_clean(&reg, "shielded", WorkerConfig::default());
+        assert!(
+            !reg.is_running("shielded"),
+            "start must still be refused while the outer guard is alive"
+        );
+
+        // After the outer drops too, the latch fully releases.
+        drop(outer);
+        assert!(!reg.is_clearing("shielded"));
+        assert_eq!(reg.lock_clearing().len(), 0);
+
+        // And a fresh start succeeds.
+        start_clean(&reg, "shielded", WorkerConfig::default());
+        assert!(reg.is_running("shielded"));
+        assert_eq!(reg.quiesce("shielded").await, WorkerStatus::Ok);
+    }
+
+    /// The latch holds across panic unwinding (RAII guarantee). A
+    /// closure that holds the guard and panics still removes the key
+    /// from the clearing set on its drop path.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn hold_clearing_releases_on_panic_unwind() {
+        let reg = ThreadRegistry::<&str>::new();
+        let reg_clone = Arc::clone(&reg);
+        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+            let _gate = reg_clone.hold_clearing("shielded");
+            assert_eq!(reg_clone.lock_clearing().len(), 1);
+            panic!("simulated clear-flow panic");
+        }));
+        assert!(result.is_err());
+        assert_eq!(
+            reg.lock_clearing().len(),
+            0,
+            "ClearingGuard's Drop must release the latch even when the clear flow panics"
+        );
+
+        // The key is startable again post-panic.
+        start_clean(&reg, "shielded", WorkerConfig::default());
+        assert!(reg.is_running("shielded"));
+        assert_eq!(reg.quiesce("shielded").await, WorkerStatus::Ok);
+    }
+
+    // ----- Group 6: concurrency-hazard regressions --------------------
+
+    /// `quiesce` is generation-guarded. A same-key restart that lands after
+    /// quiesce takes the prior's cancel must not have its fresh, live handle
+    /// parked or reported `Timeout`: the superseded quiesce returns
+    /// `NotRunning` and the new generation survives.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn quiesce_generation_guard_spares_concurrent_restart() {
+        let reg = ThreadRegistry::<&str>::new();
+        // gen-1: a task that ignores cancellation (pending forever), with a
+        // tiny join budget so a non-guarded quiesce would Timeout quickly.
+        reg.start_task(
+            "k",
+            WorkerConfig {
+                join_budget: Duration::from_millis(150),
+                ..WorkerConfig::default()
+            },
+            |_cancel| async move { std::future::pending::<()>().await },
+        );
+
+        // Drive quiesce concurrently; it snapshots gen=1, cancels (ignored),
+        // and enters the poll loop with cancel already taken.
+        let reg_q = Arc::clone(&reg);
+        let q = tokio::spawn(async move { reg_q.quiesce("k").await });
+
+        // Let quiesce pass cancel.take() so a restart can proceed.
+        tokio::time::sleep(Duration::from_millis(40)).await;
+
+        // Restart: cancel is now None, so this proceeds — it takes gen-1's
+        // live handle as its prior (parked) and installs gen-2.
+        reg.start_task("k", WorkerConfig::default(), |cancel| async move {
+            cancel.cancelled().await;
+        });
+
+        // The superseded quiesce must NOT park gen-2 / report Timeout.
+        let status = q.await.unwrap();
+        assert_eq!(
+            status,
+            WorkerStatus::NotRunning,
+            "superseded quiesce returns NotRunning, never a spurious Timeout"
+        );
+        assert!(reg.is_running("k"), "gen-2 survives the racing quiesce");
+
+        // gen-2 quiesces cleanly.
+        assert_eq!(reg.quiesce("k").await, WorkerStatus::Ok);
+    }
+
+    /// A thread-spawn failure must neither panic nor detach the live prior
+    /// handle: it rolls back (prior re-installed, running flag cleared) and
+    /// the slot stays usable / reapable.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn spawn_failure_reparks_live_prior_without_panic() {
+        let reg = ThreadRegistry::<&str>::new();
+        let (release_tx, release_rx) = mpsc::channel::<()>();
+        // gen-1: wedged (ignores cancel), stays live until released.
+        reg.start_thread("k", WorkerConfig::default(), wedged_body(release_rx));
+        // cancel() takes the token (slot.cancel = None) but the wedged thread
+        // keeps running — the slot now holds a LIVE prior handle with cancel
+        // cleared, the exact shape a racing restart would take as its prior.
+        reg.cancel("k");
+        assert!(!reg.is_running("k"));
+
+        // Force the restart's spawn to fail; it must not panic.
+        reg.force_spawn_failure.store(true, Ordering::Release);
+        reg.start_thread("k", WorkerConfig::default(), |_cancel| {});
+        assert!(
+            !reg.is_running("k"),
+            "failed spawn clears the running flag, never leaves it wedged"
+        );
+        assert!(reg.any_alive(), "live prior re-installed, never detached");
+
+        // Recover: release the prior; quiesce reaps the now-finished handle
+        // cleanly, proving it was owned (not leaked/detached) and the slot is
+        // not wedged.
+        reg.force_spawn_failure.store(false, Ordering::Release);
+        release_tx.send(()).unwrap();
+        assert_eq!(reg.quiesce("k").await, WorkerStatus::Ok);
+        assert!(!reg.any_alive());
+    }
+
+    /// A thread-spawn failure must roll the slot back to its PRIOR config, not
+    /// leave the failed start's weight / drain / join_budget / generation
+    /// behind: the re-installed prior worker keeps its own teardown config for
+    /// a later quiesce/shutdown.
+    ///
+    /// Non-vacuous: against a partial rollback (only cancel/handle restored),
+    /// the slot would carry the failed start's weight/budget, a `None` drain,
+    /// and the bumped generation.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn spawn_failure_restores_prior_slot_config() {
+        let reg = ThreadRegistry::<&str>::new();
+        let (release_tx, release_rx) = mpsc::channel::<()>();
+
+        // gen-1 with a DISTINCTIVE config (drain hook + non-default weight and
+        // join budget). Wedged so it stays the live prior after cancel.
+        let hook: DrainHook = Arc::new(|| Box::pin(async {}));
+        let cfg1 = WorkerConfig {
+            weight: ShutdownWeight(7),
+            join_budget: Duration::from_secs(11),
+            drain: Some(hook),
+        };
+        reg.start_thread("k", cfg1, wedged_body(release_rx));
+        reg.cancel("k");
+        let gen_after_gen1 = reg.lock_slots().get("k").unwrap().generation;
+
+        // Failed restart with a DIFFERENT config; the rollback must discard it.
+        reg.force_spawn_failure.store(true, Ordering::Release);
+        let cfg2 = WorkerConfig {
+            weight: ShutdownWeight(99),
+            join_budget: Duration::from_secs(99),
+            drain: None,
+        };
+        reg.start_thread("k", cfg2, |_cancel| {});
+        reg.force_spawn_failure.store(false, Ordering::Release);
+
+        {
+            let slots = reg.lock_slots();
+            let slot = slots.get("k").expect("slot present");
+            assert_eq!(slot.weight, ShutdownWeight(7), "weight restored to prior");
+            assert_eq!(
+                slot.join_budget,
+                Duration::from_secs(11),
+                "join_budget restored to prior"
+            );
+            assert!(
+                slot.drain.is_some(),
+                "prior drain hook restored, not the failed start's None"
+            );
+            assert_eq!(
+                slot.generation, gen_after_gen1,
+                "generation rolled back to its pre-bump value"
+            );
+            assert!(
+                slot.cancel.is_none(),
+                "running flag cleared after failed spawn"
+            );
+            assert!(
+                slot.handle.is_some(),
+                "prior handle re-installed (alive), not detached"
+            );
+        }
+        assert!(reg.any_alive(), "live prior still accounted for");
+
+        // Recover: release + quiesce reaps the prior cleanly.
+        release_tx.send(()).unwrap();
+        assert_eq!(reg.quiesce("k").await, WorkerStatus::Ok);
+        assert!(!reg.any_alive());
+    }
+
+    /// A panicking worker body still runs its epilogue (via the drop-guard),
+    /// so `is_running()` reflects the crash and `start()` can relaunch the
+    /// loop instead of silently no-op'ing.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn panicked_worker_clears_running_and_allows_restart() {
+        let reg = ThreadRegistry::<&str>::new();
+        // A worker whose body panics immediately.
+        reg.start_thread("k", WorkerConfig::default(), |_cancel| {
+            panic!("deliberate worker-body panic");
+        });
+
+        // The drop-guard epilogue clears the running flag despite the panic.
+        let mut waited = Duration::ZERO;
+        while reg.is_running("k") && waited < Duration::from_secs(2) {
+            tokio::time::sleep(Duration::from_millis(5)).await;
+            waited += Duration::from_millis(5);
+        }
+        assert!(
+            !reg.is_running("k"),
+            "panicked worker clears its running flag via the epilogue guard"
+        );
+
+        // start() can relaunch a crashed loop (no longer a silent no-op).
+        let ran = Arc::new(AtomicBool::new(false));
+        let ran_w = Arc::clone(&ran);
+        let handle = Handle::current();
+        reg.start_thread("k", WorkerConfig::default(), move |cancel| {
+            ran_w.store(true, Ordering::Release);
+            handle.block_on(async move { cancel.cancelled().await });
+        });
+        assert!(
+            reg.is_running("k"),
+            "start() relaunches a previously-panicked worker"
+        );
+        assert_eq!(reg.quiesce("k").await, WorkerStatus::Ok);
+        assert!(
+            ran.load(Ordering::Acquire),
+            "restarted worker body executed"
+        );
+    }
+
+    /// `shutdown()` latches the registry closed: a start racing (or
+    /// following) teardown is refused, so no worker is left un-joined.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn shutdown_latches_closed_refusing_new_workers() {
+        let reg = ThreadRegistry::<&str>::new();
+        start_clean(&reg, "live", WorkerConfig::default());
+        let report = reg.shutdown().await;
+        assert!(report.all_clean());
+
+        // One-way door: both worker kinds are refused after shutdown.
+        start_clean(&reg, "late_thread", WorkerConfig::default());
+        assert!(
+            !reg.is_running("late_thread"),
+            "start_thread after shutdown is refused"
+        );
+        reg.start_task("late_task", WorkerConfig::default(), |cancel| async move {
+            cancel.cancelled().await;
+        });
+        assert!(
+            !reg.is_running("late_task"),
+            "start_task after shutdown is refused"
+        );
+        assert!(!reg.any_alive(), "nothing started post-shutdown");
+    }
+
+    /// `start_thread` must park a restarted key's still-wedged prior into the
+    /// orphan list UNDER the slot lock — at the START of the restart, not only
+    /// after the out-of-lock reap backstop elapses.
+    /// Otherwise a `shutdown()` that snapshots tiers in the window between
+    /// "prior taken out of the slot" and "prior parked" sees neither the
+    /// prior (already moved out of the slot) nor an orphan, and reports
+    /// clean while the wedged prior is still live and un-joined.
+    ///
+    /// Deterministic via a long backstop: parking under the slot lock makes
+    /// the prior observable in orphans well before the backstop could elapse,
+    /// so the early assertion lands. Parking only at the end of the
+    /// out-of-lock spin would fail it.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn start_thread_parks_wedged_prior_under_slot_lock_at_restart() {
+        // Long backstop so the under-lock parking is observable well before
+        // it could possibly elapse.
+        let reg = ThreadRegistry::with_reap_backstop(Duration::from_secs(10));
+        let (release_tx, release_rx) = mpsc::channel::<()>();
+
+        // gen-1: wedged (ignores cancel), stays live until released.
+        reg.start_thread("k", WorkerConfig::default(), wedged_body(release_rx));
+        reg.cancel("k");
+
+        // gen-2 restart on a blocking thread: its bounded reap of the wedged
+        // gen-1 spins the (long) backstop, so start_thread does not return
+        // promptly. gen-1 is parked under the slot lock at the start of
+        // this call, before that spin.
+        let reg2 = Arc::clone(&reg);
+        let parent = Handle::current();
+        let restart = tokio::task::spawn_blocking(move || {
+            let handle = parent.clone();
+            reg2.start_thread("k", WorkerConfig::default(), move |cancel| {
+                handle.block_on(async move { cancel.cancelled().await });
+            });
+        });
+
+        // The wedged prior must appear in orphans far sooner than the 10s
+        // backstop — it was parked under the slot lock at restart.
+        let mut waited = Duration::ZERO;
+        while orphan_len(&reg) == 0 && waited < Duration::from_secs(2) {
+            tokio::time::sleep(Duration::from_millis(10)).await;
+            waited += Duration::from_millis(10);
+        }
+        assert_eq!(
+            orphan_len(&reg),
+            1,
+            "wedged prior must be parked under the slot lock at restart, not \
+             only after the backstop spin"
+        );
+        assert!(reg.is_running("k"), "gen-2 installed under the same lock");
+
+        // Release the wedged prior: the restart's bounded reap then finds it
+        // finished, removes it from orphans, and joins it.
+        release_tx.send(()).unwrap();
+        restart.await.unwrap();
+        assert_eq!(
+            orphan_len(&reg),
+            0,
+            "finished prior removed from orphans by the bounded reap"
+        );
+
+        // gen-2 quiesces cleanly.
+        assert_eq!(reg.quiesce("k").await, WorkerStatus::Ok);
+    }
+
+    /// PR #3954 thread #5 — `with_reap_backstop` MUST emit a one-shot
+    /// `tracing::warn!` when compiled under `panic = "abort"` so an operator
+    /// can audit the orphan-liveness-gate risk documented on `EpilogueGuard`
+    /// and `AtomicFlagGuard`. The check is build-cfg-pinned: this test only
+    /// exists under abort builds and serves as a compile-gate canary — if
+    /// the cfg block in `with_reap_backstop` is ever removed, this test
+    /// disappears with it and CI loses the signal.
+    ///
+    /// Functional assertion is on the process-wide `Once` latch, which is
+    /// the most reliable artifact we can probe without subscribing to
+    /// tracing from a `#[test]`.
+    #[cfg(panic = "abort")]
+    #[test]
+    fn with_reap_backstop_emits_panic_abort_warn_under_abort_builds() {
+        let _reg = ThreadRegistry::<&'static str>::with_reap_backstop(Duration::from_secs(1));
+        assert!(
+            super::PANIC_ABORT_WARNED.is_completed(),
+            "with_reap_backstop must trip the panic=abort warn latch on first call"
+        );
+        // Second construction must NOT re-fire — `Once` guarantees this, but
+        // we exercise it to lock the one-shot contract into the test.
+        let _reg2 = ThreadRegistry::<&'static str>::with_reap_backstop(Duration::from_secs(1));
+        assert!(super::PANIC_ABORT_WARNED.is_completed());
+    }
+
+    /// Sentinel for the no-op cfg branch: under `panic = "unwind"` (the
+    /// dev-profile default) `EpilogueGuard`'s `Drop` runs and releases the
+    /// orphan slot, so the operator warn is unnecessary. This test just
+    /// proves the unwind branch compiles and `with_reap_backstop` keeps
+    /// behaving like a plain constructor — no observable warn-related state
+    /// to assert because the gated `static` doesn't exist on this build.
+ #[cfg(not(panic = "abort"))] + #[test] + fn with_reap_backstop_no_warn_under_unwind() { + let reg = ThreadRegistry::<&'static str>::with_reap_backstop(Duration::from_millis(250)); + assert!(!reg.any_alive(), "fresh registry has no live workers"); + } +} diff --git a/packages/rs-platform-wallet-ffi/Cargo.toml b/packages/rs-platform-wallet-ffi/Cargo.toml index 8a2bd4ef2b4..7e60b05d69b 100644 --- a/packages/rs-platform-wallet-ffi/Cargo.toml +++ b/packages/rs-platform-wallet-ffi/Cargo.toml @@ -22,7 +22,7 @@ rs-sdk-ffi = { path = "../rs-sdk-ffi" } once_cell = "1.19" parking_lot = { version = "0.12", features = ["send_guard"] } lazy_static = "1.4" -tokio = { version = "1", features = ["rt-multi-thread"] } +tokio = { version = "1", features = ["rt-multi-thread", "time"] } tokio-metrics = { workspace = true, optional = true } # Core dependencies (for Network type) diff --git a/packages/rs-platform-wallet-ffi/src/core_wallet_types.rs b/packages/rs-platform-wallet-ffi/src/core_wallet_types.rs index c8ebc1348fe..6232fd3baa6 100644 --- a/packages/rs-platform-wallet-ffi/src/core_wallet_types.rs +++ b/packages/rs-platform-wallet-ffi/src/core_wallet_types.rs @@ -996,7 +996,7 @@ pub unsafe fn free_wallet_changeset_ffi(cs: &WalletChangeSetFFI) { // round that ONLY advances the watermark (chain-lock arm with // empty per_account on the WalletEvent side wouldn't fire, but // a coalesced round may still resolve down to "just the CL" - // after `is_empty_no_records` filters out the other arms) must + // after `is_empty` filters out the other arms) must // still release the heap allocation we made in `from_changeset`. 
if !cs.last_applied_chain_lock_bytes.is_null() && cs.last_applied_chain_lock_bytes_len > 0 { drop(Vec::from_raw_parts( diff --git a/packages/rs-platform-wallet-ffi/src/error.rs b/packages/rs-platform-wallet-ffi/src/error.rs index de1a6cb9441..135a16590a5 100644 --- a/packages/rs-platform-wallet-ffi/src/error.rs +++ b/packages/rs-platform-wallet-ffi/src/error.rs @@ -125,6 +125,33 @@ pub enum PlatformWalletFFIResultCode { /// and could double-send if the original spend landed. ErrorShieldedSpendUnconfirmed = 18, + /// A background coordinator drain did not complete cleanly within the + /// join deadline — one or more `!Send` sync threads may still be alive + /// and still hold a reference to the host-owned callback context, so they + /// could fire one final callback through it. On this code the host **must + /// not** free the callback context immediately: either keep it alive for a + /// further grace period, or accept the (statistically tiny) race. + /// + /// Returned by two callers, which differ in whether the operation may + /// be **retried**: + /// - `platform_wallet_manager_destroy`: the manager **IS** torn down + /// (removed from storage) regardless — do **not** retry `destroy`; the + /// handle is already gone. Only the callback-context lifetime caveat + /// above applies. + /// - `platform_wallet_manager_shielded_clear`: the manager is **NOT** torn + /// down and the store was left **intact** (Clear aborted before touching + /// it). The host may retry the clear, and must **not** commit its own + /// persistence wipe — doing so would desync the host's rows from the + /// still-populated shared tree. + /// + /// `platform_wallet_manager_shielded_sync_stop` is cancel-only and + /// never returns this code; its companion join point is `destroy`. + /// + /// Distinct from a normal operation error (the underlying operation may + /// well have made progress); the terminal coordinator status is rendered + /// into the result message. 
+ ErrorShutdownIncomplete = 19, + NotFound = 98, // Used exclusively for all the Option that are returned as errors ErrorUnknown = 99, } @@ -237,6 +264,14 @@ impl From<PlatformWalletError> for PlatformWalletFFIResult { PlatformWalletError::ShieldedSpendUnconfirmed { .. } => { PlatformWalletFFIResultCode::ErrorShieldedSpendUnconfirmed } + // A Clear that refused because the in-flight shielded pass didn't + // drain cleanly: surface it as ErrorShutdownIncomplete (symmetric + // with `platform_wallet_manager_destroy`) so the host defers + // freeing its callback context AND does not commit its own + // persistence wipe — the store was intentionally left intact. + PlatformWalletError::ShieldedShutdownIncomplete { .. } => { + PlatformWalletFFIResultCode::ErrorShutdownIncomplete + } _ => PlatformWalletFFIResultCode::ErrorUnknown, }; PlatformWalletFFIResult::err(code, error.to_string()) @@ -595,6 +630,29 @@ mod tests { assert_eq!(msg, rendered, "Display payload must survive verbatim"); } + /// A Clear that refused on a non-clean shielded drain must surface as + /// `ErrorShutdownIncomplete` (symmetric with `destroy`), not flatten to + /// `ErrorUnknown`, so the host knows to defer freeing its callback + /// context and to NOT commit its own persistence wipe. The typed Display + /// rendering (carrying the terminal coordinator status) survives verbatim.
+ #[test] + fn shielded_shutdown_incomplete_maps_to_dedicated_code() { + let err = PlatformWalletError::ShieldedShutdownIncomplete { + status: platform_wallet::WorkerStatus::Timeout, + }; + let rendered = err.to_string(); + let result: PlatformWalletFFIResult = err.into(); + assert_eq!( + result.code, + PlatformWalletFFIResultCode::ErrorShutdownIncomplete, + "ShieldedShutdownIncomplete should map to ErrorShutdownIncomplete (rendered: {rendered})" + ); + let msg = unsafe { std::ffi::CStr::from_ptr(result.message) } + .to_string_lossy() + .into_owned(); + assert_eq!(msg, rendered, "Display payload must survive verbatim"); + } + /// Other wallet-error variants without a dedicated FFI arm still /// fall through to `ErrorUnknown` while carrying the typed /// Display rendering as the message. Pin this so the catch-all diff --git a/packages/rs-platform-wallet-ffi/src/manager.rs b/packages/rs-platform-wallet-ffi/src/manager.rs index 5930c1c4db6..2785aa363a3 100644 --- a/packages/rs-platform-wallet-ffi/src/manager.rs +++ b/packages/rs-platform-wallet-ffi/src/manager.rs @@ -174,6 +174,51 @@ unsafe fn create_wallet_from_mnemonic_impl( PlatformWalletFFIResult::ok() } +/// One wallet skipped during `load_from_persistor` because its +/// persisted row was structurally corrupt (per-row decode failure). +/// The load path is seedless and watch-only, so this is the only skip +/// reason. `reason_code` is per-`CorruptKind` family — see its table. +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct SkippedWalletFFI { + /// The (public) 32-byte wallet id that was skipped. + pub wallet_id: [u8; 32], + /// Structural skip reason. `100` = missing account manifest, + /// `101` = malformed account xpub, `102` = any other structural + /// decode error. No secret material is ever carried. 
+ pub reason_code: u32, +} + +/// C-visible summary of one `load_from_persistor` pass so the host can +/// see which wallets loaded and which were skipped (and why) instead +/// of the outcome being silently discarded. +/// +/// `skipped` is a heap array of length `skipped_count`; pass this +/// struct (by pointer) to +/// [`platform_wallet_load_outcome_free`] exactly once to release it. +#[repr(C)] +#[derive(Debug)] +pub struct LoadOutcomeFFI { + /// Number of wallets fully reconstructed + registered. + pub loaded_count: usize, + /// Length of the `skipped` array. + pub skipped_count: usize, + /// Heap-allocated skipped-wallet array (null iff `skipped_count` + /// is 0). Owned by Rust until `platform_wallet_load_outcome_free`. + pub skipped: *mut SkippedWalletFFI, +} + +fn skip_reason_code(reason: &platform_wallet::SkipReason) -> u32 { + use platform_wallet::manager::load_outcome::CorruptKind; + match reason { + platform_wallet::SkipReason::CorruptPersistedRow { kind } => match kind { + CorruptKind::MissingManifest => 100, + CorruptKind::MalformedXpub => 101, + CorruptKind::DecodeError(_) => 102, + }, + } +} + /// Create a wallet from raw seed bytes (64 bytes). /// /// On success, `out_wallet_handle` is set to a `PlatformWallet` handle and @@ -296,23 +341,102 @@ pub unsafe extern "C" fn platform_wallet_manager_create_wallet_from_mnemonic_wit /// /// Triggers `on_load_wallet_list_fn` on the persistence callbacks to /// fetch the persisted wallet list from the client side (SwiftData), -/// reconstructs each wallet as **watch-only** via its stored root + -/// per-account xpubs, and registers them inside the manager. Does not -/// produce wallet handles — the caller should follow up with -/// [`platform_wallet_manager_get_wallet`] per `wallet_id` it knows -/// about. +/// builds a keyless reconstruction payload per wallet, then registers +/// each one as a **watch-only** wallet. 
No signing keys are derived +/// here — signing happens later, on demand, via the configured +/// `MnemonicResolverHandle` (`sign_with_mnemonic_resolver` and its +/// siblings), which fail-closed gate the resolver-supplied seed +/// against the loaded `wallet_id`. Does not produce wallet handles — +/// follow up with [`platform_wallet_manager_get_wallet`] per +/// `wallet_id`. +/// +/// A wallet whose persisted row is structurally corrupt is +/// **skipped**, not failed: the call still returns `Success`, every +/// skipped `(wallet_id, reason)` is logged, and — when `out_outcome` +/// is non-null — surfaced through it. +/// +/// # Safety +/// - `out_outcome` may be null (caller doesn't want the summary); +/// otherwise it must point to writable `LoadOutcomeFFI` storage and +/// the caller must later release it via +/// [`platform_wallet_load_outcome_free`]. #[no_mangle] pub unsafe extern "C" fn platform_wallet_manager_load_from_persistor( manager_handle: Handle, + out_outcome: *mut LoadOutcomeFFI, ) -> PlatformWalletFFIResult { let option = PLATFORM_WALLET_MANAGER_STORAGE.with_item(manager_handle, |manager| { runtime().block_on(manager.load_from_persistor()) }); let result = unwrap_option_or_return!(option); - unwrap_result_or_return!(result); + let outcome = unwrap_result_or_return!(result); + + // Never silently drop the outcome: log a structured summary plus + // one line per skipped wallet (the host can inspect / clear the + // corrupt rows). 
+ tracing::info!( + loaded = outcome.loaded.len(), + skipped = outcome.skipped.len(), + "platform_wallet_manager_load_from_persistor complete" + ); + for (wid, reason) in &outcome.skipped { + tracing::warn!( + wallet_id = %hex::encode(wid), + reason = %reason, + "load_from_persistor skipped wallet (corrupt persisted row)" + ); + } + + if !out_outcome.is_null() { + let skipped_vec: Vec<SkippedWalletFFI> = outcome + .skipped + .iter() + .map(|(wid, reason)| SkippedWalletFFI { + wallet_id: *wid, + reason_code: skip_reason_code(reason), + }) + .collect(); + let skipped_count = skipped_vec.len(); + let skipped_ptr = if skipped_count == 0 { + std::ptr::null_mut() + } else { + let boxed = skipped_vec.into_boxed_slice(); + Box::into_raw(boxed) as *mut SkippedWalletFFI + }; + std::ptr::write( + out_outcome, + LoadOutcomeFFI { + loaded_count: outcome.loaded.len(), + skipped_count, + skipped: skipped_ptr, + }, + ); + } PlatformWalletFFIResult::ok() } +/// Release the heap `skipped` array a successful +/// [`platform_wallet_manager_load_from_persistor`] wrote into a +/// `LoadOutcomeFFI`. Idempotent: nulls the pointer after freeing, and +/// a null `outcome` (or already-freed array) is a no-op. +/// +/// # Safety +/// `outcome` must be null or point to a `LoadOutcomeFFI` previously populated by +/// `platform_wallet_manager_load_from_persistor`; never pass a stale copy of an already-freed struct. +#[no_mangle] +pub unsafe extern "C" fn platform_wallet_load_outcome_free(outcome: *mut LoadOutcomeFFI) { + if outcome.is_null() { + return; + } + let o = &mut *outcome; + if !o.skipped.is_null() && o.skipped_count > 0 { + let slice = std::slice::from_raw_parts_mut(o.skipped, o.skipped_count); + drop(Box::from_raw(slice as *mut [SkippedWalletFFI])); + } + o.skipped = std::ptr::null_mut(); + o.skipped_count = 0; +} + /// Get a `PlatformWallet` handle for a wallet registered in the /// manager. Returns `NotFound` if no wallet with the given /// id is currently held.
@@ -360,7 +484,28 @@ pub unsafe extern "C" fn platform_wallet_manager_destroy( // left alive to fire a callback against freed memory. // `shutdown()` is idempotent, so this is safe even if the host // already stopped some sync managers before calling destroy. - runtime().block_on(manager.shutdown()); + // It now joins the coordinator OS threads and returns their + // per-thread exit status; the C ABI has no structured slot for that, so we + // log it and fold it into one result code (ok / ErrorShutdownIncomplete). + let status = runtime().block_on(manager.shutdown()); + if !status.all_clean() { + tracing::warn!( + ?status, + "platform wallet coordinator(s) did not exit cleanly; \ + host must not free the callback context immediately" + ); + // Return a distinct non-ok code so the host can delay freeing + // its callback context. A lingering coordinator thread (e.g. one + // that timed out) still holds an Arc to the event handler and may + // fire one final callback through the host-owned context pointer; + // returning ok() here would signal that the context is safe to + // free when it may not be yet. + return PlatformWalletFFIResult::err( + PlatformWalletFFIResultCode::ErrorShutdownIncomplete, + format!("coordinator(s) did not exit cleanly: {status:?}"), + ); + } + tracing::debug!(?status, "platform wallet coordinators joined cleanly"); } PlatformWalletFFIResult::ok() } diff --git a/packages/rs-platform-wallet-ffi/src/persistence.rs b/packages/rs-platform-wallet-ffi/src/persistence.rs index 86b5561518c..f554ccb2781 100644 --- a/packages/rs-platform-wallet-ffi/src/persistence.rs +++ b/packages/rs-platform-wallet-ffi/src/persistence.rs @@ -3377,11 +3377,59 @@ fn build_wallet_start_state( // status without rebroadcasting. let unused_asset_locks = build_unused_asset_locks(entry)?; + // Project the reconstructed `wallet` + `wallet_info` into the + // keyless `ClientWalletStartState` the persister contract requires + // (SECRETS.md: no `Wallet`/seed crosses `load()`).
The manager + // rebuilds a watch-only wallet from this manifest via + // `Wallet::new_watch_only` and applies this `core_state` projection. + // Signing happens later via the on-demand + // `sign_with_mnemonic_resolver` path, which fail-closed gates the + // resolver-supplied seed against the loaded `wallet_id`. The + // locally-built `wallet` is dropped — it was only needed to shape + // the account collection / UTXO routing above. + let account_manifest: Vec<AccountRegistrationEntry> = wallet + .accounts + .all_accounts() + .into_iter() + .map(|a| AccountRegistrationEntry { + account_type: a.account_type, + account_xpub: a.account_xpub, + }) + .collect(); + let new_utxos: Vec<_> = wallet_info + .accounts + .all_funding_accounts() + .into_iter() + .flat_map(|acct| acct.utxos.values().cloned()) + .collect(); + let core_state = platform_wallet::changeset::CoreChangeSet { + new_utxos, + last_processed_height: (wallet_info.metadata.last_processed_height > 0) + .then_some(wallet_info.metadata.last_processed_height), + synced_height: (wallet_info.metadata.synced_height > 0) + .then_some(wallet_info.metadata.synced_height), + ..Default::default() + }; + + // `contacts` / `identity_keys` are the PR-3 keyless feed the + // manager layers onto the managed identities via + // `apply_contacts_and_keys`. The iOS path does NOT use them: + // identity PUBLIC keys are already reconstructed straight into + // `Identity.public_keys` by `build_wallet_identity_bucket` (feeding + // the slot too would double-apply), and `WalletRestoreEntryFFI` + // carries no contacts back from Swift on load — surfacing them + // would need a new cross-boundary struct field + Swift wiring, + // tracked as a follow-up. Empty slots make `apply_contacts_and_keys` + // a no-op for this path, preserving the established iOS behaviour.
let wallet_state = ClientWalletStartState { - wallet, - wallet_info, + network, + birth_height: entry.birth_height, + account_manifest, + core_state, identity_manager, unused_asset_locks, + contacts: Default::default(), + identity_keys: Default::default(), }; let platform_address_state = if per_account.is_empty() diff --git a/packages/rs-platform-wallet-ffi/src/shielded_sync.rs b/packages/rs-platform-wallet-ffi/src/shielded_sync.rs index 2d58d8165f6..57d7da31e7e 100644 --- a/packages/rs-platform-wallet-ffi/src/shielded_sync.rs +++ b/packages/rs-platform-wallet-ffi/src/shielded_sync.rs @@ -65,30 +65,29 @@ pub unsafe extern "C" fn platform_wallet_manager_shielded_sync_start( PlatformWalletFFIResult::ok() } -/// Stop the shielded sync manager and wait for any in-flight pass to -/// drain before returning. No-op if not running. +/// Stop the shielded sync manager. No-op if not running. /// -/// Uses `quiesce` rather than cancel-only stop, so on return: the loop -/// is cancelled, no new pass will start, and any in-flight pass has -/// fully drained — its **persistence callbacks have completed** (no -/// note/sync-state row can be written after this returns) and its -/// completion-event *dispatch* on the Rust side has run. +/// **Cancel-only**: signals the loop and returns immediately, matching +/// `platform_address_sync_stop` / `identity_sync_stop`. An in-flight +/// pass is cancelled mid-flight at its next `.await`; a parked prior- +/// generation orphan is **not** joined here. Never returns +/// `ErrorShutdownIncomplete` — the join-and-orphan-liveness gate that +/// prevents a host UAF lives on `platform_wallet_manager_destroy` and +/// `platform_wallet_manager_shielded_clear`, which are the host's +/// contract points for "safe to free the callback context". /// -/// Caveat on host-observed events: a host that marshals the completion -/// callback onto its own executor (e.g. 
the Swift trampoline hops it to -/// the `@MainActor`) may still observe that final, already-dispatched -/// event land *after* this call returns — Rust controls when the event -/// is dispatched, not when the host's run loop applies it. The drain -/// guarantee above (no further persistence, no new pass) is the -/// load-bearing part; hosts that must ignore a trailing UI event should -/// gate their handler on their own post-stop/post-clear state (the -/// example app drops events while unbound). +/// Caveat: a host marshalling events onto its own executor (e.g. Swift +/// hops to `@MainActor`) may still observe an already-dispatched event +/// land after this returns; gate the handler on post-stop state if +/// trailing events must be dropped (the example app does so). #[no_mangle] pub unsafe extern "C" fn platform_wallet_manager_shielded_sync_stop( handle: Handle, ) -> PlatformWalletFFIResult { let option = PLATFORM_WALLET_MANAGER_STORAGE.with_item(handle, |manager| { - runtime().block_on(manager.shielded_sync().quiesce()); + // Cancel-only by design: a second `AtomicFlagGuard` on `quiescing` + // here would race a continuously-held gate in `shielded_clear`. + manager.shielded_sync().stop(); }); unwrap_option_or_return!(option); PlatformWalletFFIResult::ok() @@ -417,7 +416,9 @@ pub unsafe extern "C" fn platform_wallet_manager_configure_shielded( /// via the changeset path. /// /// Returns `ErrorWalletOperation` if the Rust-side store reset -/// fails. The host **must** check this before wiping its own +/// fails, or `ErrorShutdownIncomplete` if the in-flight sync pass +/// did not drain cleanly first (in which case the store is left +/// intact). 
The host **must** check this before wiping its own /// persistence: a silent failure would leave the shared tree /// populated while the host drops its rows, and the next cold /// resync would gate-skip every re-downloaded position against the @@ -443,10 +444,19 @@ pub unsafe extern "C" fn platform_wallet_manager_shielded_clear( }); let result = unwrap_option_or_return!(option); if let Err(e) = result { - return PlatformWalletFFIResult::err( - PlatformWalletFFIResultCode::ErrorWalletOperation, - format!("clear_shielded failed: {e}"), - ); + // A non-clean / timed-out quiesce aborts the clear *before* the store + // is touched: surface it as ErrorShutdownIncomplete (symmetric with + // destroy; cancel-only shielded_sync_stop never returns it) so the host defers freeing its + // callback context and does NOT commit its own persistence wipe — the + // store was intentionally left intact. Every other clear failure is a + // store-reset error → ErrorWalletOperation, as before. + let code = match &e { + platform_wallet::PlatformWalletError::ShieldedShutdownIncomplete { .. } => { + PlatformWalletFFIResultCode::ErrorShutdownIncomplete + } + _ => PlatformWalletFFIResultCode::ErrorWalletOperation, + }; + return PlatformWalletFFIResult::err(code, format!("clear_shielded failed: {e}")); } PlatformWalletFFIResult::ok() } diff --git a/packages/rs-platform-wallet-storage/.cargo/audit.toml b/packages/rs-platform-wallet-storage/.cargo/audit.toml new file mode 100644 index 00000000000..6e07a8c19dd --- /dev/null +++ b/packages/rs-platform-wallet-storage/.cargo/audit.toml @@ -0,0 +1,26 @@ +[advisories] +# Each entry MUST point at a live advisory in this crate's resolved graph +# and carry a dated rationale + remediation plan. Do not blanket-ignore. +ignore = [ + # RUSTSEC-2025-0141 — bincode is unmaintained (informational advisory, + # not an exploitable CVE; published 2025-12-16, no patched release). + # Acknowledged 2026-06-08.
+ # + # Why this is acceptable for now: + # - bincode 2.0.1 is the BLOB-codec trust boundary for every + # persisted column and backup. The known risk class for an + # unmaintained deserializer is unbounded allocation (OOM) on a + # crafted/corrupt input. + # - In-crate size bounds defang that class: MAX_VALUE_LEN / + # BLOB_SIZE_LIMIT_BYTES (kv/blob), the per-secret SecretTooLarge + # write cap, and the MAX_VAULT_SIZE_BYTES read ceiling all reject + # oversized inputs before bincode ever allocates from them. + # - load() is fail-hard: a malformed/over-large row aborts the call + # with a typed error rather than silently over-allocating. + # + # Residual risk: a future bincode defect would go unpatched upstream. + # Remediation plan: migrate the BLOB codec to a maintained equivalent + # (postcard / bitcode candidates) once the wire format is frozen at + # release; revisit this ignore at that time. + "RUSTSEC-2025-0141", +] diff --git a/packages/rs-platform-wallet-storage/Cargo.toml b/packages/rs-platform-wallet-storage/Cargo.toml index 43bf9a0fb0f..d8c9c8ad7c4 100644 --- a/packages/rs-platform-wallet-storage/Cargo.toml +++ b/packages/rs-platform-wallet-storage/Cargo.toml @@ -61,6 +61,11 @@ chrono = { version = "0.4", default-features = false, features = [ "clock", ], optional = true } sha2 = { version = "0.10", optional = true } +# Opt-in `JsonSchema` for `SecretString` (gated by `secret-schemars`). +# Reuses the workspace-locked 1.2.1. `default-features = false` drops the +# `derive` feature (we hand-write the impl), matching the crate's existing +# derive-free schemars usage so the lock gains no `schemars_derive` entry. +schemars = { version = "1", optional = true, default-features = false } # Secret-storage deps (gated by the `secrets` feature). RustSec-clean # pins (Smythe §7); `aes-gcm` is deliberately omitted. 
`keyring`'s @@ -80,7 +85,14 @@ keyring-core = { version = "=1.0.0", optional = true } # was removed from the sqlite arm — those tests grep for `fs2`/`fs4` # literals in this crate's source/manifest and would re-trigger on the # older crates. `fd-lock` has no such collision. -fd-lock = { version = "4.0.4", optional = true } +# LOCAL-FS ONLY: flock/LockFileEx interlock processes only on local +# filesystems; over NFS/CIFS the lock does not interlock, so a vault file +# must not be shared across hosts — steer multi-host to the OS-keyring arm. +# Exact-pinned (`=`) like the rest of the soundness-critical stack: the +# `VaultLock` unsafe drop-order argument in `secrets/file/mod.rs` is +# calibrated to fd-lock 4.0.4's guard internals; any bump must re-verify +# that the guard releases the OS lock before the backing `RwLock` frees. +fd-lock = { version = "=4.0.4", optional = true } # CLI deps (gated by the `cli` feature) clap = { version = "4", features = ["derive"], optional = true } @@ -180,6 +192,13 @@ cli = [ # crate without the crypto graph. secrets = [ "dep:argon2", + # Enable argon2's `zeroize` feature so the KDF wipes its sensitive + # intermediate state (`initial_hash`/`blockhash`) on drop. In argon2 + # 0.5.3 this does NOT cover the bulk `Block` matrix — that residual is + # documented at `derive_key` in `secrets/file/crypto.rs`. Keep it in + # the feature list (not a `default-features = false` rewrite) so + # argon2's own default features stay intact. + "argon2/zeroize", "dep:chacha20poly1305", # secrets uses serde directly (vault format + crypto envelope derive # `Serialize`/`Deserialize`); declare the dep here so @@ -199,6 +218,15 @@ secrets = [ "dep:apple-native-keyring-store", "dep:windows-native-keyring-store", ] +# Opt-in `SecretString` serde/schemars impls. Deliberately DEFAULT-OFF +# even though `secrets` (and, via it, the `serde` dep) are default-on: +# these gate the IMPLS, not the dep, so the impls are absent unless a +# consumer explicitly opts in. 
`secret-serde` requires `secrets` (the type +# only exists under it). NO `Serialize` is ever provided. `secret-schemars` +# implies `secret-serde`. (design §5.4 / GAP-001 names / GAP-002 satisfiable +# default-off.) +secret-serde = ["secrets", "dep:serde"] +secret-schemars = ["secret-serde", "dep:schemars"] # Per-object-type key/value metadata API # (`platform_wallet_storage::{KvStore, KvError, ObjectId}`) plus the # SQLite-backed impl. Requires `sqlite` because the only shipped backend diff --git a/packages/rs-platform-wallet-storage/README.md b/packages/rs-platform-wallet-storage/README.md index 34917c10e03..04c4d6057eb 100644 --- a/packages/rs-platform-wallet-storage/README.md +++ b/packages/rs-platform-wallet-storage/README.md @@ -103,8 +103,11 @@ flush, 5 s busy timeout, WAL journal, `NORMAL` synchronous, and an auto-backup dir at `/backups/auto/`. The trait surface is `store` / `flush` / `load` / `get_core_tx_record`. -Schema migrations are append-only Rust files under `migrations/`, applied -via [`refinery`](https://github.com/rust-db/refinery) on every `open`. +Schema migrations are versioned Rust files under `migrations/`, applied via +[`refinery`](https://github.com/rust-db/refinery) on every `open`. While the +crate is unreleased, in-place edits to the sole shipped `V001` are allowed; +the append-only guarantee (add a new versioned file, never edit a prior one) +takes effect once the schema is frozen at release. #### Flush semantics (store / flush) @@ -138,17 +141,31 @@ so one failed wallet does not hide its siblings. #### load() reconstruction -`SqlitePersister::load()` returns the base `ClientStartState` (plain struct, -two slots — no `#[non_exhaustive]`): +`SqlitePersister::load()` returns a fully-rehydrated `ClientStartState` +(plain struct — no `#[non_exhaustive]`). 
Both slots are populated: | Slot | Reader | Status | |---|---|---| -| `platform_addresses` | `schema::platform_addrs::load_all` (a fixed set of grouped scans over `platform_address_sync`, `platform_addresses`, and `account_registrations`, driven by the `wallet_meta::list_ids` wallet universe) | populated | -| `wallets` | — | empty pending upstream `Wallet::from_persisted` | - -The `identities` / `contacts` / `asset_locks` per-area readers exist as -hardened dormant helpers (`schema::<area>::load_state`) but are not wired -into `load()` — `ClientStartState` carries no slot for them. +| `platform_addresses` | `schema::platform_addrs::load_all` (a fixed set of grouped scans over `platform_address_sync`, `platform_addresses`, and `account_registrations`, driven by the `wallets::list_ids` wallet universe) | populated | +| `wallets` | per-wallet `schema::<area>` readers (see below) | populated | + +Each `ClientStartState::wallets` entry is a **keyless** `ClientWalletStartState` +reconstructed from these per-area readers: + +| Field | Reader | +|---|---| +| `network` / `birth_height` | `schema::wallets::fetch` | +| `account_manifest` | `schema::accounts::load_state` | +| `core_state` | `schema::core_state::load_state` | +| `identity_manager` | `schema::identities::load_state` | +| `unused_asset_locks` | `schema::asset_locks::load_unconsumed` (`Consumed`-filtered — spent locks stay on disk but are never resurrected) | +| `contacts` | `schema::contacts::load_changeset` | +| `identity_keys` | `schema::identity_keys::load_state` | + +The payload carries **no** `Wallet` and no key material — the manager +rebuilds each wallet watch-only via `Wallet::new_watch_only` from the +manifest and applies this state; signing keys are derived later on demand +via the `sign_with_mnemonic_resolver` path.
Loading is **fail-hard**: any row that fails to decode, or a stored `wallet_id` that is not exactly 32 bytes, aborts the whole call with a typed @@ -158,9 +175,9 @@ corruption tolerance, no per-row skip, and no partial `Ok` — a corrupt database surfaces as an error rather than silently losing rows. The summary `tracing::info!` carries `wallets_seen`, `addresses_loaded`, -`wallets_rehydrated`, and `wallets_pending_rehydration` (the count of -wallets that *would* be rehydrated once upstream provides -`Wallet::from_persisted`). +`wallets_rehydrated` (the count actually rehydrated this call), and +`wallets_pending_rehydration` (now always `0` — every seen wallet is +rehydrated). The only deferred field is listed in `LOAD_UNIMPLEMENTED`. ### KV metadata API diff --git a/packages/rs-platform-wallet-storage/SCHEMA.md b/packages/rs-platform-wallet-storage/SCHEMA.md index 8149fb16e23..5015f949f04 100644 --- a/packages/rs-platform-wallet-storage/SCHEMA.md +++ b/packages/rs-platform-wallet-storage/SCHEMA.md @@ -23,7 +23,7 @@ see [SECRETS.md](./SECRETS.md). ## How integrity is kept -Schema evolution is version-gated by refinery. Every read-write connection turns on `PRAGMA foreign_keys = ON` at open time (`src/sqlite/conn.rs`), so every `ON DELETE CASCADE` clause is active. Deleting a `wallet_metadata` row cleans that wallet's metadata along two paths: +Schema evolution is version-gated by refinery. Every read-write connection turns on `PRAGMA foreign_keys = ON` at open time (`src/sqlite/conn.rs`), so every `ON DELETE CASCADE` clause is active. Deleting a `wallets` row cleans that wallet's metadata along two paths: - **`wallet_id`-scoped meta** (`meta_wallet`, `meta_contact`, `meta_platform_address`) carries a `wallet_id` column, so `cascade_meta_on_wallet_delete` brooms it directly — regardless of the lifecycle state of any typed parent and even for rows written ahead of (or without) a typed parent. 
- **identity-scoped meta** (`meta_identity`, `meta_token`) carries no `wallet_id` — only `identity_id` (+ `token_id`). It is cleaned by `cascade_meta_on_identity_delete` (AFTER DELETE ON `identities`), which fires for the wallet's own identities when the FK cascade removes them on a wallet delete. @@ -37,7 +37,7 @@ Any `meta_*` row whose parent object does not exist — because it was never cre A future garbage-collection pass is expected to reap orphan metadata — rows with no live parent object older than approximately one week — but no such GC is implemented yet. Callers should not rely on orphan metadata persisting forever, nor assume it will be cleaned up promptly. `meta_global` is intentionally parentless and always survives. -The 23 tables are split into five domain diagrams below. `WALLET_METADATA` is the root anchor and appears in each diagram. For full column listings see the [Tables](#tables) section. +The 23 tables are split into five domain diagrams below. `WALLETS` is the root anchor and appears in each diagram. For full column listings see the [Tables](#tables) section. 
## Diagram 1 — Core / L1 (Bitcoin/Dash layer) @@ -45,16 +45,16 @@ Account registrations, address-pool snapshots, transactions, UTXOs, instant lock ```mermaid erDiagram - WALLET_METADATA ||--o{ ACCOUNT_REGISTRATIONS : "registers" - WALLET_METADATA ||--o{ ACCOUNT_ADDRESS_POOLS : "snapshots" - WALLET_METADATA ||--o{ CORE_TRANSACTIONS : "records" - WALLET_METADATA ||--o{ CORE_UTXOS : "owns" - WALLET_METADATA ||--o{ CORE_INSTANT_LOCKS : "holds" - WALLET_METADATA ||--o{ CORE_DERIVED_ADDRESSES : "derives" - WALLET_METADATA ||--o| CORE_SYNC_STATE : "tracks" + WALLETS ||--o{ ACCOUNT_REGISTRATIONS : "registers" + WALLETS ||--o{ ACCOUNT_ADDRESS_POOLS : "snapshots" + WALLETS ||--o{ CORE_TRANSACTIONS : "records" + WALLETS ||--o{ CORE_UTXOS : "owns" + WALLETS ||--o{ CORE_INSTANT_LOCKS : "holds" + WALLETS ||--o{ CORE_DERIVED_ADDRESSES : "derives" + WALLETS ||--o| CORE_SYNC_STATE : "tracks" CORE_TRANSACTIONS ||--o{ CORE_UTXOS : "spends" - WALLET_METADATA { + WALLETS { BLOB wallet_id PK "32-byte WalletId" TEXT network "mainnet | testnet | devnet | regtest" INTEGER birth_height "SPV scan start height" @@ -62,7 +62,7 @@ erDiagram ACCOUNT_REGISTRATIONS { BLOB wallet_id PK - TEXT account_type PK "standard | coinjoin | identity_registration | ..." + TEXT account_type PK "standard_bip44 | standard_bip32 | coinjoin | identity_registration | ..." 
INTEGER account_index PK BLOB account_xpub_bytes "bincode-encoded AccountRegistrationEntry" } @@ -105,9 +105,10 @@ erDiagram CORE_DERIVED_ADDRESSES { BLOB wallet_id PK TEXT account_type PK - TEXT address PK "bech32 / Base58 address string" - INTEGER account_index - TEXT derivation_path "pool_type/derivation_index" + INTEGER account_index PK "owning account; also the value the read returns" + TEXT pool_type PK "external | internal | absent | absent_hardened" + INTEGER derivation_index PK + TEXT address UK "bech32 / Base58 address string" INTEGER used "0 | 1" } @@ -125,17 +126,18 @@ erDiagram ## Diagram 2 — Identities + DashPay (Platform L2 identity tree) -Platform identities, their public keys, token balances, and DashPay profiles/payments. Identity-owned tables have no direct `wallet_id` column; cascade flows `wallet_metadata → identities → child`. +Platform identities, their public keys, token balances, and DashPay profiles/payments. Most identity-owned tables have no direct `wallet_id` column and cascade via `wallets → identities → child`; `identity_keys` is the exception — it carries its own `wallet_id` column and two `ON DELETE CASCADE` FKs (one to `wallets`, one to `identities`). 
```mermaid erDiagram - WALLET_METADATA ||--o{ IDENTITIES : "parents" + WALLETS ||--o{ IDENTITIES : "parents" + WALLETS ||--o{ IDENTITY_KEYS : "owns" IDENTITIES ||--o{ IDENTITY_KEYS : "has" IDENTITIES ||--o{ TOKEN_BALANCES : "holds" IDENTITIES ||--o| DASHPAY_PROFILES : "has" IDENTITIES ||--o{ DASHPAY_PAYMENTS_OVERLAY : "overlays" - WALLET_METADATA { + WALLETS { BLOB wallet_id PK "32-byte WalletId" TEXT network INTEGER birth_height @@ -144,16 +146,18 @@ erDiagram IDENTITIES { BLOB identity_id PK "32-byte Platform Identifier" BLOB wallet_id FK "NULL = orphan identity (no parent wallet yet)" - INTEGER wallet_index "BIP-32 index; NULL for out-of-wallet identities" + INTEGER identity_index "BIP-32 index; NULL for out-of-wallet identities" BLOB entry_blob "bincode-encoded IdentityEntry" INTEGER tombstoned "0 | 1 (logical delete)" } IDENTITY_KEYS { + BLOB wallet_id PK "32-byte WalletId" BLOB identity_id PK INTEGER key_id PK "KeyID" BLOB public_key_blob "bincode-encoded IdentityKeyWire (public material only)" BLOB public_key_hash "20-byte HASH160 of the key" + BLOB derivation_blob "reserved typed projection; always NULL today" } TOKEN_BALANCES { @@ -181,10 +185,10 @@ One unified table for all three states of a DashPay contact relationship — the ```mermaid erDiagram - WALLET_METADATA ||--o{ CONTACTS : "has" + WALLETS ||--o{ CONTACTS : "has" IDENTITIES ||--o{ CONTACTS : "relates" - WALLET_METADATA { + WALLETS { BLOB wallet_id PK "32-byte WalletId" TEXT network INTEGER birth_height @@ -221,11 +225,11 @@ Platform P2PKH address pool with its sync watermark, and the asset-lock lifecycl ```mermaid erDiagram - WALLET_METADATA ||--o{ PLATFORM_ADDRESSES : "tracks" - WALLET_METADATA ||--o| PLATFORM_ADDRESS_SYNC : "syncs" - WALLET_METADATA ||--o{ ASSET_LOCKS : "issues" + WALLETS ||--o{ PLATFORM_ADDRESSES : "tracks" + WALLETS ||--o| PLATFORM_ADDRESS_SYNC : "syncs" + WALLETS ||--o{ ASSET_LOCKS : "issues" - WALLET_METADATA { + WALLETS { BLOB wallet_id PK "32-byte WalletId" TEXT network 
INTEGER birth_height @@ -267,7 +271,7 @@ table per [`ObjectId`](./src/kv.rs) variant. `meta_global` has no parent and survives wallet deletion. The other five carry **no foreign key**: metadata may be written before its parent object is synced into its typed table. `AFTER DELETE` triggers provide a soft cascade so metadata -never outlives its wallet. Deleting a `wallet_metadata` row brooms every +never outlives its wallet. Deleting a `wallets` row brooms every wallet-scoped `meta_*` row by `wallet_id` directly, and the FK cascade through `identities` brooms the identity-scoped `meta_*` rows by `identity_id`; both legs key on the id alone, so cleanup is independent @@ -278,9 +282,9 @@ edges below denote trigger-based cleanup, not an FK relationship. ```mermaid erDiagram - WALLET_METADATA ||..o{ META_WALLET : "trigger cleanup (by wallet_id)" - WALLET_METADATA ||..o{ META_CONTACT : "trigger cleanup (by wallet_id)" - WALLET_METADATA ||..o{ META_PLATFORM_ADDRESS : "trigger cleanup (by wallet_id)" + WALLETS ||..o{ META_WALLET : "trigger cleanup (by wallet_id)" + WALLETS ||..o{ META_CONTACT : "trigger cleanup (by wallet_id)" + WALLETS ||..o{ META_PLATFORM_ADDRESS : "trigger cleanup (by wallet_id)" IDENTITIES ||..o{ META_IDENTITY : "trigger cleanup (by identity_id)" IDENTITIES ||..o{ META_TOKEN : "trigger cleanup (by identity_id)" @@ -291,7 +295,7 @@ erDiagram } META_WALLET { - BLOB wallet_id PK "no FK; trigger cleanup on wallet_metadata delete" + BLOB wallet_id PK "no FK; trigger cleanup on wallets delete" TEXT key PK BLOB value INTEGER updated_at @@ -313,7 +317,7 @@ erDiagram } META_CONTACT { - BLOB wallet_id PK "no FK; trigger cleanup on wallet_metadata delete" + BLOB wallet_id PK "no FK; trigger cleanup on wallets delete" BLOB owner_id PK BLOB contact_id PK TEXT key PK @@ -322,7 +326,7 @@ erDiagram } META_PLATFORM_ADDRESS { - BLOB wallet_id PK "no FK; trigger cleanup on wallet_metadata delete" + BLOB wallet_id PK "no FK; trigger cleanup on wallets delete" BLOB address 
PK TEXT key PK BLOB value @@ -342,7 +346,7 @@ erDiagram ## Tables -### `wallet_metadata` +### `wallets` Root anchor for every per-wallet table. Deleting a row cascades to all direct children; identity-owned children cascade through `identities`. @@ -359,7 +363,7 @@ the typed `account_type` / `account_index` columns mirror it for SQL lookups without blob decoding. - PK: `(wallet_id, account_type, account_index)`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. ### `account_address_pools` @@ -367,7 +371,7 @@ Address-pool snapshot per `(wallet, account, pool_type)`. `pool_type` is one of `external`, `internal`, `absent`, `absent_hardened`. - PK: `(wallet_id, account_type, account_index, pool_type)`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. ### `core_transactions` @@ -376,7 +380,7 @@ One row per transaction the wallet has seen. `height`, `block_hash`, and is `1` once block context is present. - PK: `(wallet_id, txid)`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. - Index: `idx_core_transactions_height(wallet_id, height)`. ### `core_utxos` @@ -387,7 +391,7 @@ by a trigger when its referenced `core_transactions` row is deleted NOT NULL `wallet_id` column). - PK: `(wallet_id, outpoint)`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. - Index: `idx_core_utxos_spent(wallet_id, spent)`. ### `core_instant_locks` @@ -396,16 +400,51 @@ Instant-lock blobs for transactions that are broadcast but not yet finalized. Rows are removed when the transaction becomes confirmed. - PK: `(wallet_id, txid)`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. 
### `core_derived_addresses` -Address-to-account-index map. Written before UTXOs in the same -transaction so the UTXO writer can resolve `account_index` by address. - -- PK: `(wallet_id, account_type, address)`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. -- Index: `idx_core_derived_addresses_addr(wallet_id, address)`. +A live-fed indexed read-cache the UTXO writer joins to resolve a UTXO's +`account_index` by address. The authoritative manifest is +`account_address_pools` (kept complete and in-band by the +`core_bridge` emitter); this table is the fast B-tree probe in front of it. +Fed by exactly one source: + +- **Live `addresses_derived` events** — written before UTXOs in the same + transaction so the writer sees fresh rows. + +UTXO resolution for an unspent UTXO: + +1. **Cache hit** — resolve from this table. +2. **Cache miss, manifest hit** — fall back to `account_address_pools` + (the in-band snapshot is applied earlier in the same tx). Resolved. +3. **Miss in both** — a genuinely undeclared address (not ours, or an SPV + gap-limit edge). The writer **skips** it (with a `warn`) so one + unresolvable row never aborts a whole flush; its balance re-warms once + the address is later derived. + +(The spent-only synthetic-row path is exempt: a spent row uses an inert +`account_index` placeholder and is excluded from reads.) + +A live `addresses_derived` entry whose address is absent from the manifest +is a **fatal** `DerivedIndexInvariantViolated` — the emitter must attach +the pool snapshot in-band with every derivation, so this can only fire on +an emitter bug, never on a benign gap. + +> The non-ECDSA pool gap (BLS/EdDSA addresses are dropped from the event +> projection, so they never produce an `addresses_derived` entry) cannot +> manifest here: only ECDSA Standard/CoinJoin External/Internal addresses +> are ever classified `Received`/`Change`, so a non-ECDSA address can never +> be a `new_utxos` UTXO address. 
This is an upstream classifier property +> (`key-wallet` `account_checker`), not enforceable at the storage layer. + +- PK: `(wallet_id, account_type, account_index, pool_type, derivation_index)` — the BIP32 + leaf identity (one row per derived address). +- `UNIQUE(wallet_id, address)` — the read-index invariant (one + account_index per address); its index also backs the address lookup, so + no separate index is needed. `address` is a derived attribute, never part of the + primary key, so every collision surfaces loudly. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. ### `core_sync_state` @@ -414,7 +453,7 @@ One row per wallet, holding monotonically-advancing SPV sync watermarks. block is processed. - PK: `wallet_id` (single-row-per-wallet). -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. ### `identities` @@ -424,7 +463,7 @@ NULL means the identity was written before a parent wallet was registered marks a logical delete; the row is retained for cascade integrity. - PK: `identity_id`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE` (nullable). +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE` (nullable). - Index: `idx_identities_wallet(wallet_id)`. ### `identity_keys` @@ -432,11 +471,14 @@ marks a logical delete; the row is retained for cascade integrity. Public identity keys only — no private material. The `public_key_blob` is a custom wire format (`IdentityKeyWire`) that pre-encodes the `IdentityPublicKey` via bincode 2 native `Encode/Decode` -to work around a serde-tag incompatibility. +to work around a serde-tag incompatibility. `derivation_blob` is a +reserved column for a future typed projection and is always NULL today +(derivation indices live inside `public_key_blob`). -- PK: `(identity_id, key_id)`. +- PK: `(wallet_id, identity_id, key_id)`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. 
- FK: `identity_id → identities(identity_id) ON DELETE CASCADE`. -- Index: `idx_identity_keys_identity(identity_id)`. +- Index: `idx_identity_keys_wallet_identity(wallet_id, identity_id)`. ### `contacts` @@ -451,7 +493,7 @@ hold a bincode-encoded `ContactRequest`; `accepted_accounts` holds a bincode-encoded `Vec`. - PK: `(wallet_id, owner_id, contact_id)`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. - `state` CHECK: sourced from `sqlite::schema::contacts::CONTACT_STATE_LABELS`. ### `platform_addresses` @@ -461,7 +503,7 @@ HASH160; `balance` and `nonce` are the last-synced values from the Platform layer. - PK: `(wallet_id, address)`. -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. ### `platform_address_sync` @@ -469,22 +511,27 @@ Per-wallet watermark for platform address sync. All three height/timestamp fields advance monotonically (new values are `max(current, incoming)`). - PK: `wallet_id` (single-row-per-wallet). -- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. ### `asset_locks` Lifecycle tracking for asset-lock outpoints. `status` is a queryable text column; `lifecycle_blob` carries the full `AssetLockEntry`. Consumed -locks are removed via `AssetLockChangeSet::removed`, not retained with a -consumed status. +locks are **retained permanently** with `status = 'consumed'` (an upsert, +never a `DELETE` — they are not routed through `AssetLockChangeSet::removed`), +so the full lifecycle history stays on disk and remains visible via the +unfiltered inspection reader (`schema::asset_locks::list_active`). The +rehydration feed reads through `schema::asset_locks::load_unconsumed`, which +filters at the SQL level (`status NOT IN ('consumed')`), so a spent one-shot +lock is never resurrected as actionable. - PK: `(wallet_id, outpoint)`. 
-- FK: `wallet_id → wallet_metadata(wallet_id) ON DELETE CASCADE`. +- FK: `wallet_id → wallets(wallet_id) ON DELETE CASCADE`. ### `token_balances` Per-identity token balance cache, keyed by `(identity_id, token_id)`. -Cascade flows `wallet_metadata → identities → token_balances` through the +Cascade flows `wallets → identities → token_balances` through the nullable `identities.wallet_id` link; no direct `wallet_id` column exists. - PK: `(identity_id, token_id)`. @@ -524,7 +571,7 @@ Unlike every other per-wallet table, the five typed `meta_*` tables carry host apps can attach metadata independently of sync ordering (and a global-config persister can write to typed scopes whose parent tables stay empty). Cleanup is instead a soft cascade. Deleting a -`wallet_metadata` row fires a wallet-rooted `AFTER DELETE` trigger that +`wallets` row fires a wallet-rooted `AFTER DELETE` trigger that brooms the wallet-scoped tables (`meta_wallet`, `meta_contact`, `meta_platform_address`) by `wallet_id`, and the FK cascade through `identities` fires a per-identity trigger that brooms `meta_identity` + @@ -546,7 +593,7 @@ Global metadata with no parent — survives every wallet delete. Per-wallet metadata. Writable before the wallet exists. - PK: `(wallet_id, key)`. -- No FK. Cleanup: `cascade_meta_on_wallet_delete` (AFTER DELETE ON `wallet_metadata`, by `wallet_id`). +- No FK. Cleanup: `cascade_meta_on_wallet_delete` (AFTER DELETE ON `wallets`, by `wallet_id`). #### `meta_identity` @@ -567,7 +614,7 @@ Per-token-balance metadata. Writable before the token balance exists. Per-contact metadata for any lifecycle state. Writable before the contact exists. - PK: `(wallet_id, owner_id, contact_id, key)`. -- No FK. Cleanup: `cascade_meta_on_wallet_delete` (AFTER DELETE ON `wallet_metadata`, by `wallet_id`) on a wallet delete, plus `cascade_meta_contact_on_contact_delete` (AFTER DELETE ON `contacts`, any state) for a direct contact delete. +- No FK. 
Cleanup: `cascade_meta_on_wallet_delete` (AFTER DELETE ON `wallets`, by `wallet_id`) on a wallet delete, plus `cascade_meta_contact_on_contact_delete` (AFTER DELETE ON `contacts`, any state) for a direct contact delete. #### `meta_platform_address` @@ -575,23 +622,25 @@ Per-platform-address metadata. `address` is an opaque `BLOB`. Writable before the address exists. - PK: `(wallet_id, address, key)`. -- No FK. Cleanup: `cascade_meta_on_wallet_delete` (AFTER DELETE ON `wallet_metadata`, by `wallet_id`) on a wallet delete, plus `cascade_meta_platform_address_on_address_delete` (AFTER DELETE ON `platform_addresses`) for a direct address delete. +- No FK. Cleanup: `cascade_meta_on_wallet_delete` (AFTER DELETE ON `wallets`, by `wallet_id`) on a wallet delete, plus `cascade_meta_platform_address_on_address_delete` (AFTER DELETE ON `platform_addresses`) for a direct address delete. ## Enum-domain CHECK constraints -Six TEXT columns carry a `CHECK (col IN (...))` clause whose IN-list is -built at migration time from `pub(crate) const *_LABELS` arrays declared -next to each writer function. Five mirror an upstream Rust enum; the -sixth (`contacts.state`) is a synthetic lifecycle label naming which +Eight TEXT columns carry a `CHECK (col IN (...))` across five enum +domains — `account_type` is reused in three tables. The IN-list is built +at migration time from `pub(crate) const *_LABELS` arrays declared next +to each writer function. 
Four domains mirror an upstream Rust enum; the +fifth (`contacts.state`) is a synthetic lifecycle label naming which `ContactChangeSet` slot a row came from: | Table | Column | Source-of-truth const | |---|---|---| -| `wallet_metadata` | `network` | `sqlite::schema::wallet_meta::NETWORK_LABELS` | +| `wallets` | `network` | `sqlite::schema::wallets::NETWORK_LABELS` | | `account_registrations` | `account_type` | `sqlite::schema::accounts::ACCOUNT_TYPE_LABELS` | | `account_address_pools` | `account_type` | `sqlite::schema::accounts::ACCOUNT_TYPE_LABELS` | | `account_address_pools` | `pool_type` | `sqlite::schema::accounts::POOL_TYPE_LABELS` | | `core_derived_addresses` | `account_type` | `sqlite::schema::accounts::ACCOUNT_TYPE_LABELS` | +| `core_derived_addresses` | `pool_type` | `sqlite::schema::accounts::POOL_TYPE_LABELS` | | `asset_locks` | `status` | `sqlite::schema::asset_locks::ASSET_LOCK_STATUS_LABELS` | | `contacts` | `state` | `sqlite::schema::contacts::CONTACT_STATE_LABELS` | @@ -634,13 +683,17 @@ having to grep this repo. ## Foreign-key conventions - All direct-child `wallet_id` columns are `BLOB(32)` references to - `wallet_metadata.wallet_id` with `ON DELETE CASCADE`. + `wallets.wallet_id` with `ON DELETE CASCADE`. - `identities.wallet_id` is the single nullable FK: NULL means orphan (no parent wallet registered yet). The orphan-to-parented promotion uses `COALESCE(identities.wallet_id, excluded.wallet_id)` on upsert. -- Identity-owned tables (`identity_keys`, `token_balances`, - `dashpay_profiles`, `dashpay_payments_overlay`) have no `wallet_id` - column. Cascade reaches them via `identities(identity_id)`. +- Identity-owned tables (`token_balances`, `dashpay_profiles`, + `dashpay_payments_overlay`) have no `wallet_id` column. Cascade reaches + them via `identities(identity_id)`. 
+- `identity_keys` is the exception among identity-owned tables: it carries + a `wallet_id BLOB NOT NULL` column and two `ON DELETE CASCADE` FKs + (`wallet_id → wallets`, `identity_id → identities`), so a delete on + either parent cascades to it. - `core_utxos.spent_in_txid` is cleared by the `setnull_core_utxos_on_tx_delete` trigger rather than a native `ON DELETE SET NULL` FK, because SQLite would null every column of a composite FK on SET NULL — including the NOT NULL `wallet_id`. @@ -657,7 +710,7 @@ having to grep this repo. | Trigger | Fires | Action | |---|---|---| | `setnull_core_utxos_on_tx_delete` | AFTER DELETE ON `core_transactions` | NULL `core_utxos.spent_in_txid` for the deleted tx | -| `cascade_meta_on_wallet_delete` | AFTER DELETE ON `wallet_metadata` | delete `meta_wallet`, `meta_contact`, `meta_platform_address` rows by `wallet_id` | +| `cascade_meta_on_wallet_delete` | AFTER DELETE ON `wallets` | delete `meta_wallet`, `meta_contact`, `meta_platform_address` rows by `wallet_id` | | `cascade_meta_on_identity_delete` | AFTER DELETE ON `identities` | delete `meta_identity`, `meta_token` rows by `identity_id` | | `cascade_meta_token_on_token_balance_delete` | AFTER DELETE ON `token_balances` | delete matching `meta_token` rows (direct balance delete) | | `cascade_meta_contact_on_contact_delete` | AFTER DELETE ON `contacts` | delete matching `meta_contact` rows (any state; direct contact delete) | diff --git a/packages/rs-platform-wallet-storage/SECRETS.md b/packages/rs-platform-wallet-storage/SECRETS.md index 7f983aa071c..8732e041cce 100644 --- a/packages/rs-platform-wallet-storage/SECRETS.md +++ b/packages/rs-platform-wallet-storage/SECRETS.md @@ -30,6 +30,17 @@ The rest of this document is the technical detail behind that boundary: the `secrets` backends, the `SecretStore` API, the error surface, and the threat model. +### Exception: the KV metadata API stores caller-supplied plaintext + +The boundary above is about the persister's own domain state. 
The +separate `KvStore` API (`kv` feature) is a deliberate, explicit exception: +it stores **arbitrary caller-supplied `Vec<u8>` values as PLAINTEXT** in +`meta_*` BLOB columns of the same `.db` (and therefore in every backup). +There is no encryption and no runtime content guard — the safety is +**caller-policed**. Callers MUST NOT put key or signing material through +`KvStore`; that is what `SecretStore` is for. The `KvStore` / +`KvStore::put` rustdoc carries the same `# Security` warning. + ## The `secrets` submodule `platform_wallet_storage::secrets` is part of the crate's default @@ -51,8 +62,22 @@ use platform_wallet_storage::secrets::{SecretBytes, SecretStore, SecretString, W let store = SecretStore::file("/var/lib/wallet/secrets.pwsvault", SecretString::new("pw"))?; let wallet = WalletId::from(wallet_id); + +// Tier-1 only (unprotected by an object password). `set`/`get` are +// `..,None` wrappers over `set_secret`/`get_secret`. store.set(&wallet, "mnemonic", &SecretBytes::from_slice(b"abandon ability ..."))?; let plaintext: Option<SecretBytes> = store.get(&wallet, "mnemonic")?; // never a bare Vec<u8> + +// Tier-2: protect a critical object under an extra OBJECT PASSWORD that +// the backend never sees. Reading it back REQUIRES the password. +let pw = SecretString::new("a strong object password"); +store.set_secret(&wallet, "seed", &SecretBytes::from_slice(b""), Some(&pw))?; +let seed = store.get_secret(&wallet, "seed", Some(&pw))?; // Some(secret) +// Reading a protected object WITHOUT the password fails closed: +assert!(store.get_secret(&wallet, "seed", None).is_err()); // NeedsPassword + +// Add / change / remove an object password in one atomic same-slot flow: +store.reprotect(&wallet, "seed", Some(&pw), None)?; // remove → now unprotected store.delete(&wallet, "mnemonic")?; // idempotent ``` @@ -61,6 +86,171 @@ filename); the parent directory is materialized on the first write. Use `SecretStore::os()` for the platform OS keyring arm instead of `SecretStore::file(..)`. 
+See **Two-tier secret protection** below for the model, the envelope +format, which tier defeats which adversary, and the strict fail-closed +read that is the heart of the opt-in scheme. + +### Two-tier secret protection + +Secret protection comes in two layers. Tier-1 is always on (it is just +"which backend you opened"); Tier-2 is opt-in, per critical object, and +backend-independent. + +| Tier | Provided by | Defeats | Mechanism | +|---|---|---|---| +| **1 — backend baseline** | the *backend* | another local user, a lost laptop, the vault at rest | OS keychain ACLs **or** Argon2id + XChaCha20-Poly1305 vault under a **real** passphrase | +| **2 — per-object password** | the *library*, above `SecretStore`, over **both** arms | **backend compromise** — the keychain scraped, or the vault stolen *and* its passphrase cracked | the object's bytes are Argon2id + XChaCha20-Poly1305 **enveloped under a per-object password BEFORE they reach the backend** | + +**Why Tier-2 is more than key granularity.** Its value is not a sub-key — +it is (a) an **independent human password the backend never sees** and (b) +**envelope-before-backend ordering**, so for a protected object the backend +only ever stores ciphertext. That is the first and only control that keeps +a chosen critical object confidential across a *full* backend compromise +(the A2/A3/A6 gap Tier-1 leaves open). + +Tier-2 has two guarantees of different strength: + +- **Confidentiality** (an attacker cannot *read* a protected secret) is + **unconditional** — the object password never enters any backend, so a + full backend dump yields only ciphertext + a per-object salt to + offline-Argon2id-crack against the password's entropy. +- **Integrity / anti-downgrade** is delivered by the **strict fail-closed + read** below and is **conditional on the caller's trusted model staying + intact** (see the documented residual). 
+ +#### The envelope (wire format) + +Every value written through `set_secret`/`set` is wrapped in a +self-describing, authenticated envelope before it reaches the backend. The +backend (file vault or OS keychain) stores only these opaque bytes. + +```text +magic b"PWSEV" (5) +version u8 = 1 (envelope version — independent of the vault FORMAT_VERSION) +scheme u8 (0 = unprotected passthrough, 1 = password) +── scheme 0 ── payload: the raw secret bytes +── scheme 1 ── kdf(id u8 ‖ m_kib u32 LE ‖ t u32 LE ‖ p u32 LE) (13) + ‖ salt[32] ‖ nonce[24] ‖ ciphertext+tag +``` + +- **AAD (scheme 1)** binds `domain ‖ magic ‖ version ‖ scheme ‖ kdf ‖ salt + ‖ wallet_id ‖ label` (length-prefixed), mirroring the vault's own + `aad()`/`verify_aad()`. A protected blob relocated to another slot — or + any in-place header edit — fails the tag (relocation/header-tamper + resistance). On the file arm this AAD is *in addition* to the vault's own + per-entry AAD + tag; on the OS arm it is the only authentication layer. +- **KDF ceiling before derivation (anti-DoS).** The KDF params live in the + (attacker-controllable) header, so on a read the Argon2 **ceiling is + enforced before** any derivation/allocation — a forged `m_kib`/`t` cannot + force a giant allocation or an unbounded stall on the victim's unlock. +- **No vault format bump.** The envelope lives *inside* the entry bytes, + identical over File and Os, so there is no vault-parser or migration + change. +- **Size cap.** The plaintext is capped at `MAX_PLAINTEXT_LEN` + (`MAX_SECRET_LEN − MAX_ENVELOPE_OVERHEAD` = 64 KiB − 128 = 65 408 bytes), + uniformly for both schemes, so the enveloped bytes always fit the + backend's own `MAX_SECRET_LEN` cap and the user-visible limit is stable + regardless of scheme. Oversize → `SecretTooLarge { found, max }` with + `max = MAX_PLAINTEXT_LEN` (re-exported as `secrets::MAX_PLAINTEXT_LEN`). 
+- **Unknown version/scheme** (magic present) → `UnsupportedEnvelopeVersion` + — fail closed **regardless of the password**: an unparseable future + format can be neither safely unwrapped nor treated as unprotected. + +#### The strict, fail-closed read + +The defining risk of any opt-in "some objects are extra-protected" scheme +is **strip / downgrade**: an attacker who can WRITE the backend replaces a +protected blob with a fresh, internally-valid *unprotected* (scheme-0) blob +carrying a chosen seed/xpriv. There is nothing in that blob alone to prove +an envelope was *expected*, so inferring protection from the stored bytes +would silently return the attacker's secret — funds redirection, password +prompt bypassed. + +The fix: **the "expected-protected" bit lives in the CALLER's trusted +model, surfaced solely by whether a password is supplied to `get_secret` — +NEVER inferred from the blob.** The library does not guess and does not +persist the expectation. A supplied password *is* the assertion "this +object must be protected": + +| `password` arg | stored blob | result | +|---|---|---| +| `Some(pw)` | valid scheme-1 | the secret, or `WrongPassword` on tag fail | +| **`Some(pw)`** | **scheme-0 / legacy magic-less raw** | **`ExpectedProtectedButUnsealed` — FAIL CLOSED** | +| `Some(pw)` | scheme-1 but truncated/corrupt | `Corruption` | +| `Some/None` | magic present, unknown version/scheme | `UnsupportedEnvelopeVersion` | +| `None` | valid scheme-1 | `NeedsPassword` (never ciphertext) | +| `None` | scheme-0 | the secret | +| `None` | legacy magic-less raw | the secret (+ a one-time warning; re-wrapped on next write) | +| `None` | magic present but truncated header | `Corruption` | +| any | absent entry | `Ok(None)` (deletion = DoS, never injection) | + +The load-bearing row is **`Some(pw)` + non-envelope ⇒ +`ExpectedProtectedButUnsealed`**: with a password in hand, a non-protected +blob can only mean a strip, so it is refused and **no bytes are returned**. 
+A consumer bug alone — over- or under-supplying a password — fails closed +in *every* direction. + +**Arm asymmetry.** On the file arm the stored bytes are themselves sealed +under the vault key, so producing a *readable* stripped blob at a slot +requires the vault key; a cold/backup-swap actor can only corrupt +(→ DoS), not inject-to-readable. On the OS-keychain arm the stored item is +the bare envelope with no second seal, so the strip defence there leans +entirely on the `Some(pw)` strict rule plus the consumer's metadata +integrity — this is where the residual bites hardest. + +**Documented residual (out of the library's reach).** If an attacker ALSO +rewrites the consumer's trusted DB so the consumer calls `get_secret(X, +None)` for a stripped object, the `(scheme-0, None)` quadrant returns the +attacker's bytes. The library only ever sees the blob and the caller's +`Some/None`; the "should be protected" fact lives entirely in the +consumer's metadata store. **Anti-downgrade strength therefore equals the +tamper-resistance of the consumer's protection-status record** — store it +as integrity-protected, security-critical state (it is one more field +alongside the addresses/policy the wallet DB must already protect). + +**Value rollback is NOT defended.** Restoring an *older valid* scheme-1 +envelope under the *current* password decrypts cleanly. The strict read +closes the strip/downgrade injection, not value rollback; if +backup-swap/restore-old is in scope, anchor a monotonic version in +integrity-protected consumer metadata. Do not mistake the strict read for +rollback protection. + +#### Add / change / remove an object password + +`reprotect(service, label, current, new)` does it in one same-slot +unwrap→rewrap→overwrite: read under the `current` expectation (so a strip +is caught before any rewrite), then write under `new` — `None`→`Some` adds, +`Some`→`Some` changes, `Some`→`None` removes. An absent object is a no-op +(`Ok(())`). 
The rewrite is a same-slot overwrite — atomic on the file arm, +and on the OS arm inheriting the backend's single-item-replace contract — +so a crash between the read and the commit leaves the prior value intact +and readable under `current`. **After a successful call the consumer MUST +update its own protection-status record** (the protection expectation lives +there). There is **no password recovery** — losing an object password +bricks that object (an availability trade-off the UX must state plainly). + +#### Entropy policy is the consumer's + +The library enforces only **non-blank** at enrol (and a coarse +`MIN_PASSPHRASE_LEN` floor, `1` today = merely non-blank) for both the +vault passphrase and the Tier-2 object password. It ships **no** +password-strength estimator: real entropy policy (zxcvbn-style strength, +dictionary checks, UX feedback) is locale- and threat-specific and is the +**consumer's responsibility**. For a protected object the password's +entropy is the *whole* guarantee against an offline Argon2id attacker who +already holds the backend — choose it accordingly. + +#### Greenfield / legacy entries + +The envelope is net-new, so post-feature reads/writes go through it. A +decrypted entry that lacks the `PWSEV` magic is treated as a **legacy +unprotected** value: returned on a `None` read (with a one-time warning, +and re-wrapped on the next write) and refused (`ExpectedProtectedButUnsealed`) +on a `Some(pw)` read — so legacy tolerance never weakens the strict read. +(A pre-feature build that persisted vault files is a deployment fact outside +this crate; the legacy-tolerant read makes the transition seamless either +way.) + ### Internal SPI Below `SecretStore`, `EncryptedFileStore` and `default_credential_store` @@ -118,6 +308,29 @@ unwrapped copy is allocated. One file, one passphrase, one lock — a multi-wallet store cannot lock its other wallets out by construction. Errors surface as the typed `SecretStoreError` through `SecretStore`. 
+ On Unix the vault's parent directory must not be group/other writable + (`mode & 0o022`): directory write access governs rename/replace of the + vault, so a writable parent is refused at `open` with + `SecretStoreError::InsecureParentDir` (the A1 guarantee depends on it). + A read-only group-accessible parent (`0o750`) is accepted — it only + leaks filenames, never the 0600-protected vault contents. + Each secret is capped at `MAX_SECRET_LEN` (64 KiB) at the write + boundary — generously above any mnemonic/seed/xpriv — so a single + oversized entry cannot inflate the shared document past the read-side + 128 MiB ceiling and brick every wallet on the next open. (Through + `SecretStore::set_secret`/`set` the user-facing plaintext cap is the + slightly lower `MAX_PLAINTEXT_LEN`, leaving room for the envelope + overhead; see **Two-tier secret protection**.) + **Blank passphrase is rejected.** `open` (and `rekey`) refuse a blank + (empty / all-whitespace) passphrase with `SecretStoreError::BlankPassphrase` + — a blank passphrase derives a key from a public salt only, i.e. + obfuscation, not confidentiality. This is an **intended behavioural + break** for any caller that relied on `SecretString::empty()`. A + deliberate keyless vault uses the explicit + `EncryptedFileStore::open_unprotected(path)` / + `SecretStore::file_unprotected(path)` door instead (use it only where the + stored secrets carry their own Tier-2 object password, or as a staging + step before `rekey` to a real passphrase — the empty→real migration). - **OS keyring (`SecretStore::os` / `default_credential_store`)** — returns an `Arc` over the platform's default credential store. The backend on Linux/FreeBSD is @@ -135,6 +348,18 @@ unwrapped copy is allocated. with `NoDefaultStore`. Callers that need durable storage on a headless host should pin `SecretStore::file(...)` (encrypted-file vault) instead of relying on the OS keyring. 
+ + **Enumerable metadata (OS arm).** Each entry is keyed by + `service = SERVICE_PREFIX + hex(wallet_id)` and `user = label`, stored + as **plaintext, enumerable** keyring metadata: same-user list-only + tooling can see which wallet ids exist and which slot kinds (labels) + each has, without unlocking any secret. This is dominated by the + already-accepted same-user (A2/A3) residual. The `keyring-core` 1.0.0 + `build` modifiers are vendor-specific creation hints, not a replacement + for the `(service, user)` identity, so there is no portable knob to + redact the pair; operators who need metadata hiding should use the file + vault, whose `(wallet_id, label)` map lives only inside the sealed + vault. Prefer non-descriptive labels on the OS arm regardless. - **Tests** — integration tests construct a tempdir-backed `EncryptedFileStore` directly via `EncryptedFileStore::open(tempfile::tempdir()?.path().join("vault.pwsvault"), SecretString::new("..."))`, @@ -150,25 +375,52 @@ automatic fallback between backends. `SecretStore` returns the typed `SecretStoreError`. For the file arm this is **lossless**: `WrongPassphrase`, `Corruption`, `AlreadyLocked`, `KdfFailure`, `VersionUnsupported`, `MalformedVault`, `InsecurePermissions`, -`VaultTooLarge`, and `InvalidLabel` are distinct typed variants -(`VaultTooLarge` surfaces when the on-disk vault exceeds the 128 MiB -ceiling). For the OS arm, +`InsecureParentDir`, `SecretTooLarge`, `VaultTooLarge`, `Encrypt`, and +`InvalidLabel` are distinct typed variants. The Tier-2 layer adds five more: +`ExpectedProtectedButUnsealed` (the fail-closed strip refusal), +`NeedsPassword` (a protected object read with no password), `WrongPassword` +(object-password tag fail — distinct from the Tier-1 `WrongPassphrase`), +`BlankPassphrase` (a blank vault passphrase or object password), and +`UnsupportedEnvelopeVersion { found }` (a future envelope format, fail +closed regardless of the password). 
The four Tier-2 credential/protection +*state* variants project to a recoverable `NoStorageAccess` (boxed, +downcast-recoverable, like `WrongPassphrase`); `UnsupportedEnvelopeVersion` +joins the secret-free `BadStoreFormat` group. `VaultTooLarge` surfaces when +the on-disk vault exceeds the read-side ceiling; `SecretTooLarge` rejects an +oversized secret at the write boundary before it can inflate the shared +vault; `InsecureParentDir` refuses a vault whose parent directory is +group/other-writable (a writable parent governs rename/replace despite the +file's own `0600`); `Encrypt` is the (effectively unreachable) AEAD +encrypt-side failure, kept typed so a write failure is never mislabeled a +key-derivation error. For the OS arm, `keyring_core::Error` projects best-effort into `SecretStoreError::OsKeyring { kind: OsKeyringErrorKind }`, a payload-free discriminant — keyring variants carrying raw bytes (`BadEncoding`, `BadDataFormat`) are collapsed so their bytes never enter the error (CWE-209/CWE-532). +**`WrongPassword` on the OS arm is ambiguous.** A Tier-2 envelope AEAD tag +failure surfaces as `WrongPassword`, but on the OS-keyring arm the stored +item is the bare envelope with no second authentication layer, so a tag +failure can mean EITHER a wrong object password OR a corrupted keychain +item — one AEAD tag cannot disambiguate the two. Treat `WrongPassword` on +the OS arm as "wrong password or corrupted item." On the file arm it is +unambiguous: the vault's own per-entry tag has already authenticated the +stored bytes before the envelope is parsed. + The internal SPI projection `From<SecretStoreError> for keyring_core::Error` keeps the `WrongPassphrase` / `AlreadyLocked` variants recoverable: they ride in `NoStorageAccess` with the typed `SecretStoreError` boxed as the source, so an SPI-only consumer can recover them via `err.source().and_then(|s| s.downcast_ref::<SecretStoreError>())`.
The `BadStoreFormat` group (`Corruption`, `KdfFailure`, -`VersionUnsupported`, `MalformedVault`, `InsecurePermissions`, -`VaultTooLarge`, `Decrypt`, `OsKeyring`) has no box slot and carries only a -secret-free string; those remain fully typed on the `SecretStore` path -(so `VaultTooLarge` is not losslessly recoverable through the SPI downcast). +`VersionUnsupported`, `UnsupportedEnvelopeVersion`, `MalformedVault`, +`InsecurePermissions`, `InsecureParentDir`, `SecretTooLarge`, +`VaultTooLarge`, `Decrypt`, `Encrypt`, `OsKeyring`) has no box slot and +carries only a secret-free +string; those remain fully typed on the `SecretStore` path (so e.g. +`VaultTooLarge` / `SecretTooLarge` are not losslessly recoverable through +the SPI downcast). `keyring_core::Error` is safe to `Display` (`{ }`-format), but `{:?}`-format embeds `BadEncoding(Vec<u8>)` / `BadDataFormat(Vec<u8>, _)` diff --git a/packages/rs-platform-wallet-storage/migrations/V001__initial.rs b/packages/rs-platform-wallet-storage/migrations/V001__initial.rs index 59a6e45eaea..4ee50a3fc79 100644 --- a/packages/rs-platform-wallet-storage/migrations/V001__initial.rs +++ b/packages/rs-platform-wallet-storage/migrations/V001__initial.rs @@ -7,15 +7,17 @@ //! //! Per-wallet tables carry `wallet_id BLOB` in (or as all of) their //! primary key plus a native `FOREIGN KEY (wallet_id) REFERENCES -//! wallet_metadata(wallet_id) ON DELETE CASCADE`. Identity-owned -//! tables (`identity_keys`, `dashpay_profiles`, -//! `dashpay_payments_overlay`, `token_balances`) are keyed by -//! `identity_id` only; their FK targets `identities(identity_id)` so -//! cascade flows `wallet_metadata → identities → child` through the -//! nullable `identities.wallet_id` link. `identities.wallet_id` is -//! NULL-allowed so identity-only flows (no parent wallet, e.g. the -//! identity-sync manager populating rows before any wallet is -//! registered) work without a placeholder. +//! wallets(wallet_id) ON DELETE CASCADE`. Identity-owned +//! 
tables (`dashpay_profiles`, `dashpay_payments_overlay`, +//! `token_balances`) are keyed by `identity_id` only; their FK targets +//! `identities(identity_id)` so cascade flows `wallets → +//! identities → child` through the nullable `identities.wallet_id` +//! link. `identity_keys` additionally carries its own `wallet_id` +//! column (so per-wallet reads stay a direct `WHERE wallet_id = ?`) +//! and keeps the `identity_id` FK for the identity-delete cascade. +//! `identities.wallet_id` is NULL-allowed so identity-only flows (no +//! parent wallet, e.g. the identity-sync manager populating rows +//! before any wallet is registered) work without a placeholder. //! //! The one relationship that stays a trigger is //! `core_utxos.spent_in_txid` clearing to NULL on transaction delete — @@ -30,7 +32,7 @@ //! Enum-shaped TEXT columns (`network`, `account_type`, `pool_type`, //! `status`, `state`) carry a `CHECK (col IN (...))` clause whose //! IN-list is built from the `*_LABELS` const arrays in -//! `crate::sqlite::schema::{wallet_meta, accounts, asset_locks, +//! `crate::sqlite::schema::{wallets, accounts, asset_locks, //! contacts}`. The consts are the single source of truth shared with //! the writer mapping functions; the per-module `*_labels_match_enum` //! 
unit tests enforce set-equality between each const and its writer's @@ -46,18 +48,25 @@ fn build_check_in(labels: &[&str]) -> String { } pub fn migration() -> String { - let network_check = build_check_in(crate::sqlite::schema::wallet_meta::NETWORK_LABELS); + let network_check = build_check_in(crate::sqlite::schema::wallets::NETWORK_LABELS); let account_type_check = build_check_in(crate::sqlite::schema::accounts::ACCOUNT_TYPE_LABELS); - let pool_type_check = build_check_in(crate::sqlite::schema::accounts::POOL_TYPE_LABELS); let asset_lock_status_check = build_check_in(crate::sqlite::schema::asset_locks::ASSET_LOCK_STATUS_LABELS); let contact_state_check = build_check_in(crate::sqlite::schema::contacts::CONTACT_STATE_LABELS); + // Stamp the header `application_id` so a foreign refinery-versioned + // SQLite DB can be told apart from a wallet-storage DB (asserted in + // `open()` pre-migration and in `restore_from`'s staged validation). + // Splice the constant in decimal — `PRAGMA` takes no bound params. 
+ let application_id = crate::sqlite::conn::APPLICATION_ID; + format!( "\ -CREATE TABLE wallet_metadata ( +PRAGMA application_id = {application_id}; + +CREATE TABLE wallets ( wallet_id BLOB NOT NULL PRIMARY KEY, network TEXT NOT NULL CHECK (network IN {network_check}), birth_height INTEGER NOT NULL @@ -69,17 +78,7 @@ CREATE TABLE account_registrations ( account_index INTEGER NOT NULL, account_xpub_bytes BLOB NOT NULL, PRIMARY KEY (wallet_id, account_type, account_index), - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE -); - -CREATE TABLE account_address_pools ( - wallet_id BLOB NOT NULL, - account_type TEXT NOT NULL CHECK (account_type IN {account_type_check}), - account_index INTEGER NOT NULL, - pool_type TEXT NOT NULL CHECK (pool_type IN {pool_type_check}), - snapshot_blob BLOB NOT NULL, - PRIMARY KEY (wallet_id, account_type, account_index, pool_type), - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE TABLE core_transactions ( @@ -91,7 +90,7 @@ CREATE TABLE core_transactions ( finalized INTEGER NOT NULL, record_blob BLOB NOT NULL, PRIMARY KEY (wallet_id, txid), - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE INDEX idx_core_transactions_height ON core_transactions(wallet_id, height); @@ -106,7 +105,7 @@ CREATE TABLE core_utxos ( spent INTEGER NOT NULL, spent_in_txid BLOB, PRIMARY KEY (wallet_id, outpoint), - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE INDEX idx_core_utxos_spent ON core_utxos(wallet_id, spent); @@ -130,50 +129,43 @@ CREATE TABLE core_instant_locks ( txid BLOB NOT NULL, islock_blob BLOB NOT NULL, PRIMARY KEY (wallet_id, txid), - FOREIGN KEY (wallet_id) REFERENCES 
wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); -CREATE TABLE core_derived_addresses ( - wallet_id BLOB NOT NULL, - account_type TEXT NOT NULL CHECK (account_type IN {account_type_check}), - account_index INTEGER NOT NULL, - address TEXT NOT NULL, - derivation_path TEXT NOT NULL, - used INTEGER NOT NULL, - PRIMARY KEY (wallet_id, account_type, address), - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE -); - -CREATE INDEX idx_core_derived_addresses_addr ON core_derived_addresses(wallet_id, address); - CREATE TABLE core_sync_state ( wallet_id BLOB NOT NULL PRIMARY KEY, last_processed_height INTEGER, synced_height INTEGER, - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE TABLE identities ( identity_id BLOB NOT NULL PRIMARY KEY, wallet_id BLOB, - wallet_index INTEGER, + identity_index INTEGER, entry_blob BLOB NOT NULL, tombstoned INTEGER NOT NULL, - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE INDEX idx_identities_wallet ON identities(wallet_id); CREATE TABLE identity_keys ( + wallet_id BLOB NOT NULL, identity_id BLOB NOT NULL, key_id INTEGER NOT NULL, public_key_blob BLOB NOT NULL, public_key_hash BLOB NOT NULL, - PRIMARY KEY (identity_id, key_id), + -- Reserved for a future typed projection; always NULL today. + -- derivation_indices lives inside public_key_blob (the + -- IdentityKeyWire blob is the single source of truth). 
+ derivation_blob BLOB, + PRIMARY KEY (wallet_id, identity_id, key_id), + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE, FOREIGN KEY (identity_id) REFERENCES identities(identity_id) ON DELETE CASCADE ); -CREATE INDEX idx_identity_keys_identity ON identity_keys(identity_id); +CREATE INDEX idx_identity_keys_wallet_identity ON identity_keys(wallet_id, identity_id); CREATE TABLE contacts ( wallet_id BLOB NOT NULL, @@ -188,7 +180,7 @@ CREATE TABLE contacts ( accepted_accounts BLOB, updated_at INTEGER NOT NULL DEFAULT (unixepoch()), PRIMARY KEY (wallet_id, owner_id, contact_id), - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE TABLE platform_addresses ( @@ -199,7 +191,7 @@ CREATE TABLE platform_addresses ( balance INTEGER NOT NULL, nonce INTEGER NOT NULL, PRIMARY KEY (wallet_id, address), - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE TABLE platform_address_sync ( @@ -207,7 +199,7 @@ CREATE TABLE platform_address_sync ( sync_height INTEGER NOT NULL, sync_timestamp INTEGER NOT NULL, last_known_recent_block INTEGER NOT NULL, - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE TABLE asset_locks ( @@ -219,7 +211,7 @@ CREATE TABLE asset_locks ( amount_duffs INTEGER NOT NULL, lifecycle_blob BLOB NOT NULL, PRIMARY KEY (wallet_id, outpoint), - FOREIGN KEY (wallet_id) REFERENCES wallet_metadata(wallet_id) ON DELETE CASCADE + FOREIGN KEY (wallet_id) REFERENCES wallets(wallet_id) ON DELETE CASCADE ); CREATE TABLE token_balances ( @@ -316,7 +308,7 @@ CREATE TABLE meta_platform_address ( -- Soft-cascade cleanup: drop a scope's metadata when its parent object -- is deleted. 
SQLite fires these for parents removed by an FK cascade --- too (e.g. wallet_metadata delete → identities cascade → identity +-- too (e.g. wallets delete → identities cascade → identity -- trigger), so deleting a wallet cleans its metadata transitively. -- -- Two root brooms key on the deleted parent's id alone so they reach @@ -330,7 +322,7 @@ CREATE TABLE meta_platform_address ( -- row, parentless included. Keys on wallet_id only, so contact state and -- whether the typed parent ever existed are both irrelevant. CREATE TRIGGER cascade_meta_on_wallet_delete -AFTER DELETE ON wallet_metadata +AFTER DELETE ON wallets FOR EACH ROW BEGIN DELETE FROM meta_wallet WHERE wallet_id = OLD.wallet_id; diff --git a/packages/rs-platform-wallet-storage/src/bin/platform-wallet-storage.rs b/packages/rs-platform-wallet-storage/src/bin/platform-wallet-storage.rs index 17ff3d0ba52..698447fe1c9 100644 --- a/packages/rs-platform-wallet-storage/src/bin/platform-wallet-storage.rs +++ b/packages/rs-platform-wallet-storage/src/bin/platform-wallet-storage.rs @@ -149,11 +149,8 @@ impl CliError { fn run(cli: Cli) -> Result { let auto_backup_dir: Option = cli.auto_backup_dir; - // `prune` is a pure filesystem op against the backups directory — - // `--db` is meaningless for it and must not be required. Handle the - // subcommand BEFORE extracting `cli.db` so the operator can run - // `prune --backups-dir ... --keep-last N` without - // also passing a database path. + // `prune` is a pure filesystem op; `--db` is meaningless, so handle it + // before requiring `cli.db`. if let Cmd::Prune(args) = &cli.cmd { return run_prune(args); } @@ -167,11 +164,8 @@ fn run(cli: Cli) -> Result { return run_restore(&db, args, auto_backup_dir.as_deref()); } - // For `migrate --no-auto-backup`, we must keep `auto_backup_dir = - // None` so the open-time pre-migration backup is skipped. 
For - // every other subcommand we leave the user-configured dir (or the - // default) in place — the library's safe-by-default semantics - // still apply. + // `migrate --no-auto-backup` clears `auto_backup_dir` so the open-time + // pre-migration backup is skipped; other subcommands keep the default. let mut config = SqlitePersisterConfig::new(&db); if let Some(dir) = auto_backup_dir.clone() { config = config.with_auto_backup_dir(Some(dir)); @@ -183,10 +177,8 @@ fn run(cli: Cli) -> Result { } } - // Migrate (idempotent): open performs it. We capture the prior - // schema version so we can print "applied: N". A transient read - // failure must surface — silently reading 0 would print a wrong - // `applied:` count. + // Migrate is done by `open`; capture pre/post versions to print + // "applied: N". A read failure must surface, not be read as 0. if let Cmd::Migrate(_) = &cli.cmd { let pre_version = peek_schema_version(&db).map_err(|e| CliError::runtime(e.to_string()))?; let _persister = SqlitePersister::open(config.clone()).map_err(map_open_err_for_cli)?; @@ -228,21 +220,16 @@ fn map_open_err_for_cli(err: WalletStorageError) -> CliError { /// transient failure for "version 0". fn peek_schema_version(db: &Path) -> Result, rusqlite::Error> { use rusqlite::{OpenFlags, OptionalExtension}; - // Open READ-ONLY (no SQLITE_OPEN_CREATE) so a typo'd --db path errors - // out at this gate rather than silently materialising a zero-byte - // SQLite file that bypasses the crate's 0o600 invariant. A genuinely - // fresh `migrate` invocation against a non-existent DB file is normal - // — surface that as `Ok(None)` so the migrate path proceeds and - // `SqlitePersister::open` creates the file under the 0o600 invariant. + // A missing path is a normal fresh `migrate`: `Ok(None)` lets + // `SqlitePersister::open` create the file under the 0o600 invariant, + // instead of materialising a stub here that bypasses it. 
if !db.exists() { return Ok(None); } - let conn = rusqlite::Connection::open_with_flags( - db, - OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_URI, - )?; - // Pre-migration the history table may not exist yet — that is a - // legitimate "no version" answer, not a failure. + // READ-ONLY, URI parsing off (matches the open-conn choke-point) so a + // `--db` path can't smuggle `file:` query params defeating read-only. + let conn = rusqlite::Connection::open_with_flags(db, OpenFlags::SQLITE_OPEN_READ_ONLY)?; + // Pre-migration the history table may legitimately not exist. let has_history = conn .query_row( "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'refinery_schema_history'", @@ -266,8 +253,7 @@ fn peek_schema_version(db: &Path) -> Result, rusqlite::Error> { } fn run_backup(persister: &SqlitePersister, args: BackupArgs) -> Result { - // `backup_to` is the single authority on refuse-to-overwrite — it - // returns `BackupDestinationExists` for a pre-existing file path. + // `backup_to` owns refuse-to-overwrite (`BackupDestinationExists`). let path = persister.backup_to(&args.out).map_err(|e| match e { WalletStorageError::BackupDestinationExists { path } => CliError::runtime(format!( "backup destination exists and refuses to overwrite: {}", @@ -294,9 +280,8 @@ fn run_restore( eprintln!("warning: auto-backup skipped (--no-auto-backup)"); SqlitePersister::restore_from_skip_backup(db, &args.from) } else { - // CLI default mirrors the persister config default - // (`/backups/auto/`). The CLI doesn't open a - // persister here, so we compute the default inline. + // No persister is opened here, so compute the config default + // (`/backups/auto/`) inline. 
let resolved_dir: PathBuf = match auto_backup_dir { None => default_auto_backup_dir(db), Some(p) => p.to_path_buf(), @@ -350,10 +335,8 @@ fn run_prune(args: &PruneArgs) -> Result { mod tests { use super::*; - /// `peek_schema_version` on a non-existent path must NOT materialise - /// a zero-byte SQLite file at that path — opening READ-ONLY (no - /// SQLITE_OPEN_CREATE) keeps a typo from being rewarded with a stub - /// file lacking the crate's 0o600 mode invariant. + /// `peek_schema_version` on a missing path must not materialise a stub + /// file (opening READ-ONLY) that would lack the 0o600 invariant. #[test] fn peek_schema_version_on_missing_db_does_not_create_stub() { let tmp = tempfile::tempdir().expect("tempdir"); diff --git a/packages/rs-platform-wallet-storage/src/kv.rs b/packages/rs-platform-wallet-storage/src/kv.rs index 8dd1e3d22a1..ddfadbfcb2d 100644 --- a/packages/rs-platform-wallet-storage/src/kv.rs +++ b/packages/rs-platform-wallet-storage/src/kv.rs @@ -10,19 +10,15 @@ //! serialization (bincode, JSON, protobuf, raw bytes). Keys are //! bounded `TEXT` (1..=128 chars). //! -//! Scoping: each [`ObjectId`] variant addresses a dedicated table. The -//! [`ObjectId::Global`] slot has no parent and survives wallet deletion. -//! Every other variant names a wallet object, but a write does NOT -//! require that object to exist yet — metadata may be attached ahead of -//! sync. When the object is later deleted, an `AFTER DELETE` trigger on -//! its parent table removes the matching metadata. However, if the -//! parent object is never created, or is removed via a path the trigger -//! does not cover, the metadata row may persist as an orphan. This is an -//! accepted limitation across all scopes; a future garbage-collection pass -//! is expected to reap such orphans (no live parent, e.g. older than ~1 -//! week) — callers should not rely on orphan metadata persisting forever. -//! The same key string under different scopes is independent — the scopes -//! 
live in separate tables. +//! Scoping: each [`ObjectId`] variant addresses a dedicated table, so the +//! same key string under different scopes is independent. +//! [`ObjectId::Global`] has no parent and survives wallet deletion. Other +//! variants name a wallet object but a write does NOT require it to exist +//! yet (metadata may be attached ahead of sync); an `AFTER DELETE` trigger +//! reaps the metadata when the object is deleted. Rows whose parent is +//! never created, or removed via a path the trigger misses, may persist as +//! orphans — an accepted limitation; a future GC pass is expected to reap +//! them, so callers must not rely on orphans living forever. //! //! This API is **independent of [`platform_wallet::changeset::PlatformWalletPersistence`]**: //! KV is for app metadata, not wallet domain state. Reads and writes go @@ -33,16 +29,10 @@ use platform_wallet::wallet::platform_wallet::WalletId; /// Scope of a metadata entry — one variant per dedicated `meta_*` table. /// -/// [`ObjectId::Global`] has no parent and survives wallet deletion. The -/// other variants name a wallet object but carry no insert-time -/// existence requirement: metadata may be written before its parent -/// object is synced into its typed table. An `AFTER DELETE` trigger on -/// each parent removes the matching metadata when the object is deleted. -/// -/// **Orphan metadata:** if the parent object is never created, or is -/// removed via a path the trigger does not cover, the metadata row may -/// persist as an orphan. A future GC pass is expected to reap such -/// rows; do not rely on them living forever. +/// [`ObjectId::Global`] has no parent and survives wallet deletion. Other +/// variants name a wallet object but may be written before it is synced; +/// an `AFTER DELETE` trigger reaps the metadata when the object is deleted. +/// See the module docs for the orphan-metadata limitation. 
#[derive(Debug, Clone, PartialEq, Eq)] pub enum ObjectId { /// Global app metadata; no parent (`meta_global`). @@ -69,25 +59,17 @@ pub enum ObjectId { }, } -/// Maximum allowed key length. Enforced in Rust as a **byte**-length -/// bound (`validate_key` rejects with `KeyTooLong`/`KeyEmpty` on -/// `key.len()`) and in SQL as a **code-point** bound -/// (`CHECK (length(key) BETWEEN 1 AND 128)`, where SQLite's `length()` -/// counts UTF-8 code points). For ASCII keys the two coincide; for -/// non-ASCII keys the Rust byte bound is the stricter of the two, so no -/// over-length key reaches SQL. +/// Maximum allowed key length, in **code points**. `validate_key` counts +/// `chars().count()`; the SQL `CHECK (length(key) BETWEEN 1 AND 128)` uses +/// the same unit (SQLite `length()` counts code points), so the two bounds +/// accept exactly the same key set. pub const MAX_KEY_LEN: usize = 128; /// Hard cap on the size of a single KV value, in bytes, so a tampered or /// corrupted backup row cannot force a multi-gigabyte allocation on the -/// next `get`. -/// -/// Kept in sync MANUALLY with the `BLOB_SIZE_LIMIT_BYTES` ceiling on -/// bincode-serde blobs in `sqlite::schema::blob`: the `sqlite` and `kv` -/// features compile independently, so a `const`-level cross-reference -/// between the two modules can't be relied on. Change both together if -/// the ceiling moves. -pub const MAX_VALUE_LEN: usize = 16 * 1024 * 1024; +/// next `get`. Shares the crate-root [`SIZE_LIMIT_BYTES`](crate::SIZE_LIMIT_BYTES) +/// ceiling with the bincode-serde BLOB decode cap. +pub const MAX_VALUE_LEN: usize = crate::SIZE_LIMIT_BYTES; /// Errors returned by [`KvStore`] operations. /// @@ -99,14 +81,14 @@ pub enum KvError { #[error("kv key is empty")] KeyEmpty, - /// Key exceeded [`MAX_KEY_LEN`]. - #[error("kv key too long: {len} bytes (max {})", MAX_KEY_LEN)] + /// Key exceeded [`MAX_KEY_LEN`]. `len` is the key's code-point count + /// (the same unit the SQL `length()` CHECK uses). 
+ #[error("kv key too long: {len} code points (max {})", MAX_KEY_LEN)] KeyTooLong { len: usize }, /// A value exceeded [`MAX_VALUE_LEN`]. Raised by `put` before the - /// INSERT and by `get` before the bytes are materialised, so an - /// oversize value never lands and a tampered row never OOMs the - /// process. + /// INSERT and by `get` before materialising, so a tampered row can't + /// OOM the process. #[error("kv value too large: {found} bytes (max {max})")] ValueTooLarge { found: usize, max: usize }, @@ -124,6 +106,15 @@ pub enum KvError { /// /// See the module-level docs for scoping and value semantics. Each /// [`ObjectId`] variant addresses a dedicated table. +/// +/// # Security +/// +/// Values are stored **PLAINTEXT** in the persister `.db` and in every +/// backup copied from it. This API is the explicit, caller-policed +/// plaintext exception to the crate's no-secrets-in-the-db boundary +/// (see `SECRETS.md`). **NEVER store key or signing material here** — +/// mnemonics, seeds, private keys, or anything that could move funds. +/// Use [`SecretStore`](crate::secrets::SecretStore) for secret material. pub trait KvStore { /// Read the value bound to `(scope, key)`. Returns `Ok(None)` when /// the key is absent. Backends MUST reject values larger than @@ -140,6 +131,12 @@ pub trait KvStore { /// Backends MUST reject a `value` larger than [`MAX_VALUE_LEN`] with /// [`KvError::ValueTooLarge`] before writing, so a `put` can never /// plant a row a later `get` would refuse to materialise. + /// + /// # Security + /// + /// `value` is stored **PLAINTEXT** in the `.db` and all backups. + /// NEVER store key/signing material here — use + /// [`SecretStore`](crate::secrets::SecretStore). fn put(&self, scope: &ObjectId, key: &str, value: &[u8]) -> Result<(), KvError>; /// Remove the row bound to `(scope, key)`. 
Idempotent — a missing @@ -156,14 +153,15 @@ pub trait KvStore { fn list_keys(&self, scope: &ObjectId, prefix: Option<&str>) -> Result, KvError>; } -/// Validate a key against the length bounds. Used by [`KvStore`] -/// implementations as a typed-error pre-check before reaching SQL. +/// Typed-error pre-check used by [`KvStore`] impls before reaching SQL. +/// Counts code points to match the SQL CHECK unit (see [`MAX_KEY_LEN`]). pub(crate) fn validate_key(key: &str) -> Result<(), KvError> { if key.is_empty() { return Err(KvError::KeyEmpty); } - if key.len() > MAX_KEY_LEN { - return Err(KvError::KeyTooLong { len: key.len() }); + let code_points = key.chars().count(); + if code_points > MAX_KEY_LEN { + return Err(KvError::KeyTooLong { len: code_points }); } Ok(()) } diff --git a/packages/rs-platform-wallet-storage/src/lib.rs b/packages/rs-platform-wallet-storage/src/lib.rs index b75ddee4658..dbfb05b9c9a 100644 --- a/packages/rs-platform-wallet-storage/src/lib.rs +++ b/packages/rs-platform-wallet-storage/src/lib.rs @@ -27,6 +27,13 @@ #![deny(rust_2018_idioms)] #![deny(unsafe_code)] +/// Shared 16 MiB ceiling for the two independent size caps in this crate: +/// the KV value cap ([`kv::MAX_VALUE_LEN`]) and the bincode-serde BLOB +/// decode cap (`sqlite::schema::blob::BLOB_SIZE_LIMIT_BYTES`). At the crate +/// root so the independently-compiled `kv` and `sqlite` features share one +/// source of truth. +pub const SIZE_LIMIT_BYTES: usize = 16 * 1024 * 1024; + #[cfg(feature = "kv")] pub mod kv; #[cfg(feature = "sqlite")] @@ -35,10 +42,8 @@ pub mod sqlite; #[cfg(feature = "secrets")] pub mod secrets; -// Convenience re-exports kept under the crate root so embedders don't -// have to spell out the `::sqlite::` middle segment for the common -// names. Adding to or trimming from this list does NOT count as a -// breaking change of the submodule API. +// Convenience re-exports so embedders can skip the `::sqlite::` segment +// for common names. 
#[cfg(feature = "kv")] pub use kv::{KvError, KvStore, ObjectId}; #[cfg(feature = "sqlite")] @@ -48,9 +53,8 @@ pub use sqlite::{ WalletStorageError, }; -// Compile-time assertions — `Send + Sync`, `PlatformWalletPersistence` -// object-safety, and the no-boxed-trait-object error policy. -// Lint-gated to the SQLite feature because they reference its types. +// Compile-time assertions: `Send + Sync` and `PlatformWalletPersistence` +// object-safety. Gated to `sqlite` because they reference its types. #[cfg(feature = "sqlite")] #[allow(dead_code)] const fn _send_sync_check() {} @@ -67,9 +71,8 @@ fn _object_safety_check(persister: SqlitePersister) { std::sync::Arc::new(persister); } -// The keyring SPI must be object-safe and its error `Send + Sync`, so -// a backend can be held behind `Arc` and its errors crossed between threads / FFI. +// The keyring SPI must be object-safe with `Send + Sync` errors so a +// backend can live behind `Arc`. #[cfg(feature = "secrets")] #[allow(dead_code)] const fn _secrets_send_sync_check() {} diff --git a/packages/rs-platform-wallet-storage/src/secrets/envelope.rs b/packages/rs-platform-wallet-storage/src/secrets/envelope.rs new file mode 100644 index 00000000000..725d76eccfe --- /dev/null +++ b/packages/rs-platform-wallet-storage/src/secrets/envelope.rs @@ -0,0 +1,807 @@ +//! Tier-2 opt-in per-object password envelope (backend-independent). +//! +//! Sits ABOVE [`SecretStore`](crate::secrets::SecretStore), over both the +//! `File` vault and `Os` keyring arms: the backend stores opaque bytes, +//! and a chosen critical object (a seed wallet, a single privkey) can be +//! wrapped under an extra, user-supplied **object password** before it +//! ever reaches the backend. Reading a protected object then needs BOTH +//! backend access AND the password — the first control that survives a +//! full backend compromise (the keychain scraped, the vault stolen and its +//! passphrase cracked). +//! +//! 
# Wire format (self-describing, authenticated) +//! +//! ```text +//! magic b"PWSEV" (5) +//! version u8 = 1 (ENVELOPE_VERSION — independent of the vault FORMAT_VERSION) +//! scheme u8 (0 = unprotected passthrough, 1 = argon2id-xchacha password) +//! ── scheme 0 ── payload: raw secret bytes +//! ── scheme 1 ── kdf(id u8 ‖ m_kib u32 LE ‖ t u32 LE ‖ p u32 LE) (13) +//! ‖ salt[32] ‖ nonce[24] ‖ ciphertext+tag +//! ``` +//! +//! The header proves what the blob **is**, never what the caller +//! **expected** — that expectation lives solely in the caller's `Some/None` +//! password argument (see [`unwrap`]'s strict, fail-closed table). The +//! self-description is a convenience for `NeedsPassword`/`WrongPassword`/ +//! version UX, **not** the security boundary. +//! +//! ## Reused, never reinvented +//! - KDF: [`crypto::derive_key`] (Argon2id) with a fresh 32-byte salt; the +//! param **ceiling is enforced BEFORE derivation** on the +//! attacker-controllable header ([`KdfParams::enforce_bounds`]). +//! - AEAD: [`crypto::seal`]/[`crypto::open`] (XChaCha20-Poly1305), fresh +//! per-wrap nonce; a tag failure maps to +//! [`SecretStoreError::WrongPassword`] with no plaintext. +//! - AAD binds `domain ‖ magic ‖ version ‖ scheme ‖ kdf ‖ salt ‖ wallet_id +//! ‖ label`, mirroring [`format::aad`]/[`format::verify_aad`] so a +//! relocated/confused blob fails the tag. +//! +//! No bespoke crypto. +//! +//! [`format::aad`]: super::file::format::aad +//! [`format::verify_aad`]: super::file::format::verify_aad + +use std::sync::Once; + +use super::error::SecretStoreError; +use super::file::crypto::{self, KdfParams, NONCE_LEN, SALT_LEN}; +use super::secret::{SecretBytes, SecretString}; +use super::validate::WalletId; +use super::MAX_SECRET_LEN; + +/// 5-byte sentinel marking a Tier-2 envelope. A decrypted entry NOT +/// starting with this is a legacy magic-less raw value (see [`unwrap`]). 
pub(crate) const MAGIC: &[u8; 5] = b"PWSEV";

/// Envelope wire version — bumped only on a breaking layout change, and
/// independent of the vault `FORMAT_VERSION` (the envelope rides inside the
/// entry bytes, identical over File/Os).
pub(crate) const ENVELOPE_VERSION: u8 = 1;

/// Scheme 0: unprotected passthrough — payload is the raw secret.
pub(crate) const SCHEME_UNPROTECTED: u8 = 0;
/// Scheme 1: Argon2id + XChaCha20-Poly1305 under an object password.
pub(crate) const SCHEME_PASSWORD: u8 = 1;

/// Domain-separation tag leading the scheme-1 AAD, so a Tier-2 tag can
/// never be confused with the vault's own verify/entry AAD.
const TIER2_DOMAIN: &[u8] = b"PWSEV-TIER2-AAD-v1";

/// Fixed header: `magic ‖ version ‖ scheme`.
// `+ 2` = one version byte plus one scheme byte after the magic.
const HEADER_LEN: usize = MAGIC.len() + 2;
/// Encoded KDF-params field: `id u8 ‖ m_kib u32 ‖ t u32 ‖ p u32`.
const KDF_FIELD_LEN: usize = 1 + 4 + 4 + 4;
/// Poly1305 tag length — present even for empty plaintext.
const AEAD_TAG_LEN: usize = 16;
/// Smallest valid scheme-1 body (kdf ‖ salt ‖ nonce ‖ bare tag). A body
/// shorter than this cannot hold a sealed payload.
const MIN_SCHEME1_BODY: usize = KDF_FIELD_LEN + SALT_LEN + NONCE_LEN + AEAD_TAG_LEN;

/// Fixed, bounded envelope overhead (`magic 5 + version 1 + scheme 1 + kdf
/// 13 + salt 32 + nonce 24 + tag 16 = 92`), rounded up to 128 for headroom
/// (future header fields / versions). Used to derive the plaintext cap.
pub(crate) const MAX_ENVELOPE_OVERHEAD: usize = 128;

/// Plaintext cap at the envelope boundary: `MAX_SECRET_LEN −
/// MAX_ENVELOPE_OVERHEAD`. Capping the **plaintext** (uniformly for both
/// schemes) keeps the user-visible limit stable AND guarantees the
/// enveloped bytes always fit the backend vault's own `MAX_SECRET_LEN`
/// `put_bytes` cap. Re-exported at
/// [`crate::secrets`] as the documented, stable user-facing cap.
+pub const MAX_PLAINTEXT_LEN: usize = MAX_SECRET_LEN - MAX_ENVELOPE_OVERHEAD; + +/// Wrap `plaintext` for `(wallet_id, label)` using the shipped default +/// Argon2 target (64 MiB / t=3) when a password is supplied. +/// +/// `None` → an unprotected (scheme-0) envelope; `Some(pw)` → a scheme-1 +/// envelope sealed under `pw`. A blank password is rejected at enrol +/// ([`SecretStoreError::BlankPassphrase`]). +/// +/// Returns the envelope inside a zeroizing [`SecretBytes`]: a scheme-0 +/// envelope embeds the raw plaintext, so the wire bytes are handled as +/// sensitive (mlock'd, wiped on drop) by construction — symmetric with +/// [`unwrap`]'s return. +pub(crate) fn wrap( + wallet_id: &WalletId, + label: &str, + password: Option<&SecretString>, + plaintext: &[u8], +) -> Result { + wrap_with_params( + wallet_id, + label, + password, + plaintext, + KdfParams::default_target(), + ) +} + +/// [`wrap`] with explicit Argon2 `params` (tests use the floor params for +/// speed; production uses [`KdfParams::default_target`]). `params` is +/// ignored when `password` is `None`. +pub(crate) fn wrap_with_params( + wallet_id: &WalletId, + label: &str, + password: Option<&SecretString>, + plaintext: &[u8], + params: KdfParams, +) -> Result { + // Cap the PLAINTEXT (before overhead) uniformly for both schemes so the + // enveloped bytes always fit the backend cap and the limit is stable. + if plaintext.len() > MAX_PLAINTEXT_LEN { + return Err(SecretStoreError::SecretTooLarge { + found: plaintext.len(), + max: MAX_PLAINTEXT_LEN, + }); + } + + let Some(pw) = password else { + // Scheme 0: magic ‖ version ‖ scheme ‖ raw payload. + let mut out = Vec::with_capacity(HEADER_LEN + plaintext.len()); + out.extend_from_slice(MAGIC); + out.push(ENVELOPE_VERSION); + out.push(SCHEME_UNPROTECTED); + out.extend_from_slice(plaintext); + // `SecretBytes::new` moves `out` into a zeroizing, mlock'd buffer + // (no copy) — the scheme-0 plaintext never lives in a bare Vec. 
+ return Ok(SecretBytes::new(out)); + }; + + // Reject a blank object password BEFORE any derivation. + if pw.is_blank() { + return Err(SecretStoreError::BlankPassphrase); + } + + // Fresh per-object salt so the same password on two objects yields + // different keys and precomputation is defeated. + let mut salt = [0u8; SALT_LEN]; + crypto::random_bytes(&mut salt)?; + // `derive_key` enforces the param bounds before allocating. + let key = crypto::derive_key(pw, &salt, params)?; + let aad = scheme1_aad(¶ms, &salt, wallet_id.as_bytes(), label); + let (nonce, ciphertext) = crypto::seal(&key, &aad, plaintext)?; + + let mut out = + Vec::with_capacity(HEADER_LEN + KDF_FIELD_LEN + SALT_LEN + NONCE_LEN + ciphertext.len()); + out.extend_from_slice(MAGIC); + out.push(ENVELOPE_VERSION); + out.push(SCHEME_PASSWORD); + out.extend_from_slice(&encode_kdf(¶ms)); + out.extend_from_slice(&salt); + out.extend_from_slice(&nonce); + out.extend_from_slice(&ciphertext); + Ok(SecretBytes::new(out)) +} + +/// Unwrap `blob` for `(wallet_id, label)`, applying the **strict, +/// fail-closed** read. The "expected-protected" bit is +/// the caller's assertion, surfaced solely by `password`, and is NEVER +/// inferred from the blob's scheme byte. 
+/// +/// | `password` | stored blob | result | +/// |---|---|---| +/// | `Some(pw)` | valid scheme-1 | secret, or [`WrongPassword`] on tag fail | +/// | `Some(pw)` | scheme-0 **or** magic-less (legacy raw) | [`ExpectedProtectedButUnsealed`] | +/// | `Some(pw)` | scheme-1 but too short | [`Corruption`] (sealed-but-broken) | +/// | `Some/None` | magic present, unknown version/scheme | [`UnsupportedEnvelopeVersion`] | +/// | `None` | valid scheme-1 | [`NeedsPassword`] (never ciphertext) | +/// | `None` | scheme-0 | secret | +/// | `None` | magic-less (legacy raw) | secret (+ one-time warn; re-wrapped on next write) | +/// | `None` | magic present but truncated header | [`Corruption`] | +/// +/// The load-bearing row is `Some(pw)` + non-envelope ⇒ +/// [`ExpectedProtectedButUnsealed`]: with a password in hand, a +/// non-protected blob can only mean a strip → refuse, return no bytes. +/// +/// [`WrongPassword`]: SecretStoreError::WrongPassword +/// [`ExpectedProtectedButUnsealed`]: SecretStoreError::ExpectedProtectedButUnsealed +/// [`Corruption`]: SecretStoreError::Corruption +/// [`UnsupportedEnvelopeVersion`]: SecretStoreError::UnsupportedEnvelopeVersion +/// [`NeedsPassword`]: SecretStoreError::NeedsPassword +pub(crate) fn unwrap( + wallet_id: &WalletId, + label: &str, + password: Option<&SecretString>, + blob: &[u8], +) -> Result { + // Magic-less ⇒ a legacy unprotected raw value (scheme-0-equivalent), + // (legacy-tolerant read-path: a None read returns it, a Some(pw) read refuses). + if !blob.starts_with(MAGIC) { + return match password { + None => { + warn_legacy_once(); + Ok(SecretBytes::from_slice(blob)) + } + // Caller asserted protection but found a magic-less raw value: + // a strip/downgrade ⇒ FAIL CLOSED. Never returns bytes. + Some(_) => Err(SecretStoreError::ExpectedProtectedButUnsealed), + }; + } + + // Magic present but truncated before version+scheme: a broken envelope. 
+ if blob.len() < HEADER_LEN { + return Err(SecretStoreError::Corruption); + } + + let version = blob[MAGIC.len()]; + if version != ENVELOPE_VERSION { + // Fail closed regardless of password — an unparseable future format + // can be neither safely unwrapped nor treated as scheme-0. + return Err(SecretStoreError::UnsupportedEnvelopeVersion { found: version }); + } + + let scheme = blob[MAGIC.len() + 1]; + let body = &blob[HEADER_LEN..]; + match scheme { + SCHEME_UNPROTECTED => match password { + None => Ok(SecretBytes::from_slice(body)), + // Strip: caller expected protection, blob is unprotected. + Some(_) => Err(SecretStoreError::ExpectedProtectedButUnsealed), + }, + SCHEME_PASSWORD => match password { + None => Err(SecretStoreError::NeedsPassword), + Some(pw) => unwrap_scheme1(wallet_id, label, pw, body), + }, + // Unknown scheme under a known version ⇒ forward-incompatible + // layout; report the (known) version byte. Fail closed. + _ => Err(SecretStoreError::UnsupportedEnvelopeVersion { found: version }), + } +} + +/// Decrypt a scheme-1 body. The KDF params, salt, and nonce are all read +/// from the (attacker-controllable) header; the param **ceiling is +/// enforced before** [`crypto::derive_key`] allocates, and every +/// header field that feeds key/AAD is bound into the AAD so any in-place +/// edit fails the tag. +fn unwrap_scheme1( + wallet_id: &WalletId, + label: &str, + password: &SecretString, + body: &[u8], +) -> Result { + if body.len() < MIN_SCHEME1_BODY { + // The scheme byte says protected, but the body cannot hold a sealed + // payload — corrupt, not a strip. + return Err(SecretStoreError::Corruption); + } + let kdf = decode_kdf(&body[..KDF_FIELD_LEN]); + // Gate the inflated/unknown header BEFORE any derivation/alloc. 
+ kdf.enforce_bounds()?; + + let mut salt = [0u8; SALT_LEN]; + salt.copy_from_slice(&body[KDF_FIELD_LEN..KDF_FIELD_LEN + SALT_LEN]); + let mut nonce = [0u8; NONCE_LEN]; + nonce.copy_from_slice(&body[KDF_FIELD_LEN + SALT_LEN..KDF_FIELD_LEN + SALT_LEN + NONCE_LEN]); + let ciphertext = &body[KDF_FIELD_LEN + SALT_LEN + NONCE_LEN..]; + + let aad = scheme1_aad(&kdf, &salt, wallet_id.as_bytes(), label); + let key = crypto::derive_key(password, &salt, kdf)?; + match crypto::open(&key, &nonce, &aad, ciphertext) { + Ok(plaintext) => Ok(plaintext), + // Tag failure (wrong password, relocated blob, or header tamper): + // no plaintext is ever materialized (CWE-347). + Err(SecretStoreError::Decrypt) => Err(SecretStoreError::WrongPassword), + Err(e) => Err(e), + } +} + +/// Build the scheme-1 AAD binding object identity + header, +/// length-prefixed for the variable fields, mirroring +/// [`format::aad`](super::file::format::aad)/`verify_aad`. +fn scheme1_aad( + kdf: &KdfParams, + salt: &[u8; SALT_LEN], + wallet_id: &[u8; 32], + label: &str, +) -> Vec { + let lb = label.as_bytes(); + let mut v = Vec::with_capacity( + TIER2_DOMAIN.len() + + MAGIC.len() + + 2 + + KDF_FIELD_LEN + + 4 + + SALT_LEN + + 4 + + wallet_id.len() + + 4 + + lb.len(), + ); + v.extend_from_slice(TIER2_DOMAIN); + v.extend_from_slice(MAGIC); + v.push(ENVELOPE_VERSION); + v.push(SCHEME_PASSWORD); + v.extend_from_slice(&encode_kdf(kdf)); + v.extend_from_slice(&(salt.len() as u32).to_le_bytes()); + v.extend_from_slice(salt); + v.extend_from_slice(&(wallet_id.len() as u32).to_le_bytes()); + v.extend_from_slice(wallet_id); + v.extend_from_slice(&(lb.len() as u32).to_le_bytes()); + v.extend_from_slice(lb); + v +} + +/// Encode KDF params to the fixed 13-byte header field (LE). 
+fn encode_kdf(kdf: &KdfParams) -> [u8; KDF_FIELD_LEN] { + let mut out = [0u8; KDF_FIELD_LEN]; + out[0] = kdf.id; + out[1..5].copy_from_slice(&kdf.m_kib.to_le_bytes()); + out[5..9].copy_from_slice(&kdf.t.to_le_bytes()); + out[9..13].copy_from_slice(&kdf.p.to_le_bytes()); + out +} + +/// Decode the fixed 13-byte KDF header field. Out-of-range values are +/// caught downstream by [`KdfParams::enforce_bounds`]. +fn decode_kdf(b: &[u8]) -> KdfParams { + debug_assert_eq!(b.len(), KDF_FIELD_LEN); + KdfParams { + id: b[0], + m_kib: u32::from_le_bytes([b[1], b[2], b[3], b[4]]), + t: u32::from_le_bytes([b[5], b[6], b[7], b[8]]), + p: u32::from_le_bytes([b[9], b[10], b[11], b[12]]), + } +} + +/// Emit a single process-lifetime warning that a legacy magic-less entry +/// was read. Carries no secret (the message is static). +fn warn_legacy_once() { + static WARN: Once = Once::new(); + WARN.call_once(|| { + tracing::warn!( + "read a legacy unprotected secret entry with no envelope header; \ + it will be re-wrapped on the next write" + ); + }); +} + +#[cfg(test)] +mod tests { + use subtle::ConstantTimeEq; + + use super::super::file::crypto::{ + ARGON2_MAX_M_KIB, ARGON2_MAX_T, ARGON2_MIN_M_KIB, ARGON2_MIN_T, ARGON2_P, + }; + use super::super::file::format::KDF_ID_ARGON2ID; + use super::*; + + // Wire offsets into a scheme-1 envelope (for surgical tampering). + const O_VERSION: usize = 5; + const O_SCHEME: usize = 6; + const O_KDF: usize = HEADER_LEN; // 7 + const O_ID: usize = O_KDF; // 7 + const O_MKIB: usize = O_KDF + 1; // 8 + const O_T: usize = O_KDF + 5; // 12 + const O_SALT: usize = O_KDF + KDF_FIELD_LEN; // 20 + const O_NONCE: usize = O_SALT + SALT_LEN; // 52 + + fn wid(b: u8) -> WalletId { + WalletId::from([b; 32]) + } + + /// Argon2id floor params — fast enough for unit tests. 
+ fn floor() -> KdfParams { + KdfParams { + id: KDF_ID_ARGON2ID, + m_kib: ARGON2_MIN_M_KIB, + t: ARGON2_MIN_T, + p: ARGON2_P, + } + } + + fn pw(s: &str) -> SecretString { + SecretString::new(s) + } + + /// Wrap and expose the envelope as a `Vec` for byte-level + /// inspection/mutation in tests (the production `wrap` returns a + /// zeroizing `SecretBytes`). + fn wrap_bytes( + w: &WalletId, + label: &str, + password: Option<&SecretString>, + pt: &[u8], + ) -> Vec { + wrap(w, label, password, pt) + .unwrap() + .expose_secret() + .to_vec() + } + + /// [`wrap_bytes`] with explicit (floor) params, for the scheme-1 tests. + fn wrap_p( + w: &WalletId, + label: &str, + password: Option<&SecretString>, + pt: &[u8], + params: KdfParams, + ) -> Vec { + wrap_with_params(w, label, password, pt, params) + .unwrap() + .expose_secret() + .to_vec() + } + + /// scheme-0 passthrough round-trip; the wrapped form leads + /// with magic, version=1, scheme=0, then the raw payload. + #[test] + fn scheme0_passthrough_round_trip() { + let secret = b"top secret seed bytes"; + let blob = wrap_bytes(&wid(1), "seed", None, secret); + assert!(blob.starts_with(MAGIC)); + assert_eq!(blob[O_VERSION], 1); + assert_eq!(blob[O_SCHEME], 0); + assert_eq!(&blob[HEADER_LEN..], secret); + let got = unwrap(&wid(1), "seed", None, &blob).unwrap(); + assert_eq!(got.expose_secret(), secret); + } + + /// scheme-1 round-trip; header records the argon2id id, a + /// 32-byte fresh salt and 24-byte nonce, ct != pt, and two wraps of the + /// same secret/pw differ in salt+nonce (no reuse). + #[test] + fn scheme1_round_trip_and_fresh_salt_nonce() { + let secret = b"correct horse battery staple seed"; + let p = pw("hunter2-but-better"); + let blob = wrap_p(&wid(7), "seed", Some(&p), secret, floor()); + assert!(blob.starts_with(MAGIC)); + assert_eq!(blob[O_VERSION], 1); + assert_eq!(blob[O_SCHEME], 1); + assert_eq!(blob[O_ID], KDF_ID_ARGON2ID); + // ciphertext differs from plaintext. 
+ assert_ne!(&blob[O_NONCE + NONCE_LEN..], secret); + + let got = unwrap(&wid(7), "seed", Some(&p), &blob).unwrap(); + assert_eq!(got.expose_secret(), secret); + + let blob2 = wrap_p(&wid(7), "seed", Some(&p), secret, floor()); + assert_ne!( + &blob[O_SALT..O_SALT + SALT_LEN], + &blob2[O_SALT..O_SALT + SALT_LEN], + "salt must be fresh per wrap" + ); + assert_ne!( + &blob[O_NONCE..O_NONCE + NONCE_LEN], + &blob2[O_NONCE..O_NONCE + NONCE_LEN], + "nonce must be fresh per wrap" + ); + } + + /// Wrong object password → WrongPassword, no plaintext. + #[test] + fn wrong_password_fails_closed() { + let blob = wrap_p(&wid(1), "seed", Some(&pw("right")), b"seed", floor()); + let err = unwrap(&wid(1), "seed", Some(&pw("wrong")), &blob).unwrap_err(); + assert!( + matches!(err, SecretStoreError::WrongPassword), + "got {err:?}" + ); + } + + /// Identity AAD — a protected blob unwrapped at any + /// other (wallet, label) fails the tag; same-identity still succeeds. + #[test] + fn relocation_across_identity_is_rejected() { + let p = pw("pw"); + let blob = wrap_p(&wid(0xA), "labelA", Some(&p), b"seed", floor()); + for (w, l) in [(0xB, "labelB"), (0xA, "labelB"), (0xB, "labelA")] { + let err = unwrap(&wid(w), l, Some(&p), &blob).unwrap_err(); + assert!( + matches!(err, SecretStoreError::WrongPassword), + "relocation to ({w:#x},{l}) must fail, got {err:?}" + ); + } + let ok = unwrap(&wid(0xA), "labelA", Some(&p), &blob).unwrap(); + assert_eq!(ok.expose_secret(), b"seed"); + } + + /// Per-field header tamper. Unknown KDF id is rejected by + /// `enforce_bounds` (KdfFailure) before derive; in-bounds KDF shifts, + /// salt, and nonce all fail the AEAD tag (WrongPassword) — never the + /// plaintext. + #[test] + fn header_tamper_fails_closed_per_field() { + let p = pw("pw"); + let base = wrap_p(&wid(1), "seed", Some(&p), b"seed", floor()); + + // kdf.id → 7 (unknown) ⇒ KdfFailure (bounds reject pre-derive). 
+ let mut b = base.clone(); + b[O_ID] = 7; + assert!(matches!( + unwrap(&wid(1), "seed", Some(&p), &b).unwrap_err(), + SecretStoreError::KdfFailure + )); + + // kdf.m_kib → a different IN-BOUNDS value ⇒ WrongPassword (AAD + key). + let mut b = base.clone(); + b[O_MKIB..O_MKIB + 4].copy_from_slice(&(ARGON2_MIN_M_KIB + 1024).to_le_bytes()); + assert!(matches!( + unwrap(&wid(1), "seed", Some(&p), &b).unwrap_err(), + SecretStoreError::WrongPassword + )); + + // kdf.t → a different IN-BOUNDS value ⇒ WrongPassword. + let mut b = base.clone(); + b[O_T..O_T + 4].copy_from_slice(&(ARGON2_MIN_T + 1).to_le_bytes()); + assert!(matches!( + unwrap(&wid(1), "seed", Some(&p), &b).unwrap_err(), + SecretStoreError::WrongPassword + )); + + // salt[0] flip ⇒ WrongPassword (wrong key + AAD-bound salt). + let mut b = base.clone(); + b[O_SALT] ^= 1; + assert!(matches!( + unwrap(&wid(1), "seed", Some(&p), &b).unwrap_err(), + SecretStoreError::WrongPassword + )); + + // nonce[0] flip ⇒ WrongPassword (nonce feeds decrypt ⇒ tag fail). + let mut b = base; + b[O_NONCE] ^= 1; + assert!(matches!( + unwrap(&wid(1), "seed", Some(&p), &b).unwrap_err(), + SecretStoreError::WrongPassword + )); + } + + /// An inflated KDF param on a forged header is + /// rejected by `enforce_bounds` BEFORE `derive_key` allocates — the + /// ~4 TiB allocation never happens (the test would OOM if it did). The + /// exact ceilings remain valid params. + #[test] + fn kdf_ceiling_enforced_before_derivation() { + let p = pw("pw"); + let base = wrap_p(&wid(1), "seed", Some(&p), b"seed", floor()); + + // m_kib = u32::MAX ⇒ KdfFailure, no allocation. + let mut b = base.clone(); + b[O_MKIB..O_MKIB + 4].copy_from_slice(&u32::MAX.to_le_bytes()); + assert!(matches!( + unwrap(&wid(1), "seed", Some(&p), &b).unwrap_err(), + SecretStoreError::KdfFailure + )); + + // t = ARGON2_MAX_T + 1 ⇒ KdfFailure. 
+ let mut b = base; + b[O_T..O_T + 4].copy_from_slice(&(ARGON2_MAX_T + 1).to_le_bytes()); + assert!(matches!( + unwrap(&wid(1), "seed", Some(&p), &b).unwrap_err(), + SecretStoreError::KdfFailure + )); + + // The exact ceilings are accepted by the bounds check (no derive + // here — a 1 GiB Argon2 run is not a unit-test concern). + assert!(KdfParams { + id: KDF_ID_ARGON2ID, + m_kib: ARGON2_MAX_M_KIB, + t: ARGON2_MAX_T, + p: ARGON2_P, + } + .enforce_bounds() + .is_ok()); + } + + /// A blank object password is rejected at enrol; nothing + /// is sealed. + #[test] + fn blank_object_password_rejected_at_enrol() { + for blank in [SecretString::empty(), pw(""), pw(" "), pw("\t\n")] { + let err = + wrap_with_params(&wid(1), "seed", Some(&blank), b"seed", floor()).unwrap_err(); + assert!( + matches!(err, SecretStoreError::BlankPassphrase), + "got {err:?}" + ); + } + } + + /// The plaintext is capped at `MAX_PLAINTEXT_LEN` (`MAX_SECRET_LEN − + /// MAX_ENVELOPE_OVERHEAD`), uniform across schemes, so plaintext + + /// overhead always fits the backend's own `MAX_SECRET_LEN` cap. Accept + /// at the cap, reject at cap+1 with `max = MAX_PLAINTEXT_LEN`. + #[test] + fn plaintext_size_cap_at_envelope_boundary() { + let at_cap = vec![0x5Au8; MAX_PLAINTEXT_LEN]; + let over = vec![0x5Au8; MAX_PLAINTEXT_LEN + 1]; + + // Unprotected (scheme 0): cap accepted, +1 rejected. + assert!(wrap(&wid(1), "seed", None, &at_cap).is_ok()); + assert!(matches!( + wrap(&wid(1), "seed", None, &over).unwrap_err(), + SecretStoreError::SecretTooLarge { found, max } + if found == MAX_PLAINTEXT_LEN + 1 && max == MAX_PLAINTEXT_LEN + )); + + // Protected (scheme 1): same cap (checked before any derivation). + let p = pw("pw"); + assert!(matches!( + wrap_with_params(&wid(1), "seed", Some(&p), &over, floor()).unwrap_err(), + SecretStoreError::SecretTooLarge { found, max } + if found == MAX_PLAINTEXT_LEN + 1 && max == MAX_PLAINTEXT_LEN + )); + // The enveloped bytes for an at-cap plaintext fit the backend cap. 
+ let enveloped = wrap(&wid(1), "seed", None, &at_cap).unwrap(); + assert!(enveloped.len() <= MAX_SECRET_LEN); + } + + /// Scheme-1 accepts a plaintext of EXACTLY `MAX_PLAINTEXT_LEN` (the + /// accept boundary), round-trips it, and the enveloped bytes still fit + /// the backend's `MAX_SECRET_LEN` cap. + #[test] + fn scheme1_accepts_plaintext_at_exact_cap() { + let p = pw("pw"); + let pt = vec![0x5Au8; MAX_PLAINTEXT_LEN]; + let blob = wrap_with_params(&wid(1), "seed", Some(&p), &pt, floor()).unwrap(); + assert!( + blob.len() <= MAX_SECRET_LEN, + "enveloped bytes exceed backend cap" + ); + let got = unwrap(&wid(1), "seed", Some(&p), blob.expose_secret()).unwrap(); + assert_eq!(got.expose_secret(), &pt[..]); + } + + /// Value rollback is intentionally NOT defended: an older valid scheme-1 + /// envelope still decrypts cleanly under the current password. Pinned so + /// a future reader does not mistake the strict read for rollback + /// protection (anti-rollback would need a monotonic anchor in the + /// consumer's integrity-protected metadata). + #[test] + fn value_rollback_is_not_defended() { + let p = pw("pw"); + let old_blob = wrap_with_params(&wid(1), "seed", Some(&p), b"OLD-VALUE", floor()).unwrap(); + // A newer value is written under the same identity + password … + let _new_blob = wrap_with_params(&wid(1), "seed", Some(&p), b"NEW-VALUE", floor()).unwrap(); + // … yet "restoring" the OLD envelope still decrypts cleanly. + let restored = unwrap(&wid(1), "seed", Some(&p), old_blob.expose_secret()).unwrap(); + assert_eq!( + restored.expose_secret(), + b"OLD-VALUE", + "older envelope still decrypts: value rollback is a known, undefended residual" + ); + } + + /// magic/version discrimination: a magic-less blob is a legacy raw + /// value — returned on a `None` read (with a one-time warning), refused + /// fail-closed on `Some(pw)` so the strict rule holds. 
A magic-present + /// blob with an unknown version fails closed both ways; truncated- + /// after-magic is corruption. + #[test] + fn magic_and_version_discrimination() { + let p = pw("pw"); + // (a) Magic-less / wrong magic. + let legacy = b"NOTPWSEV raw legacy seed bytes".to_vec(); + // None ⇒ legacy raw bytes (adopted contingency; NOT Corruption). + let got = unwrap(&wid(1), "seed", None, &legacy).unwrap(); + assert_eq!(got.expose_secret(), &legacy[..]); + // Some(pw) ⇒ strip/downgrade ⇒ fail closed. + assert!(matches!( + unwrap(&wid(1), "seed", Some(&p), &legacy).unwrap_err(), + SecretStoreError::ExpectedProtectedButUnsealed + )); + + // (b) Magic present but truncated below the header ⇒ Corruption. + let mut trunc = MAGIC.to_vec(); + trunc.push(ENVELOPE_VERSION); // no scheme byte + assert!(matches!( + unwrap(&wid(1), "seed", None, &trunc).unwrap_err(), + SecretStoreError::Corruption + )); + + // (c) Magic OK but version = 2 ⇒ UnsupportedEnvelopeVersion{2}, + // regardless of password. + let mut v2 = wrap_bytes(&wid(1), "seed", None, b"x"); + v2[O_VERSION] = 2; + for arg in [None, Some(&p)] { + assert!(matches!( + unwrap(&wid(1), "seed", arg, &v2).unwrap_err(), + SecretStoreError::UnsupportedEnvelopeVersion { found: 2 } + )); + } + + // (d) Magic+version OK but unknown scheme = 9 ⇒ fail closed. + let mut s9 = wrap_bytes(&wid(1), "seed", None, b"x"); + s9[O_SCHEME] = 9; + assert!(matches!( + unwrap(&wid(1), "seed", None, &s9).unwrap_err(), + SecretStoreError::UnsupportedEnvelopeVersion { found: 1 } + )); + } + + /// Non-vacuity helper for the strict read (used here and by the store + /// tests): a scheme-0 blob carrying `secret` DOES decode under `None`. 
+ #[test] + fn scheme0_some_password_fails_closed_strip() { + let blob = wrap_bytes(&wid(1), "seed", None, b"attacker-seed"); + // None ⇒ it WOULD decode to the (attacker) bytes… + assert_eq!( + unwrap(&wid(1), "seed", None, &blob) + .unwrap() + .expose_secret(), + b"attacker-seed" + ); + // …but Some(pw) ⇒ ExpectedProtectedButUnsealed, no bytes. + assert!(matches!( + unwrap(&wid(1), "seed", Some(&pw("pw")), &blob).unwrap_err(), + SecretStoreError::ExpectedProtectedButUnsealed + )); + } + + /// `ct_eq` sanity: a round-tripped secret matches the original under a + /// constant-time compare (no `==` on secret bytes). + #[test] + fn round_trip_is_constant_time_equal() { + let p = pw("pw"); + let original = SecretBytes::from_slice(b"seed material"); + let blob = wrap_p(&wid(1), "seed", Some(&p), original.expose_secret(), floor()); + let got = unwrap(&wid(1), "seed", Some(&p), &blob).unwrap(); + assert!(bool::from(got.ct_eq(&original))); + } + + /// Deterministic byte-level fuzz. Every mutant unwrap is a + /// clean `Ok` or a TYPED `SecretStoreError` — never a panic, never + /// plaintext from a tag-failing branch. The `None` path (no Argon2 + /// derivation) runs the full 2000 mutants + every truncation; the + /// `Some(pw)` path — each mutant of which may trigger a real Argon2 + /// derive — runs a representative subset so the suite stays fast while + /// still exercising the derive/open code path. + #[test] + fn fuzz_byte_mutation_never_panics() { + let p = pw("fuzz-pw"); + let valid = wrap_p(&wid(0xAB), "seed", Some(&p), b"seed-bytes", floor()); + // The pristine envelope unwraps. + assert_eq!( + unwrap(&wid(0xAB), "seed", Some(&p), &valid) + .unwrap() + .expose_secret(), + b"seed-bytes" + ); + + // xorshift32 — deterministic, std-only. 
+ let mut state: u32 = 0x9E37_79B9; + let mut next = || { + state ^= state << 13; + state ^= state >> 17; + state ^= state << 5; + state + }; + + let assert_typed = |arg: Option<&SecretString>, buf: &[u8]| { + let res = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + unwrap(&wid(0xAB), "seed", arg, buf) + })) + .expect("unwrap must never panic on hostile input"); + match res { + Ok(_) + | Err(SecretStoreError::Corruption) + | Err(SecretStoreError::WrongPassword) + | Err(SecretStoreError::NeedsPassword) + | Err(SecretStoreError::ExpectedProtectedButUnsealed) + | Err(SecretStoreError::UnsupportedEnvelopeVersion { .. }) + | Err(SecretStoreError::KdfFailure) => {} + Err(other) => panic!("unexpected error variant: {other:?}"), + } + }; + + for i in 0..2_000 { + let mut buf = valid.clone(); + let flips = 1 + (next() % 4) as usize; + for _ in 0..flips { + let idx = (next() as usize) % buf.len(); + buf[idx] ^= (next() & 0xFF) as u8; + } + // None path every iteration (cheap, no derive). + assert_typed(None, &buf); + // Some path on a representative subset (each may derive Argon2). + if i % 16 == 0 { + assert_typed(Some(&p), &buf); + } + } + + // Truncation at every offset — a short read must never panic. + for cut in 0..valid.len() { + assert_typed(None, &valid[..cut]); + assert_typed(Some(&p), &valid[..cut]); + } + } +} diff --git a/packages/rs-platform-wallet-storage/src/secrets/error.rs b/packages/rs-platform-wallet-storage/src/secrets/error.rs index 506814cd49f..6c69e9e8b93 100644 --- a/packages/rs-platform-wallet-storage/src/secrets/error.rs +++ b/packages/rs-platform-wallet-storage/src/secrets/error.rs @@ -1,24 +1,10 @@ //! Secret-store error taxonomy and its `keyring_core::Error` projection. //! -//! One concrete `thiserror` enum shared by both -//! [`SecretStore`](crate::secrets::SecretStore) backends (the encrypted -//! file vault and the OS keyring), no `#[non_exhaustive]`, **no** secret -//! 
byte, passphrase, plaintext, or stringified source that could carry -//! one in any variant. `#[error]` strings are static + structural; only -//! non-secret diagnostics (POSIX mode bits, header version int, vault -//! path) are carried as typed fields (CWE-209/CWE-532). -//! -//! The `EncryptedFileStore` surfaces this enum at its construction / -//! `rekey` API; its `keyring_core::api::CredentialApi` / -//! `CredentialStoreApi` impls project it into `keyring_core::Error` via -//! [`From`] so SPI callers see a uniform error. The `WrongPassphrase` / -//! `AlreadyLocked` variants box the typed `SecretStoreError` as the -//! `NoStorageAccess` source, so an SPI consumer can recover them -//! losslessly via `source().downcast_ref::()`; the -//! `BadStoreFormat` group has no box slot and carries only a secret-free -//! string. Either way, the fully typed path is the public -//! [`SecretStore`](crate::secrets::SecretStore) API, which returns -//! `SecretStoreError` directly. +//! Variants carry only non-secret diagnostics (POSIX mode bits, header +//! version, vault path) — never a secret byte, passphrase, plaintext, or +//! stringified source (CWE-209/CWE-532). The public, fully-typed path is +//! the [`SecretStore`](crate::secrets::SecretStore) API; the SPI +//! projection into `keyring_core::Error` is lossy (see the [`From`] impl). use std::path::Path; @@ -34,16 +20,49 @@ pub enum SecretStoreError { #[error("wrong passphrase")] WrongPassphrase, - /// AEAD tag failure on a stored entry (or a rekey re-encrypt) *after* - /// the header verify-token already passed: the entry ciphertext is - /// corrupt or tampered, **not** a wrong passphrase. Carries no - /// plaintext (CWE-347). + /// Tier-2 strip/downgrade guard: the caller asserted — by supplying an object + /// password — that this object MUST be password-protected, but the + /// stored value is a well-formed UNPROTECTED envelope (scheme-0) or a + /// legacy magic-less raw value, i.e. a strip/downgrade. 
**Fails + /// closed:** the stored bytes are NEVER returned (CWE-757/CWE-345). + #[error("expected a password-protected secret but the stored value is unprotected")] + ExpectedProtectedButUnsealed, + + /// Tier-2: a valid password-protected (scheme-1) envelope was read + /// with NO object password supplied. Never returns ciphertext. + #[error("secret is password-protected; a password is required")] + NeedsPassword, + + /// Tier-2: the object password failed the envelope's AEAD tag. Carries + /// **no** plaintext and no source (CWE-347). Distinct from + /// [`WrongPassphrase`] (the Tier-1 vault passphrase). On the + /// [`SecretStore::Os`] arm a tag failure may also indicate keychain + /// corruption rather than a wrong password — documented in + /// `SECRETS.md`; one AEAD tag cannot disambiguate the two. + /// + /// [`WrongPassphrase`]: SecretStoreError::WrongPassphrase + /// [`SecretStore::Os`]: crate::secrets::SecretStore::Os + #[error("wrong object password")] + WrongPassword, + + /// A vault passphrase (Tier-1 `open`/`rekey`) or an object password + /// (Tier-2 enrol) was blank — empty or all-whitespace — rejected via + /// [`SecretString::is_blank`]. CWE-521. + /// + /// [`SecretString::is_blank`]: crate::secrets::SecretString::is_blank + #[error( + "passphrase must not be blank; for a deliberately keyless file vault use open_unprotected" + )] + BlankPassphrase, + + /// AEAD tag failure on a stored entry (or rekey re-encrypt) *after* + /// the header verify-token passed: the entry ciphertext is corrupt or + /// tampered, **not** a wrong passphrase. No plaintext (CWE-347). #[error("vault entry failed integrity check (corruption or tampering)")] Corruption, - /// Argon2 key derivation failed. The upstream error carries no - /// useful non-secret diagnostic, so it is intentionally not - /// embedded. + /// Argon2 key derivation failed. The upstream error carries no useful + /// non-secret diagnostic, so it is not embedded. 
#[error("key derivation failed")] KdfFailure, @@ -55,6 +74,22 @@ pub enum SecretStoreError { found: u32, }, + /// A Tier-2 secret envelope carried the magic but a `version` (or, at a + /// known version, a `scheme`) this build does not understand. Fails + /// closed REGARDLESS of the password argument — an unparseable future + /// format can be neither safely unwrapped nor safely treated as + /// unprotected, so it is refused both ways. Mirrors + /// [`VersionUnsupported`] for the vault format. + /// + /// [`VersionUnsupported`]: SecretStoreError::VersionUnsupported + #[error("unsupported secret envelope version {found}")] + UnsupportedEnvelopeVersion { + /// The envelope `version` byte read from the (unauthenticated) + /// header. An unknown `scheme` under a known version reports the + /// known version byte (a forward-incompatible scheme). + found: u8, + }, + /// The vault file was malformed (bad magic, truncated header, bad /// record framing) — no plaintext was produced. #[error("malformed vault file")] @@ -73,13 +108,35 @@ pub enum SecretStoreError { mode: u32, }, - /// The vault sidecar (`.lock`) is already held by - /// another `EncryptedFileStore` handle — in this process or in - /// another process. The resident-vault model requires exclusive - /// ownership of the vault file for the store's lifetime, so the - /// second `open()` fails fast (no retry, no wait budget). Drop the - /// other handle, or wait for the other process to exit, and retry. - /// A recoverable runtime state, not a logic bug. + /// The vault file's parent directory was group/other WRITABLE + /// (`mode & 0o022 != 0`). Directory write governs rename/unlink, so a + /// writable parent lets another local user swap the vault despite its + /// own `0600`. Read-only group access (`0o750`) is fine — it leaks + /// filenames, not the 0600-protected contents. 
+ #[error("vault parent directory has insecure permissions")] + InsecureParentDir { + /// The offending POSIX mode bits on the parent directory (not + /// secret). + mode: u32, + }, + + /// A secret offered for storage exceeded the per-secret write cap + /// ([`MAX_SECRET_LEN`](crate::secrets::MAX_SECRET_LEN)). Rejected at + /// the write boundary so an oversized entry never inflates the shared + /// vault past the read-side ceiling and bricks every wallet on reopen. + #[error("secret exceeds maximum size of {max} bytes (got {found})")] + SecretTooLarge { + /// The offered secret length (bytes). + found: usize, + /// The compiled-in per-secret ceiling (bytes). + max: usize, + }, + + /// The vault sidecar (`.lock`) is already held by another + /// `EncryptedFileStore` handle in this or another process. The + /// resident-vault model needs exclusive ownership for the store's + /// lifetime, so a second `open()` fails fast (no retry). Recoverable: + /// drop the other handle and retry. #[error("vault is already locked by another store handle")] AlreadyLocked, @@ -95,28 +152,35 @@ pub enum SecretStoreError { max: u64, }, - /// Internal AEAD tag failure with no vault context yet attached. The - /// crypto seam (`crypto::open`) cannot tell *why* a tag failed, so it - /// returns this; callers translate it to [`WrongPassphrase`] (in the - /// verify-token context) or [`Corruption`] (in an entry context). - /// Never escapes to the SPI / public surface. + /// Internal AEAD tag failure with no vault context attached: + /// `crypto::open` cannot tell *why* a tag failed, so callers translate + /// this to [`WrongPassphrase`] (verify-token context) or + /// [`Corruption`] (entry context). Never escapes to the SPI surface. /// /// [`WrongPassphrase`]: SecretStoreError::WrongPassphrase /// [`Corruption`]: SecretStoreError::Corruption #[error("decryption/integrity check failed")] Decrypt, + /// AEAD encrypt-side failure (cipher construction or `encrypt`). 
+ /// Effectively unreachable — the key is always 32 bytes and plaintext + /// never approaches XChaCha20's ~256 GiB limit — but kept typed so a + /// write failure is never mislabeled a [`KdfFailure`]. + /// + /// [`KdfFailure`]: SecretStoreError::KdfFailure + #[error("encryption failed")] + Encrypt, + /// Filesystem error (open / write / rename / fsync). The inner - /// [`IoError`] carries an OS code and, when the failing operation - /// knew it, the *non-secret* path it was operating on — a - /// caller-supplied filesystem path, never a secret byte. + /// [`IoError`] carries an OS code and, when known, the *non-secret* + /// caller-supplied path — never a secret byte. #[error("{0}")] Io(#[from] IoError), - /// An OS-keyring backend (the [`SecretStore::Os`] arm) failure, - /// projected to a non-secret discriminant. Keyring variants that - /// carry raw bytes (`BadEncoding`, `BadDataFormat`) are collapsed to - /// [`OsKeyringErrorKind::BadStoreFormat`] — their bytes never enter + /// An OS-keyring backend ([`SecretStore::Os`] arm) failure, projected + /// to a non-secret discriminant. Byte-bearing keyring variants + /// (`BadEncoding`, `BadDataFormat`) collapse to + /// [`OsKeyringErrorKind::BadStoreFormat`]; their bytes never enter /// this type (CWE-209/CWE-532). /// /// [`SecretStore::Os`]: crate::secrets::SecretStore::Os @@ -128,11 +192,9 @@ pub enum SecretStoreError { } impl SecretStoreError { - /// Build an [`Io`](SecretStoreError::Io) error that names the - /// non-secret filesystem `path` the failing operation touched. - /// Use at the vault read / write / lock seams where the path is - /// known; the bare `?`/`From` conversion (path - /// unknown) stays available for the deep helpers. + /// Build an [`Io`](SecretStoreError::Io) error naming the non-secret + /// `path` the failing operation touched. Use at the read/write/lock + /// seams; deep helpers can still use the bare `?` (path unknown). 
pub(crate) fn io_at(path: &Path, source: std::io::Error) -> Self { Self::Io(IoError { path: Some(path.to_path_buf()), @@ -142,14 +204,12 @@ impl SecretStoreError { } /// Filesystem-error payload for [`SecretStoreError::Io`]. Wraps the OS -/// [`std::io::Error`] and, when the failing operation knew it, the -/// non-secret path it was operating on. `From` is -/// derived so a bare `?` still works (path defaults to `None`); the -/// path-aware seams attach it via [`SecretStoreError::io_at`]. +/// [`std::io::Error`] plus the non-secret path, when known. A bare `?` +/// works (path `None`); path-aware seams use [`SecretStoreError::io_at`]. #[derive(Debug, thiserror::Error)] pub struct IoError { - /// The non-secret filesystem path, when the failing operation knew - /// it. A caller-supplied path, never a secret. + /// The non-secret caller-supplied path, when the failing operation + /// knew it. pub path: Option, /// The underlying OS error. pub source: std::io::Error, @@ -171,8 +231,8 @@ impl From for IoError { } /// Non-secret discriminant for an OS-keyring backend failure, projected -/// from `keyring_core::Error` for the [`SecretStore::Os`] arm. Carries no -/// payload, so no secret byte, path, or attribute value can ride along. +/// from `keyring_core::Error` for the [`SecretStore::Os`] arm. Payload- +/// less, so no secret byte / path / attribute value can ride along. /// /// [`SecretStore::Os`]: crate::secrets::SecretStore::Os #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -210,56 +270,66 @@ impl From for SecretStoreError { } } -/// Bare `?` on a [`std::io::Error`] inside a function returning -/// [`SecretStoreError`] threads through [`IoError`] (path `None`); the -/// path-aware seams call [`SecretStoreError::io_at`] instead. +/// Bare `?` on an [`std::io::Error`] threads through [`IoError`] with +/// path `None`; path-aware seams call [`SecretStoreError::io_at`]. 
impl From for SecretStoreError { fn from(source: std::io::Error) -> Self { Self::Io(IoError::from(source)) } } -/// Project a [`SecretStoreError`] into `keyring_core::Error` for the -/// `CredentialApi` / `CredentialStoreApi` SPI seam. +/// Project a [`SecretStoreError`] into `keyring_core::Error` for the SPI +/// seam. Lossy by design — the lossless typed path is the +/// [`SecretStore`](crate::secrets::SecretStore) API. /// -/// - [`WrongPassphrase`] and [`AlreadyLocked`] ride in -/// [`KeyringError::NoStorageAccess`] (operator UX: "ask the operator to -/// unlock / retry") with the typed `SecretStoreError` boxed as the -/// source, so an SPI consumer can losslessly recover the variant via +/// - [`WrongPassphrase`] / [`AlreadyLocked`] and the Tier-2 credential / +/// protection states ([`NeedsPassword`], [`WrongPassword`], +/// [`ExpectedProtectedButUnsealed`], [`BlankPassphrase`]) ride in +/// [`KeyringError::NoStorageAccess`] with the typed error boxed as the +/// source, recoverable via /// `err.source().and_then(|s| s.downcast_ref::())`. -/// - [`Corruption`], [`KdfFailure`], [`VersionUnsupported`], -/// [`MalformedVault`], [`InsecurePermissions`], the internal -/// [`Decrypt`], and [`OsKeyring`] collapse into -/// [`KeyringError::BadStoreFormat`], whose `String` payload has no box -/// slot, so they carry only a static secret-free string (never secret -/// data in a format error). They remain losslessly typed on the -/// [`SecretStore`](crate::secrets::SecretStore) path. -/// - [`InvalidLabel`] becomes `KeyringError::Invalid("user", _)`. -/// - [`Io`] becomes [`KeyringError::PlatformFailure`]. +/// These are all "the caller must act on a credential/expectation to +/// proceed" states, so lossless recovery lets an SPI consumer react +/// precisely. 
+/// - The format/crypto group — including [`UnsupportedEnvelopeVersion`] +/// (a fail-closed forward-format incompatibility, mirroring +/// [`VersionUnsupported`]) — collapses into +/// [`KeyringError::BadStoreFormat`] (the error's secret-free `Display` text — that +/// variant has no box slot). +/// - [`InvalidLabel`] → `KeyringError::Invalid("user", _)`; +/// [`Io`] → [`KeyringError::PlatformFailure`]. /// /// [`WrongPassphrase`]: SecretStoreError::WrongPassphrase /// [`AlreadyLocked`]: SecretStoreError::AlreadyLocked -/// [`Corruption`]: SecretStoreError::Corruption -/// [`KdfFailure`]: SecretStoreError::KdfFailure +/// [`NeedsPassword`]: SecretStoreError::NeedsPassword +/// [`WrongPassword`]: SecretStoreError::WrongPassword +/// [`ExpectedProtectedButUnsealed`]: SecretStoreError::ExpectedProtectedButUnsealed +/// [`BlankPassphrase`]: SecretStoreError::BlankPassphrase +/// [`UnsupportedEnvelopeVersion`]: SecretStoreError::UnsupportedEnvelopeVersion /// [`VersionUnsupported`]: SecretStoreError::VersionUnsupported -/// [`MalformedVault`]: SecretStoreError::MalformedVault -/// [`InsecurePermissions`]: SecretStoreError::InsecurePermissions -/// [`Decrypt`]: SecretStoreError::Decrypt -/// [`OsKeyring`]: SecretStoreError::OsKeyring /// [`InvalidLabel`]: SecretStoreError::InvalidLabel /// [`Io`]: SecretStoreError::Io impl From for KeyringError { fn from(e: SecretStoreError) -> Self { use SecretStoreError as E; match e { - E::WrongPassphrase | E::AlreadyLocked => KeyringError::NoStorageAccess(Box::new(e)), + E::WrongPassphrase + | E::AlreadyLocked + | E::NeedsPassword + | E::WrongPassword + | E::ExpectedProtectedButUnsealed + | E::BlankPassphrase => KeyringError::NoStorageAccess(Box::new(e)), E::Corruption | E::KdfFailure | E::VersionUnsupported { .. } + | E::UnsupportedEnvelopeVersion { .. } | E::MalformedVault | E::InsecurePermissions { .. } + | E::InsecureParentDir { .. } + | E::SecretTooLarge { .. } | E::VaultTooLarge { .. } | E::Decrypt + | E::Encrypt | E::OsKeyring { .. 
} => KeyringError::BadStoreFormat(e.to_string()), E::InvalidLabel => { KeyringError::Invalid("user".to_string(), "label allowlist violation".to_string()) @@ -289,10 +359,20 @@ mod tests { for e in [ SecretStoreError::Corruption, SecretStoreError::Decrypt, + SecretStoreError::Encrypt, SecretStoreError::KdfFailure, SecretStoreError::VersionUnsupported { found: 999 }, SecretStoreError::MalformedVault, SecretStoreError::InsecurePermissions { mode: 0o644 }, + SecretStoreError::InsecureParentDir { mode: 0o777 }, + SecretStoreError::SecretTooLarge { + found: 100, + max: 10, + }, + SecretStoreError::VaultTooLarge { + found: 100, + max: 10, + }, ] { let k: KeyringError = e.into(); assert!(matches!(k, KeyringError::BadStoreFormat(_))); @@ -316,9 +396,6 @@ mod tests { #[test] fn io_at_names_path_in_display_without_leaking_secret() { - // The path-aware Io error renders the offending path so operators - // can see which file failed; the source message rides along, but - // no secret byte does (the path is caller-supplied). let err = SecretStoreError::io_at( std::path::Path::new("/var/lib/wallet/vault.pwsvault"), std::io::Error::new(std::io::ErrorKind::PermissionDenied, "denied"), @@ -351,9 +428,6 @@ mod tests { #[test] fn wrong_passphrase_is_recoverable_from_no_storage_access_source() { - // WrongPassphrase / AlreadyLocked box the typed SecretStoreError - // as the NoStorageAccess source, so an SPI consumer recovers the - // variant losslessly via `source().downcast_ref::()`. use std::error::Error as _; for original in [ SecretStoreError::WrongPassphrase, @@ -382,6 +456,102 @@ mod tests { assert!(!format!("{k}").contains("plaintext")); } + /// The five new variants exist, are constructable, render + /// distinct non-empty messages, and the Tier-2 `WrongPassword` is NOT + /// the Tier-1 `WrongPassphrase` (nor is the unseal error `Corruption`). 
+ #[test] + fn new_variants_exist_and_are_distinct() { + use SecretStoreError as E; + assert_ne!(E::WrongPassword.to_string(), E::WrongPassphrase.to_string()); + assert_ne!( + E::ExpectedProtectedButUnsealed.to_string(), + E::Corruption.to_string() + ); + let msgs: std::collections::HashSet = [ + E::NeedsPassword.to_string(), + E::WrongPassword.to_string(), + E::BlankPassphrase.to_string(), + E::ExpectedProtectedButUnsealed.to_string(), + E::UnsupportedEnvelopeVersion { found: 2 }.to_string(), + ] + .into_iter() + .collect(); + assert_eq!(msgs.len(), 5, "all five messages must be distinct"); + } + + /// Display + Debug render static, secret-free text. The + /// version variant surfaces the (non-secret) version byte and nothing + /// more. + #[test] + fn new_variants_carry_no_secret_in_display() { + use SecretStoreError as E; + assert_eq!( + E::NeedsPassword.to_string(), + "secret is password-protected; a password is required" + ); + assert_eq!(E::WrongPassword.to_string(), "wrong object password"); + assert_eq!( + E::BlankPassphrase.to_string(), + "passphrase must not be blank; for a deliberately keyless file vault use open_unprotected" + ); + assert_eq!( + E::ExpectedProtectedButUnsealed.to_string(), + "expected a password-protected secret but the stored value is unprotected" + ); + assert_eq!( + E::UnsupportedEnvelopeVersion { found: 7 }.to_string(), + "unsupported secret envelope version 7" + ); + // Debug is non-empty and free of plaintext-ish tokens for all. + for e in [ + E::NeedsPassword, + E::WrongPassword, + E::BlankPassphrase, + E::ExpectedProtectedButUnsealed, + E::UnsupportedEnvelopeVersion { found: 7 }, + ] { + let rendered = format!("{e} {e:?}"); + assert!(!rendered.contains("plaintext")); + } + } + + /// The four Tier-2 credential / + /// protection states project to a recoverable `NoStorageAccess` with + /// the typed error losslessly downcast-able, leaking no secret. 
+ #[test] + fn tier2_state_errors_project_to_recoverable_no_storage_access() { + for original in [ + SecretStoreError::NeedsPassword, + SecretStoreError::WrongPassword, + SecretStoreError::ExpectedProtectedButUnsealed, + SecretStoreError::BlankPassphrase, + ] { + let want = original.to_string(); + let k: KeyringError = original.into(); + assert!(!format!("{k}").contains("plaintext")); + match &k { + KeyringError::NoStorageAccess(src) => { + let recovered = src.downcast_ref::(); + assert!( + matches!(recovered, Some(e) if e.to_string() == want), + "expected recoverable {want}, got {recovered:?}" + ); + } + other => panic!("expected NoStorageAccess for {want}, got {other:?}"), + } + } + } + + /// `UnsupportedEnvelopeVersion` projects to the + /// secret-free `BadStoreFormat` group (forward-format incompat, + /// mirroring `VersionUnsupported`). + #[test] + fn unsupported_envelope_version_projects_to_bad_store_format() { + let k: KeyringError = SecretStoreError::UnsupportedEnvelopeVersion { found: 9 }.into(); + assert!(matches!(k, KeyringError::BadStoreFormat(_))); + assert!(!format!("{k}").contains("plaintext")); + } + #[test] fn os_keyring_projects_to_bad_store_format() { let k: KeyringError = SecretStoreError::OsKeyring { diff --git a/packages/rs-platform-wallet-storage/src/secrets/file/crypto.rs b/packages/rs-platform-wallet-storage/src/secrets/file/crypto.rs index 3205db672f5..4bb0a350b64 100644 --- a/packages/rs-platform-wallet-storage/src/secrets/file/crypto.rs +++ b/packages/rs-platform-wallet-storage/src/secrets/file/crypto.rs @@ -19,11 +19,10 @@ pub(crate) const ARGON2_MIN_T: u32 = 2; pub(crate) const ARGON2_P: u32 = 1; /// Argon2 parameter ceilings. Vault `kdf` params are attacker- -/// controllable JSON, so an oversized `m_kib`/`t` would let a crafted -/// vault force a multi-GiB allocation or an unbounded-time derivation (a -/// DoS) before any tag check. 
1 GiB memory and 16 passes bound the cost -/// well above the shipped default (64 MiB, t=3) yet far below an -/// exhaustion threshold. +/// controllable JSON, so without a cap an oversized `m_kib`/`t` could +/// force a multi-GiB allocation or unbounded derivation (DoS) before any +/// tag check. 1 GiB / 16 passes is well above the default, far below +/// exhaustion. pub(crate) const ARGON2_MAX_M_KIB: u32 = 1_048_576; pub(crate) const ARGON2_MAX_T: u32 = 16; @@ -43,11 +42,10 @@ pub(crate) fn random_bytes(buf: &mut [u8]) -> Result<(), SecretStoreError> { getrandom(buf).map_err(|_| SecretStoreError::KdfFailure) } -/// Argon2id parameters as stored in / read from the vault. Serializes -/// directly to the on-disk `kdf` object — `id` discriminates the KDF -/// algorithm (only [`KDF_ID_ARGON2ID`] is accepted today), validated -/// alongside the parameter ranges in [`KdfParams::enforce_bounds`]. -/// `deny_unknown_fields` fails closed on a stray sibling (C3). +/// Argon2id parameters stored in the on-disk `kdf` object. `id` +/// discriminates the algorithm (only [`KDF_ID_ARGON2ID`] today), +/// validated with the parameter ranges in [`KdfParams::enforce_bounds`]. +/// `deny_unknown_fields` fails closed on a stray sibling. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct KdfParams { @@ -68,13 +66,11 @@ impl KdfParams { } } - /// Reject params outside the accepted bounds before any derivation - /// or allocation runs. The lower bound refuses a downgraded vault; - /// the upper bound refuses an inflated vault from an - /// attacker-controllable JSON file that would otherwise force a - /// huge allocation / unbounded derivation ahead of any tag check. - /// An unknown algorithm `id` is also a bounds failure — Argon2id is - /// the only KDF family this version supports. 
+ /// Reject out-of-bounds params before any derivation/allocation: the + /// lower bound refuses a downgraded vault, the upper bound an inflated + /// one (huge allocation / unbounded derivation ahead of any tag + /// check). An unknown algorithm `id` also fails — Argon2id is the only + /// supported family. pub(crate) fn enforce_bounds(&self) -> Result<(), SecretStoreError> { if self.id != KDF_ID_ARGON2ID || self.m_kib < ARGON2_MIN_M_KIB @@ -89,20 +85,20 @@ impl KdfParams { } } -/// Derive a 32-byte AEAD key from `passphrase` + `salt` with Argon2id. -/// Output lands directly in a [`SecretBytes`]. +/// Derive a 32-byte AEAD key from `passphrase` + `salt` with Argon2id, +/// landing directly in a [`SecretBytes`]. Takes `&SecretString` so the +/// bare-byte passphrase view lives only inside this function. /// -/// Takes `&SecretString` directly so the bare-byte view of the -/// passphrase lives only inside this function — callers can no -/// longer accidentally hand a `&[u8]` (e.g. by holding a stray -/// `expose_secret().as_bytes()` longer than intended) into KDF input. +/// Zeroization residual: argon2 0.5.3's `zeroize` feature wipes +/// `initial_hash` / `blockhash` but NOT the bulk `Block` matrix (up to +/// `m_kib` of derived state). Accepted residual against A5 (swap / +/// core-dump while unlocked); closing it needs an upstream fix. pub(crate) fn derive_key( passphrase: &SecretString, - salt: &[u8], + salt: &[u8; SALT_LEN], params: KdfParams, ) -> Result { - // Bounds MUST gate before Params::new / hash_password_into so an - // inflated m_kib never reaches the allocator. + // Bounds MUST gate first so an inflated m_kib never reaches the allocator. 
params.enforce_bounds()?; let argon_params = Params::new(params.m_kib, params.t, params.p, Some(KEY_LEN)) .map_err(|_| SecretStoreError::KdfFailure)?; @@ -126,7 +122,7 @@ pub(crate) fn seal( plaintext: &[u8], ) -> Result<([u8; NONCE_LEN], Vec), SecretStoreError> { let cipher = XChaCha20Poly1305::new_from_slice(key.expose_secret()) - .map_err(|_| SecretStoreError::KdfFailure)?; + .map_err(|_| SecretStoreError::Encrypt)?; let mut nonce_bytes = [0u8; NONCE_LEN]; random_bytes(&mut nonce_bytes)?; let nonce = XNonce::from_slice(&nonce_bytes); @@ -138,11 +134,9 @@ pub(crate) fn seal( aad, }, ) - // Encrypt-path failure (XChaCha20-Poly1305 only fails here when - // the plaintext exceeds the construction's length limit), so it is - // not a decryption concern; keep it on the same write-oriented - // variant the cipher-construction failure above uses. - .map_err(|_| SecretStoreError::KdfFailure)?; + // AEAD write-side failure (only when plaintext exceeds the length + // limit), not a key-derivation one. + .map_err(|_| SecretStoreError::Encrypt)?; Ok((nonce_bytes, ct)) } @@ -157,7 +151,7 @@ pub(crate) fn open( ciphertext: &[u8], ) -> Result { let cipher = XChaCha20Poly1305::new_from_slice(key.expose_secret()) - .map_err(|_| SecretStoreError::KdfFailure)?; + .map_err(|_| SecretStoreError::Encrypt)?; let nonce = XNonce::from_slice(nonce); let pt = cipher .decrypt( @@ -175,6 +169,10 @@ pub(crate) fn open( mod tests { use super::*; + // Compile-time guard: argon2's `impl Zeroize for Block` is feature- + // gated, so this fails to build if `argon2/zeroize` is ever dropped. + static_assertions::assert_impl_all!(argon2::Block: zeroize::Zeroize); + /// Argon2id floor params — fast enough for unit tests; production /// runs at the default target (64 MiB). fn floor_params() -> KdfParams { @@ -253,10 +251,8 @@ mod tests { #[test] fn derive_key_rejects_inflated_m_kib_before_allocating() { - // u32::MAX m_kib must error fast (enforce_bounds) and never reach - // the multi-GiB allocator. 
A real allocation of ~4 TiB would OOM - // the test, so reaching here at all proves the ceiling fired - // first. + // u32::MAX m_kib must error via enforce_bounds before the ~4 TiB + // allocation — which would OOM the test if it ever ran. let err = derive_key( &SecretString::new("pw"), &[0u8; SALT_LEN], diff --git a/packages/rs-platform-wallet-storage/src/secrets/file/format.rs b/packages/rs-platform-wallet-storage/src/secrets/file/format.rs index d188658dd3b..bb6d90769e3 100644 --- a/packages/rs-platform-wallet-storage/src/secrets/file/format.rs +++ b/packages/rs-platform-wallet-storage/src/secrets/file/format.rs @@ -1,10 +1,7 @@ //! Versioned, self-describing vault format + canonical AAD. //! -//! The vault is one `serde_json` document covering every wallet in the -//! store: a single passphrase / salt / KDF block at the top, and a -//! nested map keyed first by `wallet_id` (lowercase hex) and then by -//! `label`. One file, one passphrase, one lock — a multi-wallet store -//! cannot lock its other wallets out by construction. +//! The vault is one `serde_json` document: a single salt / KDF block at +//! the top, then a map keyed by `wallet_id` (lowercase hex) and `label`. //! //! ```json //! { @@ -21,22 +18,18 @@ //! } //! ``` //! -//! Entries are nested `BTreeMap`s so lookup is O(log n) and the on-disk -//! shape excludes duplicate `(wallet_id, label)` pairs by construction -//! (a JSON object cannot carry two values under the same key). +//! Nested `BTreeMap`s give O(log n) lookup and a JSON-object shape that +//! excludes duplicate `(wallet_id, label)` pairs by construction. //! //! Parsing is two-step: a lax [`VersionProbe`] reads `version` first -//! (tolerating future-version sibling fields), then — only for the -//! compiled-in [`FORMAT_VERSION`] — the strict [`Vault`] payload is -//! parsed. All byte fields are lowercase hex; Argon2 params are JSON -//! numbers. +//! (tolerating future-version siblings), then the strict [`Vault`] +//! 
payload is parsed only for the compiled-in [`FORMAT_VERSION`]. //! -//! KDF params/salt are store-wide. `verify_ct` is an AEAD seal of a -//! fixed constant under the header-derived key — a wrong passphrase -//! fails its tag, so a mismatched key is rejected before any entry is -//! written or read (no mixed-key corruption). The verify-token AAD is -//! NOT bound to any wallet id (the store is now multi-wallet) so the -//! token validates the store-wide passphrase exactly once per op. +//! `verify_ct` is an AEAD seal of a fixed constant under the +//! header-derived key, so a wrong passphrase fails its tag and a +//! mismatched key is rejected before any entry is touched (no mixed-key +//! corruption). The verify-token AAD is not bound to any wallet id, so it +//! validates the store-wide passphrase once per op. use std::collections::BTreeMap; @@ -58,12 +51,10 @@ pub(crate) const VERIFY_CONSTANT: &[u8] = b"PWSVAULT-VERIFY-v1"; /// alias a real entry's AAD. pub(crate) const VERIFY_LABEL: &str = "\0verify"; -/// Sentinel wallet id used as the verify-token AAD's wallet slot. The -/// store-wide token is not bound to any real wallet; this 32-byte zero -/// id keeps the AAD shape identical to entry AAD (same length-prefixed -/// construction) without aliasing a real wallet's namespace — a real -/// wallet id `[0u8; 32]` would still produce a different AAD because -/// the label slot differs ([`VERIFY_LABEL`] vs any allowlisted label). +/// Sentinel wallet id for the verify-token AAD slot. Keeps the AAD shape +/// identical to entry AAD without aliasing a real wallet: even a real +/// `[0u8; 32]` id yields a different AAD because [`VERIFY_LABEL`] differs +/// from any allowlisted label. const VERIFY_WALLET_ID: [u8; 32] = [0u8; 32]; /// Minimum AEAD ciphertext length: the Poly1305 tag is always present @@ -71,17 +62,11 @@ const VERIFY_WALLET_ID: [u8; 32] = [0u8; 32]; /// than this is structurally impossible and rejected. 
const AEAD_TAG_LEN: usize = 16; -/// The full parsed vault: format `version`, KDF parameters, salt, the -/// passphrase-verification token, and every wallet's entries. -/// Serializes directly to the on-disk wire form — `hex_array` validates -/// `salt`/`verify_nonce` widths at the serde seam, so no parallel -/// `Vec`-typed wire mirror is needed. Field order matches the -/// documented schema and `serde_json` preserves it, so the byte layout -/// is stable. -/// -/// `deny_unknown_fields` fails closed on a stray sibling for this -/// compiled-in [`FORMAT_VERSION`] (C3). Forward-compat dispatch on -/// `version` runs through [`VersionProbe`] before this strict parse. +/// The full parsed vault, serializing directly to the on-disk wire form. +/// `hex_array` validates fixed-width fields at the serde seam, and +/// `serde_json` preserves field order, so the byte layout is stable. +/// `deny_unknown_fields` fails closed on a stray sibling; forward-compat +/// dispatch runs through [`VersionProbe`] before this strict parse. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct Vault { @@ -99,12 +84,9 @@ pub(crate) struct Vault { pub wallets: BTreeMap>, } -/// One decrypted-on-demand vault entry body. The owning -/// `Vault.wallets[wallet]` `BTreeMap` keys this by `label`, so the -/// label is the map key — not a field — and the on-disk shape can't -/// carry two entries under the same label. `hex_array` validates -/// `nonce`'s fixed width at parse; `deny_unknown_fields` fails closed -/// on a stray sibling (C3). +/// One vault entry body, keyed by `label` in the owning `BTreeMap` (so +/// the label is the map key, not a field). `hex_array` validates `nonce`'s +/// width at parse; `deny_unknown_fields` fails closed on a stray sibling. 
#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct EntryBody { @@ -115,15 +97,13 @@ pub(crate) struct EntryBody { } /// Canonical length-prefixed AAD binding ciphertext to its slot: -/// `format_version ‖ wallet_id ‖ label`. A blob moved to another slot, -/// or a rolled-back `format_version`, fails the tag. +/// `format_version ‖ wallet_id ‖ label`. A blob moved to another slot, or +/// a rolled-back `format_version`, fails the tag. /// -/// AAD-DETERMINISM INVARIANT (C1): AAD is built solely from the typed -/// `(format_version, wallet_id, label)` triple via this length-prefixed -/// layout — never from any serialized JSON bytes or JSON key order. The -/// `format_version` argument is always the compiled-in [`FORMAT_VERSION`] -/// constant at every call site; the JSON `version` field is used ONLY as -/// the two-step dispatch gate and is NEVER routed into AAD. +/// Determinism invariant: AAD is built solely from this typed triple, +/// never from serialized JSON bytes or key order. `format_version` is +/// always the compiled-in [`FORMAT_VERSION`]; the JSON `version` field is +/// a dispatch gate only and is never routed into AAD. pub(crate) fn aad(format_version: u32, wallet_id: &[u8; 32], label: &str) -> Vec { let lb = label.as_bytes(); let mut v = Vec::with_capacity(4 + 4 + 32 + 4 + lb.len()); @@ -135,21 +115,30 @@ pub(crate) fn aad(format_version: u32, wallet_id: &[u8; 32], label: &str) -> Vec v } -/// AAD for the passphrase-verification token. Uses the same canonical -/// construction as entry AAD but with a sentinel zero wallet id and -/// [`VERIFY_LABEL`] (NUL-prefixed, disjoint from every allowlisted -/// label) so the token is cryptographically tied to this -/// `format_version` only and cannot be replayed into any real entry -/// slot. -pub(crate) fn verify_aad(format_version: u32) -> Vec { - aad(format_version, &VERIFY_WALLET_ID, VERIFY_LABEL) +/// AAD for the verify-token. 
Reuses the entry-AAD construction (sentinel +/// wallet id + NUL-prefixed [`VERIFY_LABEL`], disjoint from any real +/// slot), then binds the KDF header: the length-prefixed `salt`, then +/// `id` (one byte) and `m_kib`, `t`, `p` (fixed-width LE). +/// +/// Folding the header in makes the token authenticate the salt + KDF +/// params it was derived under, so header tamper / KDF downgrade is +/// detected fail-closed (it surfaces as `WrongPassphrase` because a +/// tampered header also yields a different derived key). +pub(crate) fn verify_aad(format_version: u32, salt: &[u8; SALT_LEN], kdf: &KdfParams) -> Vec { + let mut v = aad(format_version, &VERIFY_WALLET_ID, VERIFY_LABEL); + v.extend_from_slice(&(salt.len() as u32).to_le_bytes()); + v.extend_from_slice(salt); + v.extend_from_slice(&[kdf.id]); + v.extend_from_slice(&kdf.m_kib.to_le_bytes()); + v.extend_from_slice(&kdf.t.to_le_bytes()); + v.extend_from_slice(&kdf.p.to_le_bytes()); + v } -/// Serde helpers encoding `Vec` as lowercase hex strings. Hex is -/// already a crate dependency (`WalletId::to_hex`), is deterministic and -/// self-validating, and avoids adding `base64`. The encoding sits wholly -/// outside the AEAD envelope and the AAD (C1), so it has no bearing on -/// any cryptographic binding. +/// Serde helpers encoding `Vec` as lowercase hex. Hex is already a +/// crate dependency, deterministic, and avoids adding `base64`. The +/// encoding sits outside the AEAD envelope and the AAD, so it has no +/// cryptographic bearing. mod hex_bytes { use serde::{Deserialize, Deserializer, Serializer}; @@ -163,12 +152,10 @@ mod hex_bytes { } } -/// Const-generic companion to [`hex_bytes`] for fixed-width byte fields. 
-/// Wire form is identical (lowercase hex), but the `[u8; N]` deserialize -/// target moves length validation into the serde seam — a wrong-length -/// hex blob is rejected at parse with a `serde::de::Error` naming both -/// the offending size and the expected `N`, so the field is identifiable -/// in the error message (no anonymous "invalid length"). +/// Const-generic companion to [`hex_bytes`] for fixed-width fields. The +/// `[u8; N]` target moves length validation into the serde seam: a +/// wrong-length blob is rejected at parse with an error naming the +/// offending size and the expected `N`. pub(super) mod hex_array { use serde::{de::Error as DeError, Deserialize, Deserializer, Serializer}; @@ -201,33 +188,32 @@ pub(super) mod hex_array { } } -/// Step-1 probe: read ONLY `version`, tolerating unknown sibling fields -/// so a future v-N file can be dispatched on before its payload shape is -/// committed to. MUST NOT use `deny_unknown_fields` (C3). +/// Step-1 probe: read ONLY `version`, tolerating unknown siblings so a +/// future vN file can be dispatched on. MUST NOT use `deny_unknown_fields`. #[derive(Deserialize)] struct VersionProbe { version: u32, } -/// Serialize a full vault to JSON bytes. Contains only salt/params -/// (non-secret) + ciphertext — never plaintext. +/// Serialize a vault to JSON bytes — salt/params + ciphertext only, never +/// plaintext. pub(crate) fn serialize(vault: &Vault) -> Vec { - // Vault carries only fixed-width arrays and owned Vecs that serialize - // infallibly; a serializer error would be a logic bug. + // Vault holds only fixed arrays and owned Vecs; serialization is + // infallible, so an error would be a logic bug. serde_json::to_vec(vault).expect("vault serialization is infallible") } -/// Parse a vault. Two-step: probe `version` (lax), then parse the strict -/// payload for the known version. Refuses unknown versions and any -/// malformed/short byte field — fail closed. 
Unknown KDF -/// algorithm ids and out-of-range Argon2 params are caught later at -/// `KdfParams::enforce_bounds` (called on every `derive_key`), so they -/// can't silently slip past. All `serde_json` errors are mapped to a -/// static [`SecretStoreError`] with the source DISCARDED so input bytes -/// can never leak into an error string or log. Salt and nonce widths -/// are validated by `hex_array` at the serde seam; the AEAD-tag-length -/// floor remains a post-parse check. +/// Parse a vault: probe `version` (lax), then parse the strict payload +/// for the known version. Fails closed on unknown versions and malformed +/// fields. `serde_json` errors are mapped to a static +/// [`SecretStoreError`] with the source DISCARDED so input bytes never +/// leak. Unknown KDF ids / out-of-range Argon2 params are caught later at +/// `KdfParams::enforce_bounds`. pub(crate) fn deserialize(buf: &[u8]) -> Result { + // INTENTIONAL: the 2x parse (probe + strict) over the 128MiB-capped, + // lock-gated local file is accepted for forward-version dispatch. + // INTENTIONAL: relies on serde_json's default recursion limit (128) + // for deep-nesting DoS safety — MUST NOT disable it or use from_reader. let probe: VersionProbe = serde_json::from_slice(buf).map_err(|_| SecretStoreError::MalformedVault)?; if probe.version != FORMAT_VERSION { @@ -242,12 +228,9 @@ pub(crate) fn deserialize(buf: &[u8]) -> Result { return Err(SecretStoreError::MalformedVault); } - // Validate outer wallet-id keys and inner label keys at parse time. - // The serde shape allows any string for either key, so - // a malformed file (or a tampered one) could otherwise smuggle a - // bogus wallet id past parse and surface only at the first `put` / - // `get` / `delete`. Reject the whole vault on the first offender so - // a single bad key fails the file open, not a downstream op. 
+ // Validate wallet-id and label keys at parse: the serde shape allows + // any string, so a bogus key would otherwise surface only at the + // first put/get/delete. Reject the whole vault on the first offender. for (wallet_hex, entries) in &vault.wallets { super::decode_wallet_id_hex(wallet_hex)?; for (label, body) in entries { @@ -282,17 +265,54 @@ mod tests { #[test] fn verify_aad_disjoint_from_every_entry_aad() { - // The verify-token's slot is `(VERIFY_WALLET_ID, VERIFY_LABEL)`. - // VERIFY_LABEL starts with NUL, which the allowlist forbids, so - // no real entry's AAD can collide with the token's AAD — even - // if a caller happens to register the all-zero wallet id. - let v = verify_aad(FORMAT_VERSION); - // A real entry on the same sentinel wallet id can never match - // because its label cannot contain NUL. + // VERIFY_LABEL starts with NUL (allowlist-forbidden), so no real + // entry's AAD can collide — even on the all-zero wallet id. + let salt = [7u8; SALT_LEN]; + let kdf = KdfParams::default_target(); + let v = verify_aad(FORMAT_VERSION, &salt, &kdf); assert_ne!(v, aad(FORMAT_VERSION, &VERIFY_WALLET_ID, "seed")); assert_ne!(v, aad(FORMAT_VERSION, &[1u8; 32], "seed")); } + #[test] + fn verify_aad_binds_salt_and_kdf_params() { + // The verify-token AAD authenticates the salt + KDF header, so a + // flipped salt or an in-bounds KDF-param shift yields a different + // AAD (and hence a token-tag failure at verify). + let salt = [7u8; SALT_LEN]; + let kdf = KdfParams::default_target(); + let base = verify_aad(FORMAT_VERSION, &salt, &kdf); + + let mut salt2 = salt; + salt2[0] ^= 0x01; + assert_ne!(base, verify_aad(FORMAT_VERSION, &salt2, &kdf)); + + assert_ne!( + base, + verify_aad( + FORMAT_VERSION, + &salt, + &KdfParams { + m_kib: kdf.m_kib / 2, + ..kdf + } + ) + ); + assert_ne!( + base, + verify_aad( + FORMAT_VERSION, + &salt, + &KdfParams { + t: kdf.t - 1, + ..kdf + } + ) + ); + // Identical inputs are deterministic. 
+ assert_eq!(base, verify_aad(FORMAT_VERSION, &salt, &kdf)); + } + fn test_vault(wallets: BTreeMap>) -> Vault { Vault { version: FORMAT_VERSION, @@ -366,9 +386,8 @@ mod tests { #[test] fn deserialize_accepts_unknown_kdf_id_and_bounds_check_rejects_later() { - // Unknown algo ids ride through parse so the algorithm gate - // lives in one place — `KdfParams::enforce_bounds`, called on - // every `derive_key`. The format layer no longer guards it. + // Unknown algo ids ride through parse; the gate lives solely at + // `KdfParams::enforce_bounds` (called on every `derive_key`). let mut vault = test_vault(BTreeMap::new()); vault.kdf.id = 7; let bytes = serialize(&vault); @@ -618,4 +637,146 @@ mod tests { "error leaked input bytes: {rendered}" ); } + + /// A parse of mutated bytes must be a clean `Ok` or a typed error + /// variant — never a panic / abort. + fn assert_deserialize_outcome_is_typed(bytes: &[u8]) { + let res = std::panic::catch_unwind(|| deserialize(bytes)); + let parsed = res.expect("deserialize must never panic on hostile input"); + match parsed { + Ok(_) + | Err(SecretStoreError::MalformedVault) + | Err(SecretStoreError::VersionUnsupported { .. }) + | Err(SecretStoreError::InvalidLabel) => {} + Err(other) => panic!("unexpected error variant from parser: {other:?}"), + } + } + + /// Deterministic byte-level fuzz: flip bytes and truncate at every + /// offset of a valid vault, asserting the parser stays fail-closed and + /// never panics. Fixed seed, no proptest dependency. + #[test] + fn parser_is_fuzz_resistant_to_byte_mutation() { + let mut entries = BTreeMap::new(); + entries.insert( + "bip39_mnemonic".to_string(), + EntryBody { + nonce: [0x33; NONCE_LEN], + ciphertext: vec![0x44; AEAD_TAG_LEN + 16], + }, + ); + let mut wallets = BTreeMap::new(); + wallets.insert(hex::encode([0xABu8; 32]), entries); + let valid = serialize(&test_vault(wallets)); + + // The pristine vault parses. 
+ assert!(deserialize(&valid).is_ok()); + + // xorshift32 — deterministic, std-only. + let mut state: u32 = 0x1234_5678; + let mut next = || { + state ^= state << 13; + state ^= state >> 17; + state ^= state << 5; + state + }; + + for _ in 0..2_000 { + let mut buf = valid.clone(); + // Flip 1..=4 random bytes. + let flips = 1 + (next() % 4) as usize; + for _ in 0..flips { + let idx = (next() as usize) % buf.len(); + buf[idx] ^= (next() & 0xFF) as u8; + } + assert_deserialize_outcome_is_typed(&buf); + } + + // Truncation at every offset — a short read must never panic. + for cut in 0..valid.len() { + assert_deserialize_outcome_is_typed(&valid[..cut]); + } + } + + /// Structural fuzz: hostile shapes a byte-flip rarely hits (oversized + /// KDF params, deep nesting, bad labels, wrong-width hex). Each must be + /// a typed error or a valid Ok, never a panic. Inflated KDF params + /// parse Ok by design (the bounds gate lives at `derive_key`). + #[test] + fn parser_is_fuzz_resistant_to_structural_mutation() { + let base: serde_json::Value = + serde_json::from_slice(&serialize(&test_vault(BTreeMap::new()))).unwrap(); + let wid_owned = hex::encode([1u8; 32]); + let wid = wid_owned.as_str(); + let good_nonce = "0".repeat(NONCE_LEN * 2); + let good_ct = "0".repeat((AEAD_TAG_LEN + 1) * 2); + + let mut cases: Vec = Vec::new(); + + // Oversized / absurd KDF params. + for (k, v) in [ + ("m_kib", serde_json::json!(u32::MAX)), + ("t", serde_json::json!(u32::MAX)), + ("p", serde_json::json!(u32::MAX)), + ("id", serde_json::json!(255)), + ] { + let mut c = base.clone(); + c["kdf"][k] = v; + cases.push(c); + } + + // Deep nesting in the wallets map (well past the type's depth). + { + let mut nested = serde_json::json!(0); + for _ in 0..512 { + nested = serde_json::json!([nested]); + } + let mut c = base.clone(); + c["wallets"] = nested; + cases.push(c); + } + + // Hostile labels and key shapes. 
+ for label in ["\0null", "../escape", &"a".repeat(65), "has space"] { + let mut c = base.clone(); + c["wallets"] = serde_json::json!({ wid: { label: { "nonce": good_nonce.as_str(), "ciphertext": good_ct.as_str() } } }); + cases.push(c); + } + + // Wrong-width hex and oversized declared sizes. + for (nonce, ct) in [ + ("00", good_ct.as_str()), // short nonce + (good_nonce.as_str(), "00"), // short ciphertext + (&"0".repeat(NONCE_LEN * 4), good_ct.as_str()), // over-wide nonce + ("zz", good_ct.as_str()), // non-hex nonce + ] { + let mut c = base.clone(); + c["wallets"] = + serde_json::json!({ wid: { "seed": { "nonce": nonce, "ciphertext": ct } } }); + cases.push(c); + } + + // Non-hex / wrong-length outer wallet-id key. + for bad_wid in ["not-hex", &"aa".repeat(8), &"AB".repeat(32)] { + let mut c = base.clone(); + c["wallets"] = serde_json::json!({ bad_wid: { "seed": { "nonce": good_nonce.as_str(), "ciphertext": good_ct.as_str() } } }); + cases.push(c); + } + + // Header fields (salt / verify_nonce / verify_ct): empty / short / + // over-wide / non-hex must each be a typed error, never a panic. + let over_wide = "0".repeat(SALT_LEN * 4); + for field in ["salt", "verify_nonce", "verify_ct"] { + for bad in ["", "00", over_wide.as_str(), "zz"] { + let mut c = base.clone(); + c[field] = serde_json::json!(bad); + cases.push(c); + } + } + + for c in cases { + let bytes = serde_json::to_vec(&c).unwrap(); + assert_deserialize_outcome_is_typed(&bytes); + } + } } diff --git a/packages/rs-platform-wallet-storage/src/secrets/file/mod.rs b/packages/rs-platform-wallet-storage/src/secrets/file/mod.rs index c2ae67e7352..8131f2643be 100644 --- a/packages/rs-platform-wallet-storage/src/secrets/file/mod.rs +++ b/packages/rs-platform-wallet-storage/src/secrets/file/mod.rs @@ -1,28 +1,24 @@ //! [`EncryptedFileStore`] — passphrase-encrypted on-disk vault, resident //! in memory while the store handle lives. //! -//! # Lifecycle +//! 
[`open`] takes the advisory lock on a sibling `.lock` sidecar (single +//! attempt), then creates or decrypts the vault and keeps the plaintext +//! entry map resident. [`get`] serves from memory (no per-op KDF/disk); +//! every mutation ([`put`], [`delete`], [`rekey`]) edits memory then +//! re-encrypts and atomically rewrites the file. [`Drop`] best-effort +//! re-syncs, re-asserts `0600` on Unix, and releases the lock. A second +//! `open()` of a held path fails fast with +//! [`SecretStoreError::AlreadyLocked`]. //! -//! - [`open`] grabs the cross-platform advisory lock on a sibling -//! `.lock` sidecar (single attempt, no retry), creates a fresh vault -//! if none exists yet, otherwise decrypts the existing one, and keeps -//! the plaintext entry map resident. -//! - Every mutation ([`put`], [`delete`], [`rekey`]) edits the in-memory -//! vault and immediately re-encrypts and atomically writes it back to -//! disk (eager sync). -//! - [`get`] reads from the in-memory map — no KDF, no disk hit per op. -//! - [`Drop`] best-effort-syncs the resident state once more, re-asserts -//! `0600` on Unix, and releases the lock when the file descriptor -//! closes. +//! **Cross-process exclusion is LOCAL-filesystem only.** `AlreadyLocked` +//! rests on `fd-lock` (`flock` / `LockFileEx`), which does NOT interlock +//! over NFS/CIFS/SMB — two hosts could each "lock" and last-writer-wins +//! would lose secrets. A vault file MUST NOT be shared across hosts; use +//! the OS keyring ([`SecretStore::Os`]) for multi-host access. The lock +//! sidecar is distinct from the vault file so the atomic `persist` rename +//! never touches the inode the open lock fd points at. //! -//! Concurrency is intentionally not supported: a second `open()` against -//! a path some other store handle (in this or another process) is -//! already holding fails fast with [`SecretStoreError::AlreadyLocked`]. -//! -//! One file, one passphrase, one lock — a multi-wallet store cannot -//! 
lock its other wallets out by construction. The lock sidecar -//! (`.lock`) is distinct from the vault file itself so the atomic -//! `persist` rename never touches the inode an open lock fd points at. +//! [`SecretStore::Os`]: crate::secrets::SecretStore::Os //! //! [`open`]: EncryptedFileStore::open //! [`put`]: EncryptedFileStore::put_bytes @@ -30,21 +26,21 @@ //! [`rekey`]: EncryptedFileStore::rekey //! [`get`]: EncryptedFileStore::get_bytes //! -//! ## Threat coverage -//! -//! Covers **A1** (other local user), **A4** (lost laptop / cold -//! backup), **A6** (synced backup of the vault file): the at-rest file -//! is Argon2id + AEAD, useless without the passphrase. Does **not** -//! cover **A3** (passphrase / derived key resident while unlocked), a -//! weak operator passphrase (KDF raises cost, does not eliminate the -//! risk — an accepted residual), or **A5** if the derived key / plaintext is -//! swapped or core-dumped while unlocked (best-effort mitigated by -//! zeroize + mlock, not eliminated). The derived AEAD key is held -//! resident inside a [`SecretBytes`] for the store's lifetime so reads -//! and writes do not pay the Argon2 cost per op; it is zeroized on Drop. - -mod crypto; -mod format; +//! Threat coverage: the at-rest file is Argon2id + AEAD, so it protects +//! **A1** (other local user), **A4** (lost laptop / cold backup), and +//! **A6** (synced backup). It does NOT cover **A3** (key/passphrase +//! resident while unlocked), a weak operator passphrase, or **A5** +//! (swap / core-dump while unlocked) — the last is best-effort mitigated +//! by zeroize + mlock. The derived AEAD key stays resident in a +//! [`SecretBytes`] (to avoid per-op Argon2) and is zeroized on Drop. + +// `pub(super)` (= visible within `crate::secrets`) so the Tier-2 +// `envelope` module — a sibling of `file` under `secrets` — can reuse the +// shared Argon2id/XChaCha primitives and `KDF_ID_ARGON2ID` without +// duplicating crypto. 
Items inside stay `pub(crate)`/`pub(in …file)`, so +// nothing escapes the secrets tree (see the crypto.rs module doc). +pub(super) mod crypto; +pub(super) mod format; use std::any::Any; use std::collections::HashMap; @@ -61,12 +57,11 @@ use format::{EntryBody, Vault}; use super::error::SecretStoreError; -use super::secret::{SecretBytes, SecretString}; +use super::secret::{SecretBytes, SecretString, MIN_PASSPHRASE_LEN}; use super::validate::{validated_label, WalletId}; -/// Upstream service-prefix for vault entries. The full `service` -/// string is `SERVICE_PREFIX + hex(wallet_id)`, mapping each wallet -/// to its own keyring "service" namespace. +/// Service-prefix for vault entries: the full `service` string is +/// `SERVICE_PREFIX + hex(wallet_id)`, one namespace per wallet. pub const SERVICE_PREFIX: &str = "dash.platform-wallet-storage/"; /// Vendor / id tags published through `CredentialStoreApi`. @@ -74,113 +69,118 @@ const VENDOR: &str = "dash.platform-wallet-storage"; const STORE_ID: &str = "encrypted-file-store-v1"; /// Structural ceiling on the on-disk vault file. The vault is -/// attacker-controllable JSON; a multi-GiB file would force a huge -/// `fs::read` allocation ahead of any tag check, so refuse to even -/// allocate beyond this cap and surface -/// [`SecretStoreError::VaultTooLarge`]. +/// attacker-controllable JSON, so refuse to allocate / parse beyond this +/// cap (surfacing [`SecretStoreError::VaultTooLarge`]) ahead of any tag +/// check rather than let a multi-GiB file force a huge `fs::read`. pub const MAX_VAULT_SIZE_BYTES: u64 = 128 * 1024 * 1024; +/// Per-secret write-side ceiling. The vault is ONE shared document, so an +/// uncapped oversized entry would inflate it past [`MAX_VAULT_SIZE_BYTES`] +/// and brick every wallet on reopen. 64 KiB is far above any legitimate +/// mnemonic / seed / xpriv. Enforced with +/// [`SecretStoreError::SecretTooLarge`] at the write boundary before the +/// secret is sealed or inserted. 
+pub const MAX_SECRET_LEN: usize = 64 * 1024; + /// A passphrase-encrypted file-backed credential store. /// -/// One file, one passphrase, one lock — the whole store rotates -/// together via [`rekey`](Self::rekey). Every [`SecretString`] and the -/// resident derived AEAD key are zeroized when the store drops. -/// The plaintext entry map is held in -/// [`EntryBody`]-shaped form: the bytes inside `ciphertext` are -/// ciphertext, but the structure is fully populated so reads do not -/// re-touch disk. -/// -/// The handle is cheap-`Clone` — both clones share the same -/// `Arc>`, so every operation through any clone sees the same -/// resident state and serializes against every other operation. +/// One file, one passphrase, one lock; the whole store rotates together +/// via [`rekey`](Self::rekey). The cheap-`Clone` handle shares one +/// `Arc>`, so every clone sees the same resident state and +/// serializes against every other operation. All [`SecretString`]s and +/// the resident AEAD key are zeroized on drop. #[derive(Clone)] pub struct EncryptedFileStore { inner: Arc>, } -/// All resident state for the store — path, advisory file lock, -/// in-memory vault, cached AEAD key, and the store-wide passphrase — -/// coalesced behind a single [`Mutex`] at the [`Arc`] wrapper level so -/// every mutation observes a consistent triple (vault matches the key -/// it was sealed under, key matches the passphrase that derived it). -/// -/// A single lock keeps put/get/delete/rekey serialized against each -/// other so a concurrent put cannot seal under an old key while rekey -/// is swapping in a new one. The disk write happens while -/// the lock is held; the file-lock sidecar already serializes -/// cross-process so this does not introduce a new I/O contention -/// point. +/// Resident store state behind a single [`Mutex`] so every mutation sees +/// a consistent triple (vault matches the key it was sealed under, key +/// matches the passphrase that derived it). 
The single lock serializes +/// put/get/delete/rekey so a put cannot seal under an old key mid-rekey; +/// the disk write happens under it, and the file-lock sidecar already +/// serializes cross-process, so this adds no new I/O contention point. struct EncryptedFileStoreInner { /// Vault file path supplied by the caller at [`open`]. /// /// [`open`]: EncryptedFileStore::open path: PathBuf, - /// In-memory vault. Mutations edit this directly and then call - /// `sync_to_disk` to re-encrypt and atomically replace the - /// on-disk file. Reads return clones from here without hitting - /// disk. + /// In-memory vault. Mutations edit it then `sync_to_disk` to + /// re-encrypt and atomically replace the file; reads clone from here. vault: Vault, - /// Cached AEAD key derived once at [`open`] from the salt + KDF - /// params + passphrase. Re-derived only on [`rekey`]. Keeping the - /// key resident is what makes mutations cheap (one AEAD seal per - /// entry, no Argon2 per op) and matches the resident-vault model. - /// A3 (key resident while unlocked) is an accepted threat in the - /// module docs; the buffer zeroizes when the state drops. + /// AEAD key derived once at [`open`] (re-derived only on [`rekey`]). + /// Held resident to avoid per-op Argon2 — A3 (key resident while + /// unlocked) is an accepted threat; zeroized when the state drops. /// /// [`open`]: EncryptedFileStore::open /// [`rekey`]: EncryptedFileStore::rekey derived_key: SecretBytes, - /// The store-wide passphrase. Swapped atomically with `vault` and - /// `derived_key` under the same lock during [`rekey`]. + /// Store-wide passphrase, swapped with `vault` + `derived_key` under + /// the same lock during [`rekey`]. /// /// [`rekey`]: EncryptedFileStore::rekey passphrase: SecretString, - /// Holds the cross-platform advisory write-lock on `.lock` - /// for the entire lifetime of the store. Dropped (releasing the - /// flock / LockFileEx) when the store drops. 
+ /// Holds the advisory write-lock on `.lock` for the store's + /// lifetime; dropped (releasing the lock) when the store drops. _lock: VaultLock, } impl EncryptedFileStore { - /// Open a vault store at `path`, unlocked by `passphrase`. `path` - /// is the vault FILE, not a directory — the operator picks the - /// filename. - /// - /// The call acquires an exclusive advisory lock on a sibling - /// `.lock` sidecar before touching the vault. If the lock is - /// already held (by another handle in this process or by another - /// process) the call returns [`SecretStoreError::AlreadyLocked`] - /// immediately — there is no retry loop. + /// Open a vault store at `path` (the vault FILE, not a directory), + /// unlocked by `passphrase`. /// - /// If `path` does not exist yet a fresh vault (random salt, default - /// Argon2 params, sealed verify token, no entries) is created at - /// `0600` on Unix. If it exists the vault is read, the passphrase - /// is verified against the header verify-token, and the plaintext - /// entry map is loaded into memory. Either way the returned store - /// is immediately usable. + /// Acquires an exclusive advisory lock on a sibling `.lock` + /// first; if already held, returns [`SecretStoreError::AlreadyLocked`] + /// immediately (no retry). A missing `path` is created fresh (random + /// salt, default Argon2 params, sealed verify token) at `0600` on + /// Unix; an existing one is read and its passphrase verified against + /// the header verify-token. Either way the returned store is usable. pub fn open( path: impl AsRef, passphrase: SecretString, ) -> Result { - let path = path.as_ref().to_path_buf(); - - // Make sure the parent directory exists so both the lock sidecar - // open and the vault create do not fail on a not-yet-materialized - // dir (canonical for first-setup operators). 
`Path::parent()` - // returns `Some("")` for a bare relative filename, which neither - // `create_dir_all` nor the cross-platform persist path can - // consume — normalize the empty-string parent to ".". + // Tier-1 baseline: reject a blank passphrase (empty / all-whitespace) + // BEFORE touching the filesystem. A blank passphrase derives a key + // from a public salt only — obfuscation, not confidentiality. + // This is an INTENDED behavioural break for any caller + // that relied on `SecretString::empty()`; a deliberate keyless vault + // must use [`open_unprotected`](Self::open_unprotected). No vault + // file is created or altered for a blank passphrase. + reject_weak_passphrase(&passphrase)?; + Self::open_inner(path.as_ref(), passphrase) + } + + /// Open (or create) a **deliberately keyless** vault — the only door + /// that accepts no passphrase. The vault key is derived from an empty + /// passphrase under the public salt, so this is **obfuscation, not + /// confidentiality**: use it only where the stored secrets carry their + /// own Tier-2 object password, or as a staging step before + /// [`rekey`](Self::rekey) to a real passphrase. This is the explicit + /// keyless door, distinct from [`open`](Self::open) (which now rejects a + /// blank passphrase). + pub fn open_unprotected(path: impl AsRef) -> Result { + Self::open_inner(path.as_ref(), SecretString::empty()) + } + + /// Shared open/create core for [`open`](Self::open) and + /// [`open_unprotected`](Self::open_unprotected). Does NOT apply the + /// blank-passphrase guard — the public doors decide that. + fn open_inner(path: &Path, passphrase: SecretString) -> Result { + let path = path.to_path_buf(); + + // Materialize the parent so the lock-sidecar open and vault + // create do not fail on a not-yet-existing dir.
let parent = normalized_parent(&path); create_parent_dir(parent)?; + // Refuse a group/other-WRITABLE parent: directory write governs + // rename/unlink, so a writable parent lets another local user + // replace the vault despite its own 0600 (the A1 guarantee). + check_parent_perms(parent)?; - // Acquire the lock first — every subsequent step assumes - // exclusive ownership of the vault file. + // Lock first — every subsequent step assumes exclusive ownership. let lock = VaultLock::acquire(&lock_path_for(&path))?; - // Decide between load-existing and create-fresh based on a - // single open attempt: NotFound → fresh; anything else → load - // (the perm check inside `read_existing_vault` covers loose - // perms on a real file). + // NotFound → create fresh; anything else → load. let (vault, derived_key) = match Self::load_existing_vault(&path, &passphrase)? { Some(loaded) => loaded, None => Self::create_new_vault(&path, &passphrase)?, @@ -222,32 +222,28 @@ impl EncryptedFileStore { Ok((vault, key)) } - /// Re-encrypt the whole store under `new_passphrase`: fresh salt + - /// fresh per-entry nonces for every wallet's entries, then - /// atomically replace the vault file. No `.bak` retains old key - /// material. The swap is whole-store: every - /// wallet's entries are re-keyed in one shot, so the store cannot - /// end up half-rotated. The in-memory vault, derived key, and - /// passphrase advance together under the resident-state mutex. + /// Re-encrypt the whole store under `new_passphrase` — fresh salt and + /// per-entry nonces for every wallet, atomically in one shot (no + /// half-rotated state, no `.bak` retaining old key material). Vault, + /// derived key, and passphrase advance together under the mutex. 
/// - /// The fresh KDF / Argon2 derivation runs OUTSIDE the lock — it - /// only touches the new passphrase + a fresh salt and never reads - /// resident state, so paying ~hundreds of ms inside the critical - /// section would just stall unrelated put/get operations. + /// The Argon2 derivation runs OUTSIDE the lock — it touches only the + /// new passphrase + fresh salt, so paying ~hundreds of ms inside the + /// critical section would needlessly stall unrelated put/get ops. pub fn rekey(&self, new_passphrase: SecretString) -> Result<(), SecretStoreError> { + // Reject a blank target passphrase: `rekey` always advances to a + // REAL passphrase (the empty→real migration uses this). The resident + // vault, key, and on-disk file are untouched on rejection. To make a + // vault keyless, use `open_unprotected` on a fresh path instead. + reject_weak_passphrase(&new_passphrase)?; let (new_vault, new_key) = build_fresh_vault(&new_passphrase)?; lock_inner(&self.inner).rekey(new_vault, new_key, new_passphrase) } /// Store `secret` under `(wallet_id, label)`, returning the typed - /// [`SecretStoreError`] (lossless — no `keyring_core::Error` seam). - /// The public [`SecretStore`](crate::secrets::SecretStore) file - /// arm delegates here so the structural error distinction - /// survives. Symmetric with [`get_bytes`]: the secret stays - /// wrapped in [`SecretBytes`] across this seam; the lone bare-buffer - /// exposure lives one layer down at the AEAD seal call. - /// - /// [`get_bytes`]: Self::get_bytes + /// [`SecretStoreError`] losslessly (no SPI seam). The secret stays + /// wrapped across this boundary; the bare-buffer exposure is one layer + /// down at the AEAD seal. pub(crate) fn put_bytes( &self, wallet_id: &WalletId, @@ -258,12 +254,9 @@ impl EncryptedFileStore { } /// Retrieve the plaintext under `(wallet_id, label)`, or `None` if - /// absent, returning the typed [`SecretStoreError`]. 
The plaintext - /// stays inside a zeroizing [`SecretBytes`] all the way to this - /// boundary; the single `.expose_secret().to_vec()` conversion lives - /// at the upstream `CredentialApi::get_secret` - /// SPI seam, the only point where the SPI contract demands a bare - /// `Vec`. + /// absent. The plaintext stays inside a zeroizing [`SecretBytes`] to + /// this boundary; the bare-`Vec` conversion lives only at the + /// `CredentialApi::get_secret` SPI seam, where the contract demands it. pub(crate) fn get_bytes( &self, wallet_id: &WalletId, @@ -292,11 +285,9 @@ impl EncryptedFileStore { write_vault_at(&lock_inner(&self.inner).path, vault) } - /// Drop the in-memory copy of the vault and reload it from disk - /// under the current passphrase. Useful for tests that mutate the - /// on-disk file out from under the store and want subsequent reads - /// to observe the new bytes (the resident-vault model otherwise - /// caches the loaded state). + /// Reload the vault from disk under the current passphrase, so a test + /// that patched the on-disk file sees the new bytes (the resident + /// model otherwise serves the cached state). #[cfg(test)] pub(crate) fn test_reload_from_disk(&self) -> Result<(), SecretStoreError> { let mut state = lock_inner(&self.inner); @@ -310,11 +301,9 @@ impl EncryptedFileStore { } } -/// Acquire the single coarse-grained state lock on `inner`. -/// Poisoned-mutex recovery is "log and continue": a previously-panicked -/// holder cannot have left the [`EncryptedFileStoreInner`] half-written -/// (every mutation either succeeds wholesale and writes to disk or -/// reverts), so the inner value is safe to keep using. +/// Acquire the state lock on `inner`. A poisoned mutex is recovered (not +/// propagated): every mutation either commits wholesale or reverts, so a +/// panicked holder cannot have left the inner value half-written. 
fn lock_inner( inner: &Arc>, ) -> std::sync::MutexGuard<'_, EncryptedFileStoreInner> { @@ -337,32 +326,38 @@ impl EncryptedFileStoreInner { secret: &SecretBytes, ) -> Result<(), SecretStoreError> { let label = validated_label(label)?.to_string(); + // Reject before sealing: the shared document would otherwise + // inflate past the read-side ceiling and brick every wallet. + if secret.len() > MAX_SECRET_LEN { + return Err(SecretStoreError::SecretTooLarge { + found: secret.len(), + max: MAX_SECRET_LEN, + }); + } let aad = format::aad(format::FORMAT_VERSION, wallet_id.as_bytes(), &label); let (nonce, ciphertext) = crypto::seal(&self.derived_key, &aad, secret.expose_secret())?; - // Mutate in memory; remember the prior body so we can roll - // back on a disk-write failure (the resident state must - // always match what is on disk after a returned-Ok mutation). + // Remember the prior body so a disk-write failure can revert the + // resident state to match disk (Ok must imply memory == disk). let prior = { let entries = self.vault.wallets.entry(wallet_id.to_hex()).or_default(); entries.insert(label.clone(), EntryBody { nonce, ciphertext }) }; if let Err(e) = self.sync_to_disk() { - let entries = self - .vault - .wallets - .get_mut(&wallet_id.to_hex()) - .expect("entry just inserted"); - match prior { - Some(prev) => { - entries.insert(label, prev); - } - None => { - entries.remove(&label); - if entries.is_empty() { - self.vault.wallets.remove(&wallet_id.to_hex()); + // A missing bucket means the insert never landed (nothing to + // undo) — return the error rather than panic. 
+ if let Some(entries) = self.vault.wallets.get_mut(&wallet_id.to_hex()) { + match prior { + Some(prev) => { + entries.insert(label, prev); + } + None => { + entries.remove(&label); + if entries.is_empty() { + self.vault.wallets.remove(&wallet_id.to_hex()); + } } } } @@ -457,9 +452,8 @@ impl EncryptedFileStoreInner { new_vault.wallets.insert(wallet_hex.clone(), new_entries); } - // Stage the new triple in memory, write to disk, and on - // failure restore the old triple so the live handle keeps - // serving under the still-on-disk key. + // Stage the new triple; on disk-write failure restore the old one + // so the live handle keeps serving under the still-on-disk key. let old_vault = std::mem::replace(&mut self.vault, new_vault); let old_key = std::mem::replace(&mut self.derived_key, new_key); let old_pp = std::mem::replace(&mut self.passphrase, new_passphrase); @@ -476,53 +470,60 @@ impl EncryptedFileStoreInner { impl Drop for EncryptedFileStoreInner { fn drop(&mut self) { - // Belt-and-suspenders sync of resident state. Eager-sync on - // every mutation makes this redundant in the success path, but - // a final write lets a future feature (e.g. opportunistic - // background buffering) hang off the same Drop without changing - // the contract. `&mut self` here implies unique ownership — - // the outer `Mutex` is being dropped too, so no other holder - // can be waiting. + // Best-effort final sync. Redundant in the success path (every + // mutation eager-syncs) but kept as a contract anchor. if let Err(e) = self.sync_to_disk() { tracing::warn!(error = %e, "drop-time vault sync failed"); } - // Re-assert restrictive perms on Unix. Between writes the file - // is already 0600, but this defends against a peer that - // loosened them through some other path while we held the - // lock. Best-effort: any failure is non-fatal at Drop. + // Re-assert 0600 on Unix in case a peer loosened it while we held + // the lock. Best-effort: failures are non-fatal at Drop. 
#[cfg(unix)] - if let Ok(file) = open_no_follow(&self.path) { - if let Err(e) = set_restrictive_perms(&file) { - tracing::warn!(error = %e, "drop-time perm re-assert failed"); + match open_no_follow(&self.path) { + Ok(file) => { + if let Err(e) = set_restrictive_perms(&file) { + tracing::warn!(error = %e, "drop-time perm re-assert failed"); + } + } + Err(e) => { + tracing::warn!( + error = %e, + "drop-time perm re-assert skipped: vault re-open refused" + ); } } - // The `VaultLock` field drops naturally after this method - // returns, releasing the OS advisory lock. + // `VaultLock` drops after this returns, releasing the OS lock. } } -/// Sidecar advisory-lock path for the store's vault file. Kept -/// distinct from the vault file itself so the cross-platform -/// `persist` swap never touches the inode an open lock fd points -/// at — the lock fd remains valid across the atomic replace. +/// Sidecar lock path (`.lock`). Distinct from the vault file so the +/// atomic `persist` swap never touches the inode the lock fd points at. fn lock_path_for(path: &Path) -> PathBuf { let mut s = path.to_path_buf().into_os_string(); s.push(".lock"); PathBuf::from(s) } -/// Build a fresh vault skeleton: random salt, default Argon2 -/// params, and a passphrase-verification token sealed under the -/// freshly derived key (the token is the mixed-key-corruption guard). -/// Returns the (entry-less) vault and the -/// derived key so the caller can seal entries against it without -/// re-deriving. +/// Reject a blank (empty / all-whitespace) or sub-floor passphrase → +/// [`SecretStoreError::BlankPassphrase`]. The floor is the coarse +/// [`MIN_PASSPHRASE_LEN`] (1 today = merely non-blank); the real entropy +/// policy is the consumer's (see `SECRETS.md`). A blank check alone closes +/// the blank-passphrase gap; the length term keeps the floor wired for a future bump.
+fn reject_weak_passphrase(passphrase: &SecretString) -> Result<(), SecretStoreError> { + if passphrase.is_blank() || passphrase.trimmed().len() < MIN_PASSPHRASE_LEN { + return Err(SecretStoreError::BlankPassphrase); + } + Ok(()) +} + +/// Build a fresh entry-less vault (random salt, default Argon2 params, +/// verify-token sealed under the derived key) plus that derived key, so +/// the caller can seal entries without re-deriving. fn build_fresh_vault(passphrase: &SecretString) -> Result<(Vault, SecretBytes), SecretStoreError> { let mut salt = [0u8; SALT_LEN]; crypto::random_bytes(&mut salt)?; let kdf = KdfParams::default_target(); let key = crypto::derive_key(passphrase, &salt, kdf)?; - let v_aad = format::verify_aad(format::FORMAT_VERSION); + let v_aad = format::verify_aad(format::FORMAT_VERSION, &salt, &kdf); let (verify_nonce, verify_ct) = crypto::seal(&key, &v_aad, format::VERIFY_CONSTANT)?; Ok(( Vault { @@ -546,7 +547,7 @@ fn derive_and_verify( passphrase: &SecretString, ) -> Result { let key = crypto::derive_key(passphrase, &vault.salt, vault.kdf)?; - let v_aad = format::verify_aad(format::FORMAT_VERSION); + let v_aad = format::verify_aad(format::FORMAT_VERSION, &vault.salt, &vault.kdf); match crypto::open(&key, &vault.verify_nonce, &v_aad, &vault.verify_ct) { Ok(_) => Ok(key), Err(SecretStoreError::Decrypt) => Err(SecretStoreError::WrongPassphrase), @@ -554,13 +555,10 @@ fn derive_and_verify( } } -/// Read + parse the vault at `path`, or `None` if it does not exist. -/// Refuses a pre-existing file with looser-than-0600 perms and a file -/// exceeding [`MAX_VAULT_SIZE_BYTES`]. -/// -/// Eliminates the metadata→read TOCTOU: opens the file once with -/// `O_NOFOLLOW` on Unix, then derives perms / size from -/// the open handle's `metadata()` and reads from the same fd. +/// Read + parse the vault at `path`, or `None` if absent. Refuses +/// looser-than-0600 perms and a file over [`MAX_VAULT_SIZE_BYTES`]. 
+/// Opens once with `O_NOFOLLOW` and derives perms/size from the same fd +/// to avoid a metadata→read TOCTOU. fn read_vault_at(path: &Path) -> Result, SecretStoreError> { let file = match open_no_follow(path) { Ok(file) => file, @@ -592,16 +590,12 @@ fn read_vault_at(path: &Path) -> Result, SecretStoreError> { Ok(Some(format::deserialize(&bytes)?)) } -/// Atomically replace the vault at `path`, cross-platform. -/// -/// Stages into a `NamedTempFile` in the SAME directory (so `persist` -/// cannot fail cross-volume), tightens perms to 0600 on Unix before -/// any byte is written, then: `write_all` → `sync_all` → -/// `persist(path)` → Unix parent-dir fsync. The destination is never -/// pre-removed, so a crash leaves either the old or the new vault, -/// never an absent one. On `persist` failure the temp drops and -/// self-cleans — no manual remove racing it. The temp holds only -/// ciphertext+header, never plaintext. +/// Atomically replace the vault at `path`. Stages into a same-directory +/// `NamedTempFile` (so `persist` cannot fail cross-volume), tightens to +/// 0600 before writing, then `write_all` → `sync_all` → `persist` → Unix +/// parent-dir fsync. The destination is never pre-removed, so a crash +/// leaves the old or new vault, never none. The temp holds only +/// ciphertext + header, never plaintext. fn write_vault_at(path: &Path, vault: &Vault) -> Result<(), SecretStoreError> { do_write_vault_at(path, vault).inspect_err(|e| { tracing::warn!(error = %e, "failed to write vault file"); @@ -610,9 +604,14 @@ fn write_vault_at(path: &Path, vault: &Vault) -> Result<(), SecretStoreError> { fn do_write_vault_at(path: &Path, vault: &Vault) -> Result<(), SecretStoreError> { let serialized = format::serialize(vault); - // Normalize an empty / bare-filename parent to "." so neither - // `NamedTempFile::new_in` nor the Unix parent-dir fsync sees an - // empty path. 
+ // Defence in depth: never write a vault the read path would refuse, + // so the on-disk file is never left unopenable. + if serialized.len() as u64 > MAX_VAULT_SIZE_BYTES { + return Err(SecretStoreError::VaultTooLarge { + found: serialized.len() as u64, + max: MAX_VAULT_SIZE_BYTES, + }); + } let parent = normalized_parent(path); create_parent_dir(parent)?; let mut tmp = @@ -635,21 +634,18 @@ fn do_write_vault_at(path: &Path, vault: &Vault) -> Result<(), SecretStoreError> Ok(()) } -/// Normalize `path.parent()` for callers that need a directory path -/// they can pass to `fs::create_dir_all`, `NamedTempFile::new_in`, and -/// the Unix parent-dir fsync. `Path::parent()` returns `Some("")` for a -/// bare relative filename like `"vault.pwsvault"`, and the empty path -/// errors out at every one of those calls — normalize to "." so a -/// caller that supplies a bare filename in their cwd just works. +/// Normalize `path.parent()` to a usable directory: `Path::parent()` +/// returns `Some("")` for a bare filename, which errors at +/// `create_dir_all` / `NamedTempFile::new_in` / parent-dir fsync — map +/// the empty parent to "." so a bare filename in the cwd just works. fn normalized_parent(path: &Path) -> &Path { path.parent() .filter(|p| !p.as_os_str().is_empty()) .unwrap_or_else(|| Path::new(".")) } -/// Create the parent directory for a vault file, applying a `0700` mode -/// on Unix so the directory created at first-setup is not -/// world-readable. Idempotent: a pre-existing directory is left alone. +/// Create the vault's parent directory at `0700` on Unix (not +/// world-readable). Idempotent — a pre-existing directory is left alone. 
fn create_parent_dir(parent: &Path) -> Result<(), SecretStoreError> { #[cfg(unix)] { @@ -659,10 +655,8 @@ fn create_parent_dir(parent: &Path) -> Result<(), SecretStoreError> { .recursive(true) .create(parent)?; } - // INTENTIONAL: Windows ACL hardening on the parent dir is deferred - // to https://github.com/dashpay/platform/issues/3754. The recursive - // create still runs so the path materializes; operators on Windows - // MUST tighten ACLs manually until the follow-up lands. + // INTENTIONAL: Windows parent-dir ACL hardening deferred to + // https://github.com/dashpay/platform/issues/3754 — tighten manually. #[cfg(not(unix))] { fs::create_dir_all(parent)?; @@ -670,28 +664,25 @@ fn create_parent_dir(parent: &Path) -> Result<(), SecretStoreError> { Ok(()) } -/// Cross-platform advisory write-lock holder. Owns a `Box>` -/// (so the address is stable) and an owned `RwLockWriteGuard` borrowing -/// from it. Dropping the holder drops the guard first (which releases -/// the OS lock via `fd-lock`'s Drop impl, calling `flock(LOCK_UN)` on -/// Unix and `UnlockFileEx` on Windows) and then frees the heap-pinned -/// `RwLock`. -/// -/// The self-reference is unavoidable: `fd-lock`'s guard borrows the -/// `RwLock`, and the resident-vault model requires the lock to stay -/// held continuously between `open` and `Drop`. Wrapped in a small -/// allow-unsafe island so the rest of the crate keeps -/// `deny(unsafe_code)`. Safety arguments: +/// Advisory write-lock holder owning a heap-pinned `Box>` +/// and a self-referential `'static` guard borrowing from it. The +/// self-reference is unavoidable: `fd-lock`'s guard borrows the `RwLock`, +/// and the resident-vault model needs the lock held continuously between +/// `open` and `Drop`. Safety: /// -/// 1. The `RwLock` lives on the heap via `Box::into_raw`, so its -/// address is stable for the holder's lifetime. -/// 2. 
The `'static` lifetime on the guard is a lie tolerated only -/// because the guard never outlives the holder, and the holder's -/// `Drop` impl takes the guard out (running its Drop) *before* -/// reclaiming the box. +/// 1. `Box::into_raw` gives the `RwLock` a stable address for the +/// holder's lifetime. +/// 2. The `'static` guard lifetime is a lie sound only because `Drop` +/// takes the guard out (running its Drop, releasing the OS lock) +/// BEFORE reclaiming the box. /// 3. The raw pointer never escapes this module. +/// +/// Calibrated to `fd-lock = "=4.0.4"` (exact-pinned): any bump must +/// re-verify the guard releases the OS lock before the box is reclaimed. mod vault_lock { - #![allow(unsafe_code)] + // INTENTIONAL: the crate's only unsafe island; soundness rests on the + // drop-order argument above, not a Miri test. `#![deny(unsafe_code)]` + // still applies everywhere outside the narrowed per-item allows. use std::fs; use std::path::Path; @@ -709,26 +700,30 @@ mod vault_lock { // member is a `File`/`RawFd`, both `Send + Sync`). The raw pointer // points at the heap-pinned `RwLock` this struct owns; sending the // struct moves ownership of the box address with it. + #[allow(unsafe_code)] unsafe impl Send for VaultLock {} + #[allow(unsafe_code)] unsafe impl Sync for VaultLock {} impl VaultLock { pub(super) fn acquire(lock_path: &Path) -> Result { - // INTENTIONAL: on non-unix platforms the symlink-following - // hardening is deferred to - // https://github.com/dashpay/platform/issues/3754 — Windows - // requires `FILE_FLAG_OPEN_REPARSE_POINT` via the raw API - // and is out of scope for the secrets-feature landing. + // INTENTIONAL: non-unix symlink hardening (Windows needs + // FILE_FLAG_OPEN_REPARSE_POINT) deferred to + // https://github.com/dashpay/platform/issues/3754. 
let mut opts = fs::OpenOptions::new(); opts.read(true).write(true).create(true).truncate(false); #[cfg(unix)] { use std::os::unix::fs::OpenOptionsExt; opts.custom_flags(libc::O_NOFOLLOW); + // Restrictive from the first byte — no loose-perm window. + opts.mode(0o600); } let lock_file = opts .open(lock_path) .map_err(|e| SecretStoreError::io_at(lock_path, e))?; + // `mode()` only applies to a file this call creates; re-assert + // 0600 on a pre-existing sidecar. #[cfg(unix)] set_restrictive_perms(&lock_file)?; @@ -740,6 +735,7 @@ mod vault_lock { // at a valid `RwLock`. No other reference exists // yet, so promoting it to `&'static mut` is sound for the // borrow we hand to `try_write`. + #[allow(unsafe_code)] let static_ref: &'static mut fd_lock::RwLock = unsafe { &mut *raw }; let guard = match static_ref.try_write() { @@ -749,7 +745,10 @@ mod vault_lock { // no live borrow points at the box; reclaiming // here is sound and avoids leaking on the error // path. - unsafe { drop(Box::from_raw(raw)) }; + #[allow(unsafe_code)] + unsafe { + drop(Box::from_raw(raw)) + }; return Err(match e.kind() { std::io::ErrorKind::WouldBlock => SecretStoreError::AlreadyLocked, _ => SecretStoreError::from(e), @@ -773,19 +772,19 @@ mod vault_lock { // the guard has just been dropped (no live borrow), and we // are the only owner. Reclaiming the Box runs the // `RwLock`'s Drop, which closes the file fd. - unsafe { drop(Box::from_raw(self.rwlock)) }; + #[allow(unsafe_code)] + unsafe { + drop(Box::from_raw(self.rwlock)) + }; } } } use vault_lock::VaultLock; -/// Why a wallet-id hex string failed the canonical-form check. -/// -/// `WalletId::to_hex` only ever emits 64 lowercase hex chars, so every -/// seam that parses a wallet id back enforces exactly that shape in one -/// place via [`wallet_id_hex_to_bytes`]; this enum lets each caller map -/// the reason onto its own error type with the right message. 
+/// Why a wallet-id hex string failed the canonical-form check, so each +/// caller of [`wallet_id_hex_to_bytes`] can map the reason onto its own +/// error type and message. enum WalletIdHexError { /// Not exactly 64 characters. WrongLength, @@ -795,10 +794,9 @@ enum WalletIdHexError { NotHex, } -/// Decode a 64-lowercase-hex-char wallet id into its 32 bytes, enforcing -/// the canonical form `WalletId::to_hex` writes (64 chars, lowercase). -/// The single seam both the on-disk outer-key check and the SPI -/// service-string parse go through so the contract lives in one place. +/// Decode a wallet id into 32 bytes, enforcing the canonical form +/// `WalletId::to_hex` writes (64 lowercase hex chars). The single seam +/// for both the on-disk outer-key check and the SPI service-string parse. fn wallet_id_hex_to_bytes(s: &str) -> Result<[u8; 32], WalletIdHexError> { if s.len() != 64 { return Err(WalletIdHexError::WrongLength); @@ -811,13 +809,10 @@ fn wallet_id_hex_to_bytes(s: &str) -> Result<[u8; 32], WalletIdHexError> { Ok(out) } -/// Decode a wallet-id hex string (the on-disk outer key) into the -/// 32-byte form the AAD construction expects. A malformed key here is -/// an on-disk integrity failure — the format-layer parse already -/// constrains entries to JSON object semantics, but the outer key is -/// a free-form string at the type level, so the bytes-back check is a -/// defence-in-depth structural guard. Off-canonical (uppercase / wrong -/// length / non-hex) keys are all rejected as corruption. +/// Decode the on-disk outer-key wallet hex into the 32 bytes the AAD +/// expects. The outer key is a free-form string at the type level, so +/// this bytes-back check is a defence-in-depth structural guard; any +/// off-canonical key is rejected as corruption. 
pub(super) fn decode_wallet_id_hex(s: &str) -> Result<[u8; 32], SecretStoreError> { wallet_id_hex_to_bytes(s).map_err(|_| SecretStoreError::MalformedVault) } @@ -843,14 +838,10 @@ fn parse_service(service: &str) -> Result { Ok(WalletId::from(bytes)) } -/// A `(wallet_id, label)` row in an [`EncryptedFileStore`]. -/// -/// Holds a [`Clone`]d handle to the parent store so each credential -/// goes through the same public store API (and the same single-lock -/// critical section per operation). All four operations re-validate -/// `user` (label); the store key is resident on the inner so a -/// wrong-passphrase race cannot happen at the credential layer — the -/// open already failed if the passphrase was wrong. +/// A `(wallet_id, label)` row in an [`EncryptedFileStore`]. Holds a +/// cloned handle to the parent so each op goes through the same store API +/// and single-lock critical section. All ops re-validate the label; the +/// passphrase was already verified at open, so no wrong-pass race here. pub struct EncryptedFileCredential { store: EncryptedFileStore, wallet_id: WalletId, @@ -869,6 +860,13 @@ impl std::fmt::Debug for EncryptedFileCredential { impl CredentialApi for EncryptedFileCredential { fn set_secret(&self, secret: &[u8]) -> KeyringResult<()> { let _ = validated_label(&self.label).map_err(SecretStoreError::from)?; + // Cap before wrapping so an oversized secret is never materialized. + if secret.len() > MAX_SECRET_LEN { + return Err(KeyringError::from(SecretStoreError::SecretTooLarge { + found: secret.len(), + max: MAX_SECRET_LEN, + })); + } self.store .put_bytes( &self.wallet_id, @@ -881,6 +879,8 @@ impl CredentialApi for EncryptedFileCredential { fn get_secret(&self) -> KeyringResult> { let _ = validated_label(&self.label).map_err(SecretStoreError::from)?; match self.store.get_bytes(&self.wallet_id, &self.label) { + // SPI contract forces a bare Vec; caller owns disposal — + // prefer SecretStore::get for a zeroizing SecretBytes. 
Ok(Some(v)) => Ok(v.expose_secret().to_vec()), Ok(None) => Err(KeyringError::NoEntry), Err(e) => Err(e.into()), @@ -921,6 +921,11 @@ impl CredentialStoreApi for EncryptedFileStore { STORE_ID.to_string() } + /// Build a credential for `(service, user)`. SPI-direct consumers: + /// format the returned [`KeyringError`] with `Display`, never `Debug` + /// — byte-bearing variants embed raw bytes in `Debug` (CWE-209/ + /// CWE-532). Prefer the typed + /// [`SecretStore`](crate::secrets::SecretStore) path. fn build( &self, service: &str, @@ -950,10 +955,9 @@ impl CredentialStoreApi for EncryptedFileStore { impl std::fmt::Debug for EncryptedFileStore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // `try_lock` rather than `lock_inner` so a Debug invoked from - // a panic path while the same thread already holds the state - // lock cannot deadlock or double-panic. Poison is folded into - // the same fallback display as contention. + // `try_lock` (not `lock_inner`) so a Debug from a panic path that + // already holds the lock cannot deadlock; poison folds into the + // same fallback as contention. let path: PathBuf = match self.inner.try_lock() { Ok(guard) => guard.path.clone(), Err(_) => PathBuf::from(""), @@ -964,14 +968,11 @@ impl std::fmt::Debug for EncryptedFileStore { } } -/// Project an entry-level `crypto::open` result into the typed -/// distinction the secret backend exposes. The verify-token has already -/// passed at every caller (open / get / rekey), so a -/// `SecretStoreError::Decrypt` here is corruption or tampering of the -/// individual entry — **not** a wrong passphrase. Logs the non-secret -/// `(wallet_id, label)` pair at error level (never the secret) and -/// maps to `SecretStoreError::Corruption`. Every other variant rides -/// through unchanged. +/// Map an entry-level `crypto::open` failure to the typed distinction. 
+/// The verify-token has already passed at every caller, so a `Decrypt` +/// here is entry corruption/tampering, NOT a wrong passphrase — logs the +/// non-secret `(wallet_id, label)` and maps to `Corruption`; other +/// variants pass through. fn entry_decrypt_or_corruption( wallet_hex: &str, label: &str, @@ -1000,17 +1001,37 @@ fn check_perms(meta: &fs::Metadata) -> Result<(), SecretStoreError> { Ok(()) } -// INTENTIONAL: Windows ACL read-check deferred to a follow-up PR — -// tracked at https://github.com/dashpay/platform/issues/3754. Vault -// file mode hardening on Windows requires GetSecurityInfo via -// `windows-acl` or `winapi`; out of scope for the secrets-feature -// landing. Operators on Windows MUST set ACLs manually until the -// follow-up lands. +// INTENTIONAL: Windows ACL read-check (needs GetSecurityInfo) deferred to +// https://github.com/dashpay/platform/issues/3754 — set ACLs manually. #[cfg(not(unix))] fn check_perms(_meta: &fs::Metadata) -> Result<(), SecretStoreError> { Ok(()) } +/// Refuse a group/other-WRITABLE vault parent (`mode & 0o022`). The +/// threat is rename/unlink/replace, which POSIX gates on directory WRITE, +/// so this targets write bits only — a 0o755 read-only parent leaks +/// filenames but not the 0600 contents and is the common layout. +/// `DirBuilder::mode` only hardens dirs this process creates, so a +/// pre-existing loose dir must still be checked here. +#[cfg(unix)] +fn check_parent_perms(parent: &Path) -> Result<(), SecretStoreError> { + use std::os::unix::fs::MetadataExt; + let meta = fs::metadata(parent).map_err(|e| SecretStoreError::io_at(parent, e))?; + let mode = meta.mode() & 0o777; + if mode & 0o022 != 0 { + return Err(SecretStoreError::InsecureParentDir { mode }); + } + Ok(()) +} + +// INTENTIONAL: Windows parent-dir ACL check deferred to the same +// follow-up as `check_perms` — https://github.com/dashpay/platform/issues/3754. 
+#[cfg(not(unix))] +fn check_parent_perms(_parent: &Path) -> Result<(), SecretStoreError> { + Ok(()) +} + #[cfg(unix)] fn set_restrictive_perms(f: &fs::File) -> Result<(), SecretStoreError> { use std::os::unix::fs::PermissionsExt; @@ -1018,12 +1039,8 @@ fn set_restrictive_perms(f: &fs::File) -> Result<(), SecretStoreError> { Ok(()) } -// INTENTIONAL: Windows ACL tightening deferred to the same follow-up -// as `check_perms` above — tracked at -// https://github.com/dashpay/platform/issues/3754. Vault file mode -// hardening on Windows requires SetSecurityInfo via `windows-acl` or -// `winapi`; out of scope for the secrets-feature landing. Operators on -// Windows MUST set ACLs manually until the follow-up lands. +// INTENTIONAL: Windows ACL tightening (needs SetSecurityInfo) deferred to +// https://github.com/dashpay/platform/issues/3754 — set ACLs manually. #[cfg(not(unix))] fn set_restrictive_perms(_f: &fs::File) -> Result<(), SecretStoreError> { Ok(()) @@ -1055,6 +1072,13 @@ mod tests { } fn vault_path(dir: &Path) -> PathBuf { + // Tighten the umask-0002 tempdir (0o775) to 0o700 so it passes the + // parent-dir perm check (dedicated perm tests use a subdir). + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = fs::set_permissions(dir, fs::Permissions::from_mode(0o700)); + } dir.join("vault.pwsvault") } @@ -1086,9 +1110,7 @@ mod tests { #[test] fn open_creates_vault_file_on_first_open() { - // Resident-vault model: open() creates a usable vault file even - // without any subsequent put, so a second open() of the same - // path observes a real on-disk file (modulo the lock). + // open() creates a usable vault file even without a put. 
let dir = tempfile::tempdir().unwrap(); let path = vault_path(dir.path()); { @@ -1135,10 +1157,8 @@ mod tests { #[test] fn open_acquires_exclusive_lock_until_drop() { - // Resident-vault model: a second open() of the same path while - // the first store is alive returns AlreadyLocked immediately - // (no retry, no wait). Once the first store drops the lock is - // released and a fresh open() succeeds. + // A second open() while the first store is alive returns + // AlreadyLocked; once it drops, a fresh open() succeeds. let dir = tempfile::tempdir().unwrap(); let path = vault_path(dir.path()); let s1 = store_at(&path); @@ -1455,6 +1475,174 @@ mod tests { ); } + /// The no-plaintext-at-rest guarantee also holds through the public + /// `SecretStore::set` path (which writes an unprotected envelope sealed + /// under the vault key), not just the raw SPI entry path. + #[test] + fn no_plaintext_in_vault_file_via_secret_store_set() { + use crate::secrets::SecretStore; + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + let store = SecretStore::file(&path, SecretString::new("pw-correct")).unwrap(); + store + .set( + &wid(1), + "seed", + &SecretBytes::from_slice(b"PLAINTEXTNEEDLE"), + ) + .unwrap(); + let raw = fs::read(&path).unwrap(); + assert!( + raw.windows(b"PLAINTEXTNEEDLE".len()) + .all(|w| w != b"PLAINTEXTNEEDLE"), + "plaintext leaked into vault file via SecretStore::set" + ); + } + + /// A blank passphrase is rejected at `open` → + /// `BlankPassphrase`; no vault file (or lock sidecar) is created. 
+ #[test] + fn open_rejects_blank_passphrase() { + for blank in [ + SecretString::empty(), + SecretString::new(""), + SecretString::new(" "), + SecretString::new("\t\n"), + ] { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + let err = EncryptedFileStore::open(&path, blank).unwrap_err(); + assert!( + matches!(err, SecretStoreError::BlankPassphrase), + "blank passphrase must be rejected, got {err:?}" + ); + assert!(!path.exists(), "no vault file for a blank passphrase"); + assert!( + !lock_path_for(&path).exists(), + "no lock sidecar for a blank passphrase" + ); + } + } + + /// A blank passphrase is rejected at `rekey`; the resident + /// vault, key, and on-disk file are UNCHANGED — the original passphrase + /// still reads every entry, live and after reopen. + #[test] + fn rekey_rejects_blank_passphrase_vault_unchanged() { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + let s = store_at(&path); // real "pw-correct" + entry(&s, wid(1), "seed").set_secret(b"v1").unwrap(); + for blank in [SecretString::empty(), SecretString::new(" ")] { + let err = s.rekey(blank).unwrap_err(); + assert!( + matches!(err, SecretStoreError::BlankPassphrase), + "blank rekey must be rejected, got {err:?}" + ); + } + // Old passphrase still reads the entry, live… + assert_eq!(entry(&s, wid(1), "seed").get_secret().unwrap(), b"v1"); + // …and after a clean reopen under the original passphrase. + drop(s); + let s2 = store_at(&path); + assert_eq!(entry(&s2, wid(1), "seed").get_secret().unwrap(), b"v1"); + } + + /// `open_unprotected` permits a deliberate keyless vault that + /// round-trips; a real-passphrase `open` of that keyless vault then + /// fails with `WrongPassphrase` (it is keyless, not real-pass). 
+ #[test] + fn open_unprotected_permits_keyless_vault() { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + { + let s = EncryptedFileStore::open_unprotected(&path).unwrap(); + entry(&s, wid(1), "seed") + .set_secret(b"keyless-seed") + .unwrap(); + } + { + let s = EncryptedFileStore::open_unprotected(&path).unwrap(); + assert_eq!( + entry(&s, wid(1), "seed").get_secret().unwrap(), + b"keyless-seed" + ); + } + let err = EncryptedFileStore::open(&path, SecretString::new("real")).unwrap_err(); + assert!( + matches!(err, SecretStoreError::WrongPassphrase), + "real-pass open of a keyless vault must fail, got {err:?}" + ); + } + + /// Empty→real passphrase migration via `rekey`. After rekey, + /// `open(real)` reads every entry; the keyless door no longer opens it; + /// no `.bak`/`.tmp` residue beside the vault. + #[test] + fn empty_to_real_rekey_migration() { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + { + let s = EncryptedFileStore::open_unprotected(&path).unwrap(); + entry(&s, wid(1), "seed").set_secret(b"migrate-me").unwrap(); + s.rekey(SecretString::new("real-pass")).unwrap(); + // The live handle keeps working post-rekey. + assert_eq!( + entry(&s, wid(1), "seed").get_secret().unwrap(), + b"migrate-me" + ); + } + // Reopen under the real passphrase reads the entry. + { + let s = EncryptedFileStore::open(&path, SecretString::new("real-pass")).unwrap(); + assert_eq!( + entry(&s, wid(1), "seed").get_secret().unwrap(), + b"migrate-me" + ); + } + // The keyless door no longer opens it. + let err = EncryptedFileStore::open_unprotected(&path).unwrap_err(); + assert!( + matches!(err, SecretStoreError::WrongPassphrase), + "keyless open after migration must fail, got {err:?}" + ); + // No .bak / .tmp residue (mirrors rekey_reencrypts_and_old_passphrase_fails). 
+ for sibling in fs::read_dir(dir.path()).unwrap().flatten() { + let name = sibling.file_name(); + let name = name.to_string_lossy(); + assert!( + !name.ends_with(".bak") && !name.ends_with(".tmp"), + "unexpected residue: {name}" + ); + } + } + + /// Crash-safety: a disk-write failure mid-rekey leaves the + /// pre-rekey keyless vault intact and readable via `open_unprotected` + /// (mirrors rekey_does_not_corrupt_on_disk_temp_failure). + #[cfg(unix)] + #[test] + fn empty_to_real_rekey_crash_safe_stays_keyless() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + let s = EncryptedFileStore::open_unprotected(&path).unwrap(); + entry(&s, wid(1), "seed").set_secret(b"keyless").unwrap(); + + // Read-only parent → the rekey atomic temp-write fails. + fs::set_permissions(dir.path(), fs::Permissions::from_mode(0o500)).unwrap(); + let err = s.rekey(SecretString::new("real-pass")).unwrap_err(); + assert!(matches!(err, SecretStoreError::Io(_)), "got {err:?}"); + fs::set_permissions(dir.path(), fs::Permissions::from_mode(0o700)).unwrap(); + + // The live handle still serves the pre-rekey keyless vault… + assert_eq!(entry(&s, wid(1), "seed").get_secret().unwrap(), b"keyless"); + // …and on disk it is still the keyless vault. + drop(s); + let s2 = EncryptedFileStore::open_unprotected(&path).unwrap(); + assert_eq!(entry(&s2, wid(1), "seed").get_secret().unwrap(), b"keyless"); + } + #[test] fn build_rejects_malformed_service() { let dir = tempfile::tempdir().unwrap(); @@ -1552,13 +1740,9 @@ mod tests { #[test] fn inflated_kdf_params_fail_open_with_kdf_failure() { - // A vault whose JSON declares m_kib = u32::MAX must be refused - // at open() with KdfFailure — before the verify-token is - // derived and without the ~4 TiB allocation the inflated param - // would demand. Under the resident-vault model this surfaces at - // open() rather than on first get(). 
Drop the store BEFORE - // patching the on-disk file so the drop-time sync cannot - // overwrite our injected corruption. + // A JSON m_kib = u32::MAX must be refused at open() with + // KdfFailure, before the ~4 TiB allocation it would demand. Drop + // the store before patching disk so the drop-sync can't undo it. let dir = tempfile::tempdir().unwrap(); let path = vault_path(dir.path()); { @@ -1613,14 +1797,10 @@ mod tests { ); } - /// Two threads share a cloned [`EncryptedFileStore`] handle (same - /// shape [`EncryptedFileCredential`] uses internally): one hammers - /// `put_bytes` + `get_bytes`, the other calls `rekey`. The single - /// state lock serializes every put/get/rekey, so a put can never - /// capture an old key and insert under a newly-swapped vault — every - /// `get` returns either the right plaintext, `Ok(None)`, or a clean - /// typed error, NEVER garbled bytes from a mis-keyed seal (which - /// would surface as `Corruption`). + /// Two threads share a cloned handle: one hammers put/get, the other + /// rekeys. The single state lock serializes them, so a `get` only ever + /// returns the right plaintext, `Ok(None)`, or a clean typed error — + /// never garbled bytes from a put that sealed under a swapped key. #[test] fn rekey_does_not_race_put_into_corruption() { let dir = tempfile::tempdir().unwrap(); @@ -1629,19 +1809,15 @@ mod tests { let writer_store = store.clone(); let rekeyer_store = store.clone(); - // Iteration counts are tuned for cost: every rekey runs Argon2 - // at the default-target params, so the rekey loop dominates - // wall-clock. 16 rekeys overlap a 200-iter put loop reliably - // enough to hit the pre-fix race window on the test runner - // without dragging the suite out. + // Counts tuned for cost: each rekey runs Argon2, so 16 rekeys + // overlapping a 200-iter put loop hits the race window affordably. 
const PUT_ITERS: usize = 200; const REKEY_ITERS: usize = 16; let wallet = wid(7); let label = "racy"; - // A fixed-prefix payload byte vector — never built with - // `format!` so the in-source secrets-guard scanner does not - // flag this test as a sink/expose_secret pairing. + // Fixed prefix, never built with `format!` so the secrets-guard + // scanner does not flag this as a sink/expose_secret pairing. const PREFIX: &[u8] = b"payload-"; let writer = std::thread::spawn(move || { let mut buf = Vec::with_capacity(PREFIX.len() + 4); @@ -1654,9 +1830,8 @@ mod tests { .expect("put"); match writer_store.get_bytes(&wallet, label) { Ok(Some(bytes)) => { - // Must be one of OUR payloads — never random - // bytes from a mis-keyed seal. Compare only - // length + prefix; never log the bytes. + // Must be one of OUR payloads, never mis-keyed + // garbage. Check length + prefix; never log bytes. let got = bytes.expose_secret(); assert!(got.starts_with(PREFIX), "garbled get-after-put"); assert_eq!(got.len(), PREFIX.len() + 4); @@ -1668,10 +1843,8 @@ mod tests { }); let rekeyer = std::thread::spawn(move || { - // Alternate two passphrases so consecutive rekeys actually - // change the resident key (the salt rerolls regardless, but - // alternating distinct passphrases is the operator-facing - // model and keeps the race window real). + // Alternate passphrases so consecutive rekeys change the + // resident key, keeping the race window real. let passphrases = ["pw-A", "pw-B"]; for i in 0..REKEY_ITERS { rekeyer_store @@ -1684,20 +1857,23 @@ mod tests { rekeyer.join().expect("rekeyer thread"); } - /// A bare relative filename makes `Path::parent()` return `Some("")`, - /// which `NamedTempFile::new_in("")` and the Unix parent-dir fsync - /// both reject; the `normalized_parent` helper rewrites the empty - /// parent to ".". Switch cwd to a temp dir for the test scope so we - /// exercise the bare-filename path without scribbling in the - /// workspace. 
+ /// A bare filename makes `Path::parent()` return `Some("")`, which + /// `normalized_parent` rewrites to "."; exercise that path in a temp + /// cwd so nothing lands in the workspace. #[test] fn open_and_put_with_bare_filename_uses_cwd() { - // A static mutex serializes cwd-changing tests so they cannot - // race each other across the suite. + // Serialize cwd-changing tests so they cannot race each other. static CWD_GUARD: std::sync::Mutex<()> = std::sync::Mutex::new(()); let _g = CWD_GUARD.lock().unwrap_or_else(|p| p.into_inner()); let dir = tempfile::tempdir().unwrap(); + // Tighten the cwd-parent so the parent-dir perm check passes (a + // umask-0002 tempdir is group-writable at 0o775). + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + fs::set_permissions(dir.path(), fs::Permissions::from_mode(0o700)).unwrap(); + } let prior = std::env::current_dir().unwrap(); std::env::set_current_dir(dir.path()).unwrap(); // Tear-down guard so a panic still restores cwd. @@ -1720,9 +1896,8 @@ mod tests { assert!(dir.path().join("vault.pwsvault").exists()); } - /// The lock sidecar must refuse to traverse a pre-existing symlink - /// at the lock path on Unix. Without `O_NOFOLLOW` an attacker could - /// redirect the lock file's open to an unrelated inode. + /// The lock-sidecar open must refuse a pre-existing symlink + /// (`O_NOFOLLOW`) so an attacker can't redirect it to another inode. #[cfg(unix)] #[test] fn vault_lock_rejects_symlink_at_lock_path() { @@ -1731,9 +1906,7 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = vault_path(dir.path()); let lock = lock_path_for(&path); - // Point the lock path at /dev/null. Any successful open of the - // symlink would land on /dev/null's inode; O_NOFOLLOW makes the - // open itself fail with ELOOP. + // O_NOFOLLOW makes the open of this symlink fail with ELOOP. 
symlink("/dev/null", &lock).unwrap(); let err = EncryptedFileStore::open(&path, SecretString::new("pw-correct")) @@ -1743,4 +1916,153 @@ mod tests { "expected an Io error from O_NOFOLLOW refusal, got {err:?}" ); } + + /// A group/other-WRITABLE parent is refused at open (it would let a + /// peer rename/replace the vault despite its 0600); a read-only 0o750 + /// parent is fine. + #[cfg(unix)] + #[test] + fn writable_parent_dir_is_refused() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let sub = dir.path().join("vaultdir"); + fs::create_dir(&sub).unwrap(); + // Group-writable (0o770) trips the write-bit check. Build the path + // directly — `vault_path` would tighten the dir back to 0o700. + fs::set_permissions(&sub, fs::Permissions::from_mode(0o770)).unwrap(); + let path = sub.join("vault.pwsvault"); + let err = EncryptedFileStore::open(&path, SecretString::new("pw-correct")) + .expect_err("writable parent dir must be refused"); + assert!( + matches!(err, SecretStoreError::InsecureParentDir { mode } if mode & 0o022 != 0), + "got {err:?}" + ); + // Dropping the write bits (still group-readable at 0o750) lets the + // open succeed: read-only group access is not a rename threat. + fs::set_permissions(&sub, fs::Permissions::from_mode(0o750)).unwrap(); + let _s = store_at(&path); + } + + /// An oversized secret is rejected at the write boundary with + /// `SecretTooLarge`, and the vault stays openable — the per-secret + /// cap prevents the shared document from being inflated past the + /// read-side ceiling. 
+ #[test] + fn oversized_secret_rejected_and_vault_stays_openable() { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + { + let s = store_at(&path); + entry(&s, wid(1), "ok").set_secret(b"small").unwrap(); + let too_big = vec![0xABu8; MAX_SECRET_LEN + 1]; + let err = entry(&s, wid(1), "huge").set_secret(&too_big).unwrap_err(); + // Surfaces through the SPI as BadStoreFormat carrying the + // secret-free SecretTooLarge message. + assert!( + matches!(&err, KeyringError::BadStoreFormat(m) + if *m == SecretStoreError::SecretTooLarge { + found: MAX_SECRET_LEN + 1, + max: MAX_SECRET_LEN, + }.to_string()), + "got {err:?}" + ); + // The earlier good entry is still readable on this handle. + assert_eq!(entry(&s, wid(1), "ok").get_secret().unwrap(), b"small"); + } + // The vault reopens cleanly — the oversized put never landed. + let s2 = store_at(&path); + assert_eq!(entry(&s2, wid(1), "ok").get_secret().unwrap(), b"small"); + assert!(matches!( + entry(&s2, wid(1), "huge").get_secret(), + Err(KeyringError::NoEntry) + )); + } + + /// An in-bounds KDF-param shift on a correct-passphrase vault is + /// rejected at open with `WrongPassphrase` — driven by the changed + /// DERIVED KEY, not the AAD binding (which + /// `verify_aad_binds_salt_and_kdf_params` covers). + #[test] + fn header_tamper_kdf_shift_smoke_test() { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + { + let s = store_at(&path); + entry(&s, wid(1), "seed").set_secret(b"value").unwrap(); + } + let mut vault = read_vault_at(&path).unwrap().unwrap(); + // Shift to still-valid-but-weaker params (defaults are 64 MiB/t=3; + // halving m_kib and dropping t stays at or above the 19 MiB / t=2 floor).
+ vault.kdf.m_kib /= 2; + vault.kdf.t -= 1; + assert!( + vault.kdf.enforce_bounds().is_ok(), + "shift must stay in bounds" + ); + write_vault_at(&path, &vault).unwrap(); + let err = EncryptedFileStore::open(&path, SecretString::new("pw-correct")) + .expect_err("KDF-param shift must fail the verify-token"); + assert!( + matches!(err, SecretStoreError::WrongPassphrase), + "got {err:?}" + ); + } + + /// A flipped salt byte on a correct-passphrase vault is rejected at + /// open with `WrongPassphrase` — driven by the changed DERIVED KEY + /// (salt feeds the KDF), not the AAD binding. + #[test] + fn header_tamper_flipped_salt_smoke_test() { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + { + let s = store_at(&path); + entry(&s, wid(1), "seed").set_secret(b"value").unwrap(); + } + let mut vault = read_vault_at(&path).unwrap().unwrap(); + vault.salt[0] ^= 0x01; + write_vault_at(&path, &vault).unwrap(); + let err = EncryptedFileStore::open(&path, SecretString::new("pw-correct")) + .expect_err("flipped salt must fail open"); + assert!( + matches!(err, SecretStoreError::WrongPassphrase), + "got {err:?}" + ); + } + + /// A flipped entry NONCE byte (verify-token intact) surfaces as + /// `Corruption`: the per-entry AEAD-open fails its tag under the + /// correct key, mirroring the ciphertext-flip route. 
+ #[test] + fn flipped_entry_nonce_is_corruption() { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + let s = store_at(&path); + entry(&s, wid(1), "seed").set_secret(b"value").unwrap(); + let mut vault = s.test_read_vault_from_disk().unwrap().unwrap(); + vault + .wallets + .get_mut(&wid(1).to_hex()) + .unwrap() + .get_mut("seed") + .unwrap() + .nonce[0] ^= 0x01; + s.test_write_vault_to_disk(&vault).unwrap(); + s.test_reload_from_disk().unwrap(); + let err = entry(&s, wid(1), "seed").get_secret().unwrap_err(); + assert!(is_corruption(&err), "unexpected error: {err:?}"); + } + + /// A secret exactly at the cap is accepted (boundary is inclusive). + #[test] + fn secret_exactly_at_cap_is_accepted() { + let dir = tempfile::tempdir().unwrap(); + let s = store_at(&vault_path(dir.path())); + let at_cap = vec![0x5Au8; MAX_SECRET_LEN]; + entry(&s, wid(1), "atcap").set_secret(&at_cap).unwrap(); + assert_eq!( + entry(&s, wid(1), "atcap").get_secret().unwrap().len(), + MAX_SECRET_LEN + ); + } } diff --git a/packages/rs-platform-wallet-storage/src/secrets/keyring.rs b/packages/rs-platform-wallet-storage/src/secrets/keyring.rs index 618706276e3..4d58d407656 100644 --- a/packages/rs-platform-wallet-storage/src/secrets/keyring.rs +++ b/packages/rs-platform-wallet-storage/src/secrets/keyring.rs @@ -1,57 +1,49 @@ //! OS-keyring construction helper. //! -//! Built on `keyring-core 1.0.0` (the SPI library) plus the -//! per-platform credential-store crates; the `keyring` 4.x sample CLI -//! crate itself is intentionally not a dependency. +//! Built on `keyring-core 1.0.0` (the SPI) plus the per-platform +//! credential-store crates; the `keyring` 4.x sample CLI crate is +//! deliberately not a dependency. There is no crate-local wrapper — +//! [`default_credential_store`]'s return value is used directly via +//! [`keyring_core::api::CredentialStoreApi`] or installed as the process +//! default via [`keyring_core::set_default_store`]. //! -//! 
There is no crate-local wrapper around the per-platform store: a -//! caller takes [`default_credential_store`]'s return value and either -//! uses it directly via [`keyring_core::api::CredentialStoreApi`] or -//! installs it as the process default via -//! [`keyring_core::set_default_store`]. +//! Threat coverage: protects **A1** (other local user) and **A4** (lost +//! laptop) where the platform encrypts items at rest and scopes them to +//! the user. Does **not** cover **A2/A3** same-user malware (most OS +//! keyrings hand the secret to any same-user process) or **A5** keyring +//! scraping; headless Linux without Secret Service fails closed +//! ([`keyring_core::Error::NoDefaultStore`]), never degrading to plaintext. //! -//! ## Threat coverage +//! Metadata is enumerable plaintext: entries are keyed by `service = +//! SERVICE_PREFIX + hex(wallet_id)` and `user = label`, stored as +//! plaintext keyring metadata. Same-user list-only tooling can enumerate +//! which wallet ids and slot kinds exist without unlocking a secret — +//! dominated by the accepted A2/A3 residual, with no portable knob to +//! redact it. Operators wanting metadata hiding should prefer +//! [`EncryptedFileStore`](super::EncryptedFileStore), whose +//! `(wallet_id, label)` map lives only inside the sealed vault. //! -//! Covers **A1** (other local user) and **A4** (lost laptop) where the -//! platform encrypts keyring items at rest and scopes them to the user. -//! Does **not** cover **A2/A3** same-user malware (most OS keyrings -//! hand the secret to any same-user process that asks), **A5** if the -//! keyring daemon itself is scraped, or **headless Linux** with no -//! Secret Service — that fails closed -//! ([`keyring_core::Error::NoDefaultStore`]), never degrades to -//! plaintext. -//! -//! ### Per-OS reality -//! -//! - **Linux/FreeBSD:** Secret Service (gnome-keyring / KWallet) is the -//! sole backend. It requires a D-Bus session + unlocked collection; -//! 
headless / SSH / CI boxes frequently lack it, in which case the -//! store fails closed with `NoDefaultStore` and the operator selects -//! [`EncryptedFileStore`](super::EncryptedFileStore) explicitly. -//! Items persist `UntilDelete`. Callers that need durable storage on -//! a headless host should pin -//! [`EncryptedFileStore`](super::EncryptedFileStore) instead. -//! - **macOS:** Keychain ACL — a re-signed binary with the same -//! code-signing identity is an accepted residual risk. Items persist -//! `UntilDelete`. -//! - **Windows:** Credential Manager / DPAPI is user-profile scoped; a -//! same-user process can unprotect it. DPAPI is **not** a defense -//! against same-user malware, only A1/A4. Items persist -//! `UntilDelete`. +//! Per-OS: items persist `UntilDelete` everywhere. Linux/FreeBSD use +//! Secret Service (gnome-keyring / KWallet), which needs a D-Bus session +//! and an unlocked collection. macOS Keychain ACL accepts a re-signed +//! binary with the same code-signing identity (residual). Windows DPAPI +//! is user-profile scoped and defends A1/A4 only, not same-user malware. use std::sync::Arc; use keyring_core::api::CredentialStoreApi; use keyring_core::Error as KeyringError; -/// Open the platform's default credential store, failing closed -/// (typed [`KeyringError::NoDefaultStore`]) when none is reachable -/// (headless / no Secret Service / no D-Bus). Never panics, never -/// falls back to a weaker store. +/// Open the platform's default credential store, failing closed (typed +/// [`KeyringError::NoDefaultStore`]) when none is reachable (headless / no +/// Secret Service / no D-Bus). Never panics, never falls back to a weaker +/// store. The returned `Arc` works with +/// [`keyring_core::set_default_store`] or builds entries directly. /// -/// The returned `Arc` may be passed straight to -/// [`keyring_core::set_default_store`] or used directly to build -/// entries. 
+/// SPI-direct consumers: format the returned [`KeyringError`] with +/// `Display` (`{}`), **never** `Debug` — upstream `BadEncoding` / +/// `BadDataFormat` variants embed raw bytes in `Debug` (CWE-209/CWE-532). +/// The typed [`SecretStore`](super::SecretStore) path avoids the SPI error. pub fn default_credential_store() -> Result, KeyringError> { platform_default_store() @@ -59,10 +51,8 @@ pub fn default_credential_store() -> Result Result, KeyringError> { - // Secret Service (gnome-keyring / KWallet) is the only OS backend. - // No reachable D-Bus session / unlocked collection (headless, SSH, - // CI) is fail-closed by design — the operator selects - // EncryptedFileStore explicitly instead. + // Secret Service is the only backend; an unreachable D-Bus session + // (headless / SSH / CI) is fail-closed by design. match dbus_secret_service_keyring_store::Store::new() { Ok(s) => Ok(s), Err(_) => Err(KeyringError::NoDefaultStore), @@ -71,11 +61,8 @@ fn platform_default_store() -> Result, #[cfg(target_os = "macos")] fn platform_default_store() -> Result, KeyringError> { - // `apple-native-keyring-store` >= 1.0 with the `keychain` feature - // exposes `Store` under the `keychain` module, not at the crate - // root (sibling backends — `dbus-secret-service-keyring-store`, - // `windows-native-keyring-store` — do put `Store` at the root, hence - // the asymmetric path). + // `apple-native-keyring-store` >= 1.0 exposes `Store` under the + // `keychain` module, not the crate root like the sibling backends. match apple_native_keyring_store::keychain::Store::new() { Ok(s) => Ok(s), Err(_) => Err(KeyringError::NoDefaultStore), diff --git a/packages/rs-platform-wallet-storage/src/secrets/mod.rs b/packages/rs-platform-wallet-storage/src/secrets/mod.rs index f44699f6ae7..4161e423006 100644 --- a/packages/rs-platform-wallet-storage/src/secrets/mod.rs +++ b/packages/rs-platform-wallet-storage/src/secrets/mod.rs @@ -1,55 +1,28 @@ //! 
Out-of-band storage for wallet secret material (mnemonic / seed / //! xpriv), kept entirely off the SQLite persister's data path. //! -//! # Consumer entry point: [`SecretStore`] -//! -//! [`SecretStore`] is the public, never-leaking front door. Its read -//! path ([`SecretStore::get`]) yields a zeroizing [`SecretBytes`] — a raw -//! `Vec` never crosses this boundary — and its write path -//! ([`SecretStore::set`]) takes `&SecretBytes`, so a caller cannot pass an -//! unwrapped buffer. Errors surface as the typed [`SecretStoreError`], -//! losslessly for the file arm (`WrongPassphrase` vs `Corruption` vs -//! `AlreadyLocked` stay distinct). -//! -//! - [`SecretStore::file`] — Argon2id + XChaCha20-Poly1305 vault file. -//! Recommended on **headless / server** hosts; fully self-contained. -//! - [`SecretStore::os`] — the platform OS keyring, fail-closed on -//! headless Linux. Recommended on **desktop**. -//! -//! # Internal SPI -//! -//! Below `SecretStore`, the backend SPI is upstream's -//! [`keyring_core::api::CredentialStoreApi`] / [`CredentialApi`]. -//! [`EncryptedFileStore`] and [`default_credential_store`] expose that -//! SPI directly; their `keyring_core::Error` projection is **lossy and -//! string-only** (the typed distinction lives on the `SecretStore` path). -//! Consumers should prefer `SecretStore`. -//! -//! - [`SecretBytes`] / [`SecretString`] — zeroize-on-drop wrappers. -//! - [`SecretStoreError`] — the typed error returned by `SecretStore` -//! and both backends, projected into `keyring_core::Error` for the SPI. +//! Consumers use [`SecretStore`], the public never-leaking front door: +//! reads yield a zeroizing [`SecretBytes`] (a raw `Vec` never crosses +//! the boundary), writes take `&SecretBytes`, and errors are the typed +//! [`SecretStoreError`] (lossless on the file arm). Pick a backend +//! explicitly — [`SecretStore::file`] (Argon2id + XChaCha20-Poly1305 +//! vault, headless/server) or [`SecretStore::os`] (OS keyring, desktop; +//! 
fail-closed on headless Linux). There is no silent fallback. +//! +//! Below `SecretStore` the backend SPI is upstream's +//! [`keyring_core::api::CredentialStoreApi`] / [`CredentialApi`], exposed +//! directly by [`EncryptedFileStore`] / [`default_credential_store`]; +//! its `keyring_core::Error` projection is lossy and string-only, so +//! consumers should prefer `SecretStore`. //! //! [`CredentialApi`]: keyring_core::api::CredentialApi //! [`CredentialStoreApi`]: keyring_core::api::CredentialStoreApi //! -//! Everything secret-bearing lives under this `src/secrets/` tree by -//! design: `tests/secrets_scan.rs` scans only `src/sqlite/schema/` + -//! `migrations/` and exempts this module, so this module owns its own -//! review discipline (`tests/secrets_guard.rs`). -//! -//! # Memory hygiene -//! -//! At the SPI seam the upstream `get_secret` returns `Vec`; -//! [`SecretStore::get`] wraps it via [`SecretBytes::new`] **immediately** -//! (no named intermediate `Vec` binding) so the bare buffer's window is -//! zero statements: `SecretBytes::new` moves the `Vec` into a -//! `Zeroizing>` without copying. -//! -//! # Backend selection -//! -//! Selection is an explicit operator decision — there is no silent -//! fallback between the file vault and the OS keyring. +//! This `src/secrets/` tree is the sole secret-bearing module: +//! `tests/secrets_scan.rs` exempts it, so it owns its own review +//! discipline via `tests/secrets_guard.rs`. 
+mod envelope; mod error; mod file; mod keyring; @@ -57,9 +30,13 @@ mod secret; mod store; mod validate; +pub use envelope::MAX_PLAINTEXT_LEN; pub use error::{IoError, OsKeyringErrorKind, SecretStoreError}; -pub use file::{EncryptedFileCredential, EncryptedFileStore, MAX_VAULT_SIZE_BYTES, SERVICE_PREFIX}; +pub use file::{ + EncryptedFileCredential, EncryptedFileStore, MAX_SECRET_LEN, MAX_VAULT_SIZE_BYTES, + SERVICE_PREFIX, +}; pub use keyring::default_credential_store; -pub use secret::{SecretBytes, SecretString}; +pub use secret::{SecretBytes, SecretString, MIN_PASSPHRASE_LEN}; pub use store::SecretStore; pub use validate::WalletId; diff --git a/packages/rs-platform-wallet-storage/src/secrets/secret.rs b/packages/rs-platform-wallet-storage/src/secrets/secret.rs index 6a2a593af33..8e4d6dc4d8c 100644 --- a/packages/rs-platform-wallet-storage/src/secrets/secret.rs +++ b/packages/rs-platform-wallet-storage/src/secrets/secret.rs @@ -9,25 +9,32 @@ use std::fmt; use subtle::ConstantTimeEq; use zeroize::{Zeroize, Zeroizing}; -/// Pre-allocation capacity for [`SecretString`] buffers. -/// -/// `mlock` is page-granular, so a sub-page buffer locks a whole page -/// regardless; 4096 bytes also makes `String` reallocation (which -/// leaves an un-zeroed freed buffer the allocator owns) virtually -/// impossible for any human-entered passphrase or mnemonic. +/// Pre-allocation capacity for [`SecretString`] buffers. `mlock` is +/// page-granular (a sub-page buffer locks a whole page anyway), and 4096 +/// bytes makes a reallocation — which would leave an un-zeroed freed +/// buffer behind — virtually impossible for any human-entered secret. const DEFAULT_CAPACITY: usize = 4096; +/// Minimal post-trim length floor for a vault passphrase or a Tier-2 +/// object password, in bytes. A **coarse** guard only: `1` means "merely +/// non-blank" (the same outcome [`SecretString::is_blank`] enforces). +/// +/// The library deliberately ships **no** password-strength estimator. 
The +/// real entropy policy — zxcvbn-style strength, dictionary checks, UX +/// feedback — is locale- and threat-specific and therefore the +/// **consumer's** responsibility (documented in `SECRETS.md`). Baking a +/// fixed estimator into a storage crate would be both too weak for some +/// callers and too rigid for others. +pub const MIN_PASSPHRASE_LEN: usize = 1; + /// Zeroize-on-drop wrapper for secret UTF-8 strings (BIP-39 mnemonic, /// `EncryptedFileStore` passphrase). /// -/// `Display`, `Deref`, `DerefMut`, `Serialize`, `PartialEq`, `Eq` are -/// intentionally **not** implemented; read access is the explicit -/// [`expose_secret`] only, and equality goes through -/// [`subtle::ConstantTimeEq`] (`==` on secret bytes is forbidden, no -/// exception, so future bridge code cannot inherit a non-constant-time -/// path). `Debug` is redacted. `Zeroizing` -/// wipes the buffer over its full capacity on drop; the buffer is -/// best-effort `mlock`ed against swap. +/// Read access is [`expose_secret`] only; equality goes through +/// [`subtle::ConstantTimeEq`] (`==` is forbidden so bridge code cannot +/// inherit a non-constant-time path). `Display`/`Deref`/`Serialize`/`Eq` +/// are deliberately absent, `Debug` is redacted, and the buffer wipes +/// over its full capacity on drop and is best-effort `mlock`ed. /// /// [`expose_secret`]: SecretString::expose_secret /// @@ -38,9 +45,8 @@ const DEFAULT_CAPACITY: usize = 4096; /// let _ = a == b; // `==` on SecretString is forbidden; use ConstantTimeEq::ct_eq /// ``` pub struct SecretString { - // Field order is load-bearing: `inner` drops (and `Zeroizing` wipes - // it) before `_lock` releases the page, so the buffer is wiped while - // still mlock'ed. + // Field order is load-bearing: `inner` drops (Zeroizing wipes it) + // before `_lock` releases the page, so the wipe runs while mlock'ed. 
inner: Zeroizing, _lock: Option, } @@ -53,6 +59,8 @@ impl SecretString { let cap = source.len().max(DEFAULT_CAPACITY); let mut buf = String::with_capacity(cap); buf.push_str(&source); + // Do not remove: wipes the moved-in plaintext source before it drops + // (its freed buffer cannot be scanned in a test under deny(unsafe_code)). source.zeroize(); let lock = region::lock(buf.as_ptr(), buf.capacity()) .map_err(|e| { @@ -93,6 +101,18 @@ impl SecretString { pub fn trimmed(&self) -> Self { Self::new(self.inner.trim().to_string()) } + + /// Whether the secret is empty or all Unicode-whitespace. + /// + /// Returns only blank-ness — never a borrowed view of the plaintext — + /// and uses [`str::trim`] (the Unicode `White_Space` property), so a + /// NBSP (`U+00A0`) trims to blank but a ZWSP (`U+200B`, not + /// `White_Space`) does not. This is the enforcement primitive behind + /// the Tier-1 blank-passphrase guard and the Tier-2 blank-object- + /// password reject. Always available — **not** feature-gated. + pub fn is_blank(&self) -> bool { + self.inner.trim().is_empty() + } } impl Default for SecretString { @@ -120,9 +140,8 @@ impl fmt::Debug for SecretString { } impl ConstantTimeEq for SecretString { - /// Constant-time compare over the equal-length region. Unequal - /// lengths return `0` without revealing where they differ; the - /// only observable is the (non-secret) length difference. + /// Constant-time compare. Unequal lengths return `0` without + /// revealing where they differ; the only leak is the non-secret length. fn ct_eq(&self, other: &Self) -> subtle::Choice { self.expose_secret() .as_bytes() @@ -130,6 +149,14 @@ impl ConstantTimeEq for SecretString { } } +impl Zeroize for SecretString { + /// Wipe the buffer in place on a live value. `Drop` runs the same + /// wipe automatically; this lets a holder zeroize early. 
+ fn zeroize(&mut self) { + self.inner.zeroize(); + } +} + impl From for SecretString { fn from(s: String) -> Self { Self::new(s) @@ -142,18 +169,95 @@ impl From<&str> for SecretString { } } +/// Deserialize a UTF-8 secret (a vault passphrase or a Tier-2 object +/// password arriving via config), routing the owned `String` through +/// [`SecretString::new`] — which zeroizes its source — so no +/// intermediate plaintext buffer **we own** lingers (CWE-316). +/// +/// Gated behind the dedicated, default-off `secret-serde` feature, NOT the +/// crate's internal `serde` dep (which `secrets` already pulls): the gate +/// is on the IMPL, so the impl is absent unless explicitly opted in, even +/// though `serde` itself is compiled. There is deliberately **no** +/// `Serialize` companion (a secret is read-from-config, never written +/// back / round-tripped / logged), so this type cannot leak out through +/// serde under any feature combination. +/// +/// **Residual (documented, not closeable here):** the deserializer's own +/// input buffer holds the cleartext before this visitor runs and is +/// outside `SecretString`'s ownership, so it cannot be wiped here — feed +/// secrets from a zeroizing source. Mirrors the Argon2 `Block` residual +/// noted at `crypto::derive_key`. +#[cfg(feature = "secret-serde")] +impl<'de> serde::Deserialize<'de> for SecretString { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct SecretStringVisitor; + + impl<'v> serde::de::Visitor<'v> for SecretStringVisitor { + type Value = SecretString; + + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("a secret string") + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + // Take ownership of the borrowed bytes, then hand the owned + // `String` to the zeroizing constructor below. 
+ self.visit_string(v.to_owned()) + } + + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + // `SecretString::new` zeroizes the moved-in `String`. + Ok(SecretString::new(v)) + } + } + + deserializer.deserialize_string(SecretStringVisitor) + } +} + +/// Render the JSON schema as a plain `string` carrying **no** length or +/// value policy: no `minLength`/`maxLength`/`pattern`/`format` (would leak +/// a length policy) and no `example`/`default` (would embed a value). +/// A short, value-free `description` marks sensitivity. +/// +/// Gated behind the default-off `secret-schemars` feature (which implies +/// `secret-serde`). Pulls in no `Serialize`/`Display` path. +#[cfg(feature = "secret-schemars")] +impl schemars::JsonSchema for SecretString { + fn schema_name() -> std::borrow::Cow<'static, str> { + std::borrow::Cow::Borrowed("SecretString") + } + + fn schema_id() -> std::borrow::Cow<'static, str> { + std::borrow::Cow::Borrowed("platform_wallet_storage::secrets::SecretString") + } + + fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema { + schemars::json_schema!({ + "type": "string", + "description": "A secret string. Write-only: never serialized, never echoed." + }) + } +} + /// Zeroize-on-drop wrapper for secret **bytes**: BIP-32 seed -/// (`[u8; 64]`), xpriv, Argon2 output, AEAD key, decrypted plaintext, -/// ciphertext-in-flight. +/// (`[u8; 64]`), xpriv, Argon2 output, AEAD key, decrypted plaintext. /// -/// Not `Copy`; `Clone` is intentionally absent to enforce copy -/// minimization — move it, or `expose_secret()` and copy -/// deliberately into another wrapper. `Display`, `Deref`, `Serialize`, -/// `PartialEq`, `Eq` are intentionally **not** implemented; equality -/// goes through [`subtle::ConstantTimeEq`] only (`==` on secret bytes is -/// forbidden, no exception, so future bridge code cannot inherit a -/// non-constant-time path).
`Debug` is redacted; the -/// buffer is wiped on drop and best-effort `mlock`ed. +/// `Clone` is absent to force deliberate copies (move it, or +/// `expose_secret()` into another wrapper). Equality goes through +/// [`subtle::ConstantTimeEq`] only (`==` is forbidden so bridge code +/// cannot inherit a non-constant-time path). `Display`/`Deref`/`Serialize` +/// /`Eq` are absent, `Debug` is redacted, and the buffer wipes on drop +/// and is best-effort `mlock`ed. /// /// ```compile_fail /// use platform_wallet_storage::secrets::SecretBytes; @@ -162,9 +266,8 @@ impl From<&str> for SecretString { /// let _ = a == b; // `==` on SecretBytes is forbidden; use ConstantTimeEq::ct_eq /// ``` pub struct SecretBytes { - // Field order is load-bearing: `inner` drops (and `Zeroizing` wipes - // it) before `_lock` releases the page, so the buffer is wiped while - // still mlock'ed. + // Field order is load-bearing: `inner` drops (Zeroizing wipes it) + // before `_lock` releases the page, so the wipe runs while mlock'ed. inner: Zeroizing>, _lock: Option, } @@ -173,8 +276,8 @@ impl SecretBytes { /// Wrap a byte vector, moving it into the wrapper and best-effort /// `mlock`ing the buffer. pub fn new(bytes: Vec) -> Self { - // Lock only a non-empty allocation: an empty `Vec`'s `as_ptr()` - // is dangling, and `region::lock` rejects a 0-length region. + // Skip an empty allocation: an empty `Vec`'s `as_ptr()` is + // dangling and `region::lock` rejects a 0-length region. let lock = if bytes.capacity() > 0 { region::lock(bytes.as_ptr(), bytes.capacity()) .map_err(|e| { @@ -187,9 +290,6 @@ impl SecretBytes { } else { None }; - // The move transfers ownership of the allocation into - // `Zeroizing`; the source buffer is not copied, so there is - // nothing left behind to wipe. 
Self { inner: Zeroizing::new(bytes), _lock: lock, @@ -230,15 +330,22 @@ impl SecretBytes { } impl ConstantTimeEq for SecretBytes { - /// Fixed-width constant-time compare over the byte region — no - /// length early-return. `subtle::ConstantTimeEq` on - /// unequal-length slices yields `0` without leaking *where* they - /// differ; the only observable is the (non-secret) length. + /// Constant-time compare, no length early-return. Unequal lengths + /// yield `0` without leaking *where* they differ; only the non-secret + /// length is observable. fn ct_eq(&self, other: &Self) -> subtle::Choice { self.inner.as_slice().ct_eq(other.inner.as_slice()) } } +impl Zeroize for SecretBytes { + /// Wipe the buffer in place on a live value. `Drop` runs the same + /// wipe automatically; this lets a holder zeroize early. + fn zeroize(&mut self) { + self.inner.zeroize(); + } +} + impl fmt::Debug for SecretBytes { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SecretBytes([REDACTED; {}])", self.inner.len()) @@ -264,6 +371,21 @@ mod tests { assert_eq!(s.trimmed().expose_secret(), "abandon ability"); } + /// Two sound checks (a direct freed-buffer scan would be use-after-free, + /// and this crate forbids `unsafe`): (1) `String::zeroize` empties a + /// buffer — the primitive `new` relies on; (2) `new` copies the content + /// into the wrapper faithfully. That `new` actually calls + /// `source.zeroize()` on its moved-in source is pinned by the + /// do-not-remove comment at that call site, not asserted here. + #[test] + fn secret_string_new_zeroizes_string_source() { + let mut source = String::from("super secret seed material"); + source.zeroize(); + assert!(source.is_empty(), "String::zeroize must empty the source"); + let s = SecretString::new(String::from("super secret seed material")); + assert_eq!(s.expose_secret(), "super secret seed material"); + } + #[test] fn secret_string_ct_eq_is_value_based() { // Equality goes through `ConstantTimeEq` only. 
@@ -281,6 +403,112 @@ mod tests { assert_eq!(SecretString::default().len(), 0); } + /// `is_blank()` truth table. The boundary deliberately + /// exercises Unicode whitespace — `str::trim` uses the `White_Space` + /// property, so NBSP (`U+00A0`) trims to blank but ZWSP (`U+200B`, + /// not `White_Space`) does not. + #[test] + fn is_blank_truth_table() { + // Blank inputs. + assert!(SecretString::empty().is_blank()); + assert!(SecretString::new("").is_blank()); + assert!(SecretString::new(" ").is_blank()); + assert!(SecretString::new("\t\r\n ").is_blank()); + assert!( + SecretString::new("\u{00A0}").is_blank(), + "NBSP is White_Space" + ); + // Non-blank inputs. + assert!(!SecretString::new("pw").is_blank()); + assert!(!SecretString::new(" pw ").is_blank()); + assert!( + !SecretString::new("\u{200B}").is_blank(), + "ZWSP is NOT White_Space" + ); + } + + /// `is_blank` returns a `bool` and exposes no borrowed + /// plaintext, callable with only `secrets` (no serde/schemars). + #[test] + fn is_blank_signature_returns_bool_no_borrow() { + let f: fn(&SecretString) -> bool = SecretString::is_blank; + assert!(f(&SecretString::new(""))); + assert!(!f(&SecretString::new("x"))); + } + + /// `SecretString` must never implement + /// `Serialize` or `Display`, even with serde compiled in. This is a + /// compile-time `!impl` assertion — adding either impl breaks the + /// build. `serde::Serialize` is nameable here because `secrets` always + /// pulls the `serde` dep. + #[test] + fn secret_string_has_no_serialize_no_display() { + static_assertions::assert_not_impl_any!(SecretString: serde::Serialize, std::fmt::Display); + } + + /// Regression: the `serde` DEP is on under + /// `secrets`, yet the `Deserialize` IMPL stays ABSENT because it is + /// gated on the dedicated `secret-serde` feature — proving the + /// default-off gate is satisfiable even while serde is compiled. 
+ #[cfg(not(feature = "secret-serde"))] + #[test] + fn deserialize_absent_without_secret_serde_even_though_serde_dep_on() { + static_assertions::assert_not_impl_any!( + SecretString: serde::de::DeserializeOwned + ); + } + + /// With `secret-serde` on, the `Deserialize` impl is + /// present (and `Serialize` is still absent — see the always-on test). + #[cfg(feature = "secret-serde")] + #[test] + fn deserialize_present_with_secret_serde() { + static_assertions::assert_impl_all!(SecretString: serde::de::DeserializeOwned); + static_assertions::assert_not_impl_any!(SecretString: serde::Serialize); + } + + /// `Deserialize` round-trips the value through the + /// zeroizing constructor; the result `ct_eq`s a directly-built secret + /// and has the right length. + #[cfg(feature = "secret-serde")] + #[test] + fn deserialize_routes_value_through_zeroizing_constructor() { + let s: SecretString = serde_json::from_str("\"correct horse battery staple\"").unwrap(); + assert!(bool::from( + s.ct_eq(&SecretString::new("correct horse battery staple")) + )); + assert_eq!(s.len(), 28); + } + + /// `JsonSchema` renders a plain `string` and leaks no + /// length/value policy — no `minLength`/`maxLength`/`pattern`/`format`, + /// no `example`/`default`/`enum`. + #[cfg(feature = "secret-schemars")] + #[test] + fn json_schema_is_plain_string_no_policy_leak() { + let schema = schemars::schema_for!(SecretString); + let v = serde_json::to_value(&schema).unwrap(); + assert_eq!(v["type"], serde_json::json!("string")); + for forbidden in [ + "minLength", + "maxLength", + "pattern", + "format", + "example", + "default", + "enum", + ] { + assert!( + v.get(forbidden).is_none(), + "schema leaked `{forbidden}`: {v}" + ); + } + // Any description present must carry no example/secret value. 
+ if let Some(desc) = v.get("description").and_then(|d| d.as_str()) { + assert!(!desc.contains("horse")); + } + } + #[test] fn secret_bytes_debug_redacted() { let b = SecretBytes::from_slice(&[1, 2, 3, 4, 5]); @@ -301,8 +529,7 @@ mod tests { #[test] fn empty_secret_bytes_constructs_without_mlocking_dangling_ptr() { // A capacity-0 `Vec` has a dangling `as_ptr()`; `new` must not - // pass it to `region::lock`. Constructing must not panic and the - // wrapper must round-trip as empty. + // pass it to `region::lock` or panic. let b = SecretBytes::new(Vec::new()); assert!(b.is_empty()); assert_eq!(b.len(), 0); @@ -336,53 +563,31 @@ mod tests { assert!(std::mem::needs_drop::()); }; - /// Best-effort runtime check that `Drop` wipes the full `SecretString` - /// capacity. Reads freed memory — UB in the strict sense, flaky under - /// parallelism; run single-threaded: - /// `cargo test --features secrets -- secret_string_drop_zeroes --ignored --test-threads=1` + /// Proves zeroize wipes the buffer. Every read is on a STILL-LIVE + /// value (no post-free deref / UB); the in-place slice wipe also + /// proves the bytes go to zero with the length preserved. #[test] - #[ignore] - fn secret_string_drop_zeroes_full_capacity() { - let ptr: *const u8; - let cap: usize; - { - let s = SecretString::new("sensitive_seed_material"); - ptr = s.inner.as_ptr(); - cap = s.inner.capacity(); - // SAFETY: live allocation, read for `cap` bytes pre-drop. - #[allow(unsafe_code)] - let pre = unsafe { std::slice::from_raw_parts(ptr, cap) }; - assert!(pre.iter().any(|&b| b != 0)); - } - // SAFETY: best-effort post-free read; single-thread makes page - // reuse before this read unlikely. - #[allow(unsafe_code)] - let post = unsafe { std::slice::from_raw_parts(ptr, cap) }; - assert!(post.iter().all(|&b| b == 0), "buffer not zeroed on drop"); - } - - /// Best-effort runtime check that `Drop` wipes `SecretBytes`. Same - /// caveat as above; run single-threaded with `--ignored`. 
A - /// page-sized buffer is used so the allocator is unlikely to reuse - /// the freed page before the post-drop read (a tiny `Vec` would be - /// recycled immediately, making the check meaningless). - #[test] - #[ignore] - fn secret_bytes_drop_zeroes() { - let ptr: *const u8; - let cap: usize; - { - let b = SecretBytes::from_slice(&[0xAB; 4096]); - ptr = b.inner.as_ptr(); - cap = b.inner.capacity(); - // SAFETY: live allocation, read for `cap` bytes pre-drop. - #[allow(unsafe_code)] - let pre = unsafe { std::slice::from_raw_parts(ptr, cap) }; - assert!(pre.iter().any(|&x| x != 0)); - } - // SAFETY: best-effort post-free read; see note above. - #[allow(unsafe_code)] - let post = unsafe { std::slice::from_raw_parts(ptr, cap) }; - assert!(post.iter().all(|&x| x == 0), "buffer not zeroed on drop"); + fn manual_zeroize_wipes_live_buffer() { + let mut b = SecretBytes::from_slice(&[0xABu8; 64]); + assert!(b.expose_secret().iter().any(|&x| x != 0)); + b.expose_secret_mut().zeroize(); + assert_eq!(b.len(), 64, "in-place wipe must preserve length"); + assert!( + b.expose_secret().iter().all(|&x| x == 0), + "SecretBytes buffer not zeroed by manual zeroize" + ); + + // SecretBytes wrapper-level zeroize empties the buffer. + let mut b2 = SecretBytes::from_slice(&[0xCDu8; 32]); + b2.zeroize(); + assert!(b2.is_empty(), "SecretBytes::zeroize must empty the buffer"); + + // SecretString wrapper-level zeroize empties the buffer; the + // exposed view holds no residual plaintext. 
+ let mut s = SecretString::new("sensitive_seed_material"); + assert!(!s.is_empty()); + s.zeroize(); + assert!(s.is_empty(), "SecretString::zeroize must empty the buffer"); + assert_eq!(s.expose_secret(), ""); } } diff --git a/packages/rs-platform-wallet-storage/src/secrets/store.rs b/packages/rs-platform-wallet-storage/src/secrets/store.rs index b4e75512e9e..0ff9e393a97 100644 --- a/packages/rs-platform-wallet-storage/src/secrets/store.rs +++ b/packages/rs-platform-wallet-storage/src/secrets/store.rs @@ -1,34 +1,31 @@ //! [`SecretStore`] — the public, never-leaking secrets entry point. //! -//! Consumers use this enum, not the `keyring_core` SPI. Its read path -//! ([`SecretStore::get`]) yields a zeroizing [`SecretBytes`]; a raw -//! `Vec` never crosses this boundary, and the write path -//! ([`SecretStore::set`]) takes `&SecretBytes` so a caller cannot pass an -//! unwrapped buffer (M-STRONG-TYPES). -//! -//! Errors surface as the typed [`SecretStoreError`] — losslessly for the -//! [`SecretStore::File`] arm (so `WrongPassphrase` vs `Corruption` vs -//! `AlreadyLocked` stay distinct), and as a best-effort projection of -//! `keyring_core::Error` for the [`SecretStore::Os`] arm. The internal -//! `keyring_core::api::CredentialApi` / `CredentialStoreApi` impls remain -//! the backend SPI; `SecretStore` delegates through them. +//! Consumers use this enum, not the `keyring_core` SPI it delegates to. +//! Reads yield a zeroizing [`SecretBytes`] and writes take `&SecretBytes` +//! so a raw buffer never crosses the boundary. Errors are the typed +//! [`SecretStoreError`] — lossless on the [`SecretStore::File`] arm, a +//! best-effort projection of `keyring_core::Error` on the +//! [`SecretStore::Os`] arm. 
use std::sync::Arc; use keyring_core::api::CredentialStoreApi; use keyring_core::{Entry, Error as KeyringError}; +use super::envelope; use super::error::{OsKeyringErrorKind, SecretStoreError}; -use super::secret::SecretBytes; +use super::secret::{SecretBytes, SecretString}; use super::validate::WalletId; -use super::{default_credential_store, EncryptedFileStore, SERVICE_PREFIX}; +use super::{default_credential_store, EncryptedFileStore, MAX_SECRET_LEN, SERVICE_PREFIX}; /// A passphrase-or-OS-keyring backed store for wallet secret material. /// -/// The only public read path is [`get`](SecretStore::get), which yields a -/// zeroizing [`SecretBytes`] — a raw `Vec` never crosses this -/// boundary. Backend selection is an explicit operator decision; there is -/// no silent fallback between the two arms. +/// Every read path ([`get`](SecretStore::get), +/// [`get_secret`](SecretStore::get_secret), and the read inside +/// [`reprotect`](SecretStore::reprotect)) yields a zeroizing +/// [`SecretBytes`] — a raw `Vec` never crosses this boundary. Backend +/// selection is an explicit operator decision; there is no silent fallback +/// between the two arms. pub enum SecretStore { /// Self-contained Argon2id + XChaCha20-Poly1305 vault file. /// Recommended on headless / server hosts. @@ -49,52 +46,142 @@ impl SecretStore { Ok(Self::File(EncryptedFileStore::open(path, passphrase)?)) } + /// Open (or create) a **deliberately keyless** file-backed vault — the + /// only door that takes no passphrase. Obfuscation, not confidentiality + /// (the key derives from an empty passphrase under the public salt): use + /// it where the stored secrets carry their own Tier-2 object password, + /// or as a staging step before [`EncryptedFileStore::rekey`] to a real + /// passphrase. [`file`](SecretStore::file) rejects a blank passphrase; + /// this is the explicit keyless alternative. 
+ pub fn file_unprotected(path: impl AsRef) -> Result { + Ok(Self::File(EncryptedFileStore::open_unprotected(path)?)) + } + /// Open the platform's default OS keyring, failing closed when none /// is reachable (headless / no Secret Service). pub fn os() -> Result { Ok(Self::Os(default_credential_store().map_err(map_spi)?)) } - /// Store `secret` under `(service, label)`, overwriting any prior - /// value. Takes `&SecretBytes` so the caller cannot pass an unwrapped - /// buffer; the wrapped bytes are exposed to the SPI only at the last - /// moment. + /// Store `secret` under `(service, label)` UNPROTECTED (Tier-2 + /// scheme-0), overwriting any prior value — a `set_secret(.., None)` + /// wrapper kept for non-breaking back-compat. Takes `&SecretBytes` so + /// the caller cannot pass an unwrapped buffer. pub fn set( &self, service: &WalletId, label: &str, secret: &SecretBytes, + ) -> Result<(), SecretStoreError> { + self.set_secret(service, label, secret, None) + } + + /// Store `secret` under `(service, label)`, overwriting any prior value. + /// + /// `password` selects the protection: `None` writes an unprotected + /// envelope; `Some(pw)` seals the bytes under the object password `pw` + /// (Argon2id + XChaCha20-Poly1305) **before** they reach the backend, so + /// a protected object stays confidential even under a full backend + /// compromise. A blank `pw` is rejected + /// ([`BlankPassphrase`](SecretStoreError::BlankPassphrase)). + /// + /// **No recovery (availability):** if a protected object's password is + /// lost, the object is permanently unrecoverable — there is no reset + /// path. The UX must state this plainly. + /// + /// **Entropy is the caller's:** a protected object's confidentiality + /// rests entirely on the password's entropy against an offline Argon2id + /// attacker who already holds the backend. This crate enforces only + /// non-blank; strength estimation / policy is the caller's job. 
+ /// + /// The write is a same-slot overwrite that leaves the prior value intact + /// on a crash: on the `File` arm via the vault's atomic replace; on the + /// `Os` arm via the backend's single-item-replace contract. + /// Add/change/remove flows go through [`reprotect`](SecretStore::reprotect). + pub fn set_secret( + &self, + service: &WalletId, + label: &str, + secret: &SecretBytes, + password: Option<&SecretString>, + ) -> Result<(), SecretStoreError> { + // Wrap above the backend: the backend only ever stores the opaque + // envelope (ciphertext for a protected object). + let blob = envelope::wrap(service, label, password, secret.expose_secret())?; + self.put_raw(service, label, &blob) + } + + /// Store the already-enveloped opaque `blob` under `(service, label)`. + /// The shared write seam under [`set`] and [`set_secret`]. + /// + /// [`set`]: SecretStore::set + fn put_raw( + &self, + service: &WalletId, + label: &str, + blob: &SecretBytes, ) -> Result<(), SecretStoreError> { match self { - // File arm: the inherent typed path — no lossy SPI seam. - // `put_bytes` takes `&SecretBytes` directly, so the - // bare-buffer view never crosses this boundary. - Self::File(s) => s.put_bytes(service, label, secret), + // Inherent typed path — no lossy SPI seam, no bare buffer. + Self::File(s) => s.put_bytes(service, label, blob), Self::Os(store) => { let entry = build_os(store, service, label)?; - entry.set_secret(secret.expose_secret()).map_err(map_spi) + entry.set_secret(blob.expose_secret()).map_err(map_spi) } } } - /// Retrieve the secret stored under `(service, label)`, or `Ok(None)` - /// if absent. The plaintext is wrapped into [`SecretBytes`] at the - /// seam with no named `Vec` intermediate, so the bare-buffer window is - /// zero statements. + /// Retrieve the UNPROTECTED secret stored under `(service, label)`, or + /// `Ok(None)` if absent — a `get_secret(.., None)` wrapper kept for + /// non-breaking back-compat. 
A scheme-1 (password-protected) object read + /// through this path returns + /// [`NeedsPassword`](SecretStoreError::NeedsPassword); use + /// [`get_secret`](SecretStore::get_secret) with the object password. pub fn get( &self, service: &WalletId, label: &str, + ) -> Result, SecretStoreError> { + self.get_secret(service, label, None) + } + + /// Read the opaque bytes stored under `(service, label)`, or `Ok(None)` + /// if absent — the raw backend value (a Tier-2 envelope once writes go + /// through [`set_secret`](SecretStore::set_secret), or a legacy raw + /// value). The typed-vs-SPI distinction is preserved exactly as the + /// pre-Tier-2 path did. This is the shared seam under [`get`] and + /// [`get_secret`]; it does NOT interpret the envelope. + /// + /// [`get`]: SecretStore::get + fn get_raw( + &self, + service: &WalletId, + label: &str, ) -> Result, SecretStoreError> { match self { - // File arm: the inherent typed path keeps `WrongPassphrase` - // vs `Corruption` distinct (lossless). Plaintext rides as - // `SecretBytes` all the way; no rewrap needed. + // Inherent typed path: keeps WrongPassphrase vs Corruption + // distinct; plaintext rides as SecretBytes, no rewrap. Self::File(s) => s.get_bytes(service, label), Self::Os(store) => { let entry = build_os(store, service, label)?; match entry.get_secret() { - Ok(v) => Ok(Some(SecretBytes::new(v))), + Ok(v) => { + // Defense-in-depth: reject an oversized backend blob + // before it reaches the envelope parse/derive path. + // The File arm's stored bytes are already capped at + // MAX_SECRET_LEN by `put_bytes`; the Os backend has no + // such ceiling, so cap here. A legitimate envelope + // never exceeds MAX_SECRET_LEN; the overhead is + // headroom. 
+ let cap = MAX_SECRET_LEN + envelope::MAX_ENVELOPE_OVERHEAD; + if v.len() > cap { + return Err(SecretStoreError::SecretTooLarge { + found: v.len(), + max: cap, + }); + } + Ok(Some(SecretBytes::new(v))) + } Err(KeyringError::NoEntry) => Ok(None), Err(e) => Err(map_spi(e)), } @@ -102,6 +189,78 @@ impl SecretStore { } } + /// Retrieve the secret under `(service, label)` applying the strict, + /// fail-closed read, or `Ok(None)` if absent. + /// + /// `password` IS the caller's protection assertion — supply `Some(pw)` + /// for an object the caller's trusted model says is protected, `None` + /// otherwise. The expectation lives ONLY here, never in the stored + /// blob (see [`envelope::unwrap`]): + /// + /// - `Some(pw)` + a protected blob → the secret (or + /// [`WrongPassword`](SecretStoreError::WrongPassword) on tag fail); + /// - `Some(pw)` + a non-protected blob (unprotected / legacy raw) → + /// [`ExpectedProtectedButUnsealed`](SecretStoreError::ExpectedProtectedButUnsealed) + /// — a strip/downgrade, refused, no bytes returned; + /// - `None` + a protected blob → + /// [`NeedsPassword`](SecretStoreError::NeedsPassword) (never ciphertext); + /// - `None` + an unprotected / legacy raw blob → the secret. + /// + /// **Documented residual:** an attacker who ALSO rewrites the + /// consumer's trusted DB so the caller passes `None` for a stripped + /// object can still downgrade — out of this library's reach by + /// construction (the protection expectation is the caller's; see + /// `SECRETS.md`). The expectation is NEVER persisted by the library. + pub fn get_secret( + &self, + service: &WalletId, + label: &str, + password: Option<&SecretString>, + ) -> Result, SecretStoreError> { + // Absence is availability-only (deletion = DoS, never injection): + // a missing entry is Ok(None) under either password argument. + let Some(stored) = self.get_raw(service, label)? 
else { + return Ok(None); + }; + envelope::unwrap(service, label, password, stored.expose_secret()).map(Some) + } + + /// Add / change / remove an object password in one same-slot + /// unwrap→rewrap→overwrite — the canonical re-protection flow. + /// + /// Reads the object under the `current` expectation (so a strip is + /// caught fail-closed before any rewrap), then re-writes it under + /// `new`: + /// - **add:** `current = None`, `new = Some(pw)`; + /// - **change:** `current = Some(old)`, `new = Some(pw_new)`; + /// - **remove:** `current = Some(old)`, `new = None`. + /// + /// An absent object is a no-op (`Ok(())`). The rewrite is the same-slot + /// overwrite of [`set_secret`], so a crash between the read and the + /// commit leaves the prior value intact and readable under `current`. + /// After a successful call the consumer MUST update its own trusted + /// protection-status record (the protection expectation lives there). + /// + /// **No recovery:** changing or removing requires the `current` + /// password; if it is lost the object cannot be re-protected or read, + /// and is permanently unrecoverable (availability trade-off). + /// + /// **Entropy is the caller's:** the `new` password's entropy is the + /// whole confidentiality guarantee for the re-protected object; this + /// crate enforces only non-blank, not strength. + pub fn reprotect( + &self, + service: &WalletId, + label: &str, + current: Option<&SecretString>, + new: Option<&SecretString>, + ) -> Result<(), SecretStoreError> { + let Some(secret) = self.get_secret(service, label, current)? else { + return Ok(()); + }; + self.set_secret(service, label, &secret, new) + } + /// Delete the secret stored under `(service, label)`. Absent entries /// are a no-op (`Ok(())`), so deletion is idempotent. 
pub fn delete(&self, service: &WalletId, label: &str) -> Result<(), SecretStoreError> { @@ -123,12 +282,10 @@ impl SecretStore { /// Build the SPI [`Entry`] for `(service, label)` on the OS-keyring arm. /// -/// The reject-not-sanitize label allowlist (`^[A-Za-z0-9._-]{1,64}$`) -/// is enforced here before the call crosses into the OS backend. -/// Different OS keyrings accept, normalize, or reject non-allowlisted -/// bytes inconsistently; enforcing the allowlist at -/// this shim keeps `(service, label)` invariants identical to the -/// `File` arm and across every OS backend. +/// Enforces the label allowlist (`^[A-Za-z0-9._-]{1,64}$`) before the +/// call crosses into the OS backend, so the `(service, label)` invariant +/// stays identical to the `File` arm and across every OS keyring (each +/// accepts / normalizes / rejects non-allowlisted bytes differently). fn build_os( store: &Arc, service: &WalletId, @@ -140,12 +297,9 @@ fn build_os( } impl std::fmt::Debug for SecretStore { - /// Surfaces the backend engine/service identity without exposing any - /// secret material. The `Os` arm reports the SPI - /// `vendor()`/`id()` — non-secret backend tags (e.g. which OS keyring - /// is wired up) — rather than an opaque `Os(..)`. The `File` arm - /// delegates to [`EncryptedFileStore`]'s redacting `Debug` (path - /// only, no key/passphrase). + /// Surfaces the backend identity without any secret material: the `Os` + /// arm reports the SPI `vendor()`/`id()` tags; the `File` arm delegates + /// to [`EncryptedFileStore`]'s redacting `Debug` (path only). fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::File(s) => f.debug_tuple("SecretStore::File").field(s).finish(), @@ -159,15 +313,11 @@ impl std::fmt::Debug for SecretStore { } /// Project an OS-keyring SPI [`KeyringError`] into the typed -/// [`SecretStoreError`] for the [`Os`](SecretStore::Os) arm. 
-/// -/// The OS keyring has no typed `SecretStoreError` origin, so its variants -/// map best-effort into [`SecretStoreError::OsKeyring`] (carrying only a -/// non-secret discriminant) or the closest existing variant. Secret- -/// bearing keyring variants (`BadEncoding`, `BadDataFormat`) are -/// collapsed to a discriminant — their raw bytes never enter -/// `SecretStoreError`. (The [`File`](SecretStore::File) arm never reaches -/// this projection: it uses the inherent typed path.) +/// [`SecretStoreError`] for the [`Os`](SecretStore::Os) arm. Best-effort: +/// variants map into [`SecretStoreError::OsKeyring`] (non-secret +/// discriminant only) or the closest existing variant; byte-bearing +/// keyring variants are collapsed so their bytes never enter the type. +/// The [`File`](SecretStore::File) arm never reaches this projection. fn map_spi(e: KeyringError) -> SecretStoreError { match e { KeyringError::NoEntry => SecretStoreError::OsKeyring { @@ -197,7 +347,18 @@ mod tests { use crate::secrets::SecretString; fn file_store(dir: &std::path::Path) -> SecretStore { - SecretStore::file(dir.join("vault.pwsvault"), SecretString::new("pw-correct")).unwrap() + SecretStore::file(secure_vault_path(dir), SecretString::new("pw-correct")).unwrap() + } + + /// Tighten the umask-0002 tempdir (0o775) to 0o700 so it passes the + /// parent-dir perm check, then return a vault path inside it. + fn secure_vault_path(dir: &std::path::Path) -> std::path::PathBuf { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = std::fs::set_permissions(dir, std::fs::Permissions::from_mode(0o700)); + } + dir.join("vault.pwsvault") } fn wid(b: u8) -> WalletId { @@ -241,19 +402,14 @@ mod tests { #[test] fn wrong_passphrase_surfaces_typed_lossless() { - // Resident-vault model: the passphrase is verified at open() - // time (header verify-token), so a wrong-pass reopen fails at - // open() rather than on the first get(). 
The typed distinction - // still survives losslessly on the public path. + // Resident-vault model verifies the passphrase at open() (header + // verify-token), so a wrong-pass reopen fails at open(), losslessly. let dir = tempfile::tempdir().unwrap(); file_store(dir.path()) .set(&wid(1), "seed", &SecretBytes::from_slice(b"orig")) .unwrap(); - let err = SecretStore::file( - dir.path().join("vault.pwsvault"), - SecretString::new("pw-wrong"), - ) - .expect_err("wrong pass must fail open"); + let err = SecretStore::file(secure_vault_path(dir.path()), SecretString::new("pw-wrong")) + .expect_err("wrong pass must fail open"); assert!( matches!(err, SecretStoreError::WrongPassphrase), "expected WrongPassphrase, got {err:?}" @@ -266,9 +422,8 @@ mod tests { let s = file_store(dir.path()); s.set(&wid(1), "seed", &SecretBytes::from_slice(b"value")) .unwrap(); - // Corrupt the entry ciphertext while leaving the verify-token - // intact: the passphrase is still correct, so this is corruption, - // not a wrong passphrase. The lossless typed path keeps them apart. + // Corrupt the entry ciphertext but leave the verify-token intact: + // passphrase still correct, so this is Corruption, not WrongPassphrase. let SecretStore::File(ref fs) = s else { unreachable!() }; @@ -291,11 +446,10 @@ mod tests { #[test] fn already_locked_surfaces_typed_lossless() { - // Resident-vault model: a second open() of the same path while - // the first store is alive returns AlreadyLocked. The typed - // distinction survives losslessly on the public path. + // A second open() of a path the first store still holds returns + // AlreadyLocked, losslessly on the public path. 
let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("vault.pwsvault"); + let path = secure_vault_path(dir.path()); let _s1 = SecretStore::file(&path, SecretString::new("pw")).unwrap(); let err = SecretStore::file(&path, SecretString::new("pw")).unwrap_err(); assert!( @@ -312,16 +466,10 @@ mod tests { assert!(!dbg.contains("pw-correct")); } - /// The OS-keyring shim must enforce the label allowlist BEFORE - /// handing the value to the OS backend. The per-backend label - /// policies (macOS Keychain vs Windows - /// Credential Manager vs Secret Service) differ in what they accept, - /// normalize, or reject; the shim must keep the `(service, label)` - /// invariant uniform across every arm. - /// - /// A mock `CredentialStoreApi` that panics if its `build()` is - /// invoked proves the bad label never crosses the SPI seam — the - /// shim rejects with `SecretStoreError::InvalidLabel` first. + /// The shim must enforce the label allowlist before reaching the OS + /// backend (per-backend policies differ). A `CredentialStoreApi` that + /// panics on `build()` proves a bad label is rejected with + /// `InvalidLabel` before it ever crosses the SPI seam. #[test] fn build_os_rejects_invalid_label_before_spi() { use std::any::Any; @@ -355,9 +503,8 @@ mod tests { let store: Arc = Arc::new(PanickingStore); let os = SecretStore::Os(store); - // Every operation on the OS arm goes through `build_os`; the - // allowlist rejection MUST fire here, so the panicking SPI is - // never reached. + // Every OS-arm op goes through `build_os`, so the allowlist + // rejection fires before the panicking SPI is reached. for bad in ["lab el", "../escape", "", "a:b", "a/b", "lab\0el"] { let err = os .set(&wid(1), bad, &SecretBytes::from_slice(b"x")) @@ -378,4 +525,671 @@ mod tests { ); } } + + // ===== Tier-2 strict fail-closed read ===== + // + // Parameterised over BOTH arms. 
The "attacker who can write the + // backend" is modelled per arm by `Backend::place_raw`: on File it + // re-seals the chosen blob under the resident vault key via `put_bytes` + // (a cold/backup-swap actor could only corrupt → DoS, so the strip + // requires the vault key — the File-arm asymmetry); on Os it overwrites + // the keychain item directly (the bare envelope, no second AEAD — where + // the strip residual bites hardest). The writable Os fixture is the + // upstream `keyring_core::mock::Store` (a raw SPI `set_secret` bypasses + // the envelope), so no bespoke mock is needed. + + use keyring_core::mock; + + use crate::secrets::file::crypto::{KdfParams, ARGON2_MIN_M_KIB, ARGON2_MIN_T, ARGON2_P}; + use crate::secrets::file::format::KDF_ID_ARGON2ID; + + /// Argon2id floor params — fast enough for these tests. + fn floor() -> KdfParams { + KdfParams { + id: KDF_ID_ARGON2ID, + m_kib: ARGON2_MIN_M_KIB, + t: ARGON2_MIN_T, + p: ARGON2_P, + } + } + + fn protected(w: &WalletId, label: &str, pw: &str, secret: &[u8]) -> Vec { + envelope::wrap_with_params(w, label, Some(&SecretString::new(pw)), secret, floor()) + .unwrap() + .expose_secret() + .to_vec() + } + + fn unprotected(w: &WalletId, label: &str, secret: &[u8]) -> Vec { + envelope::wrap(w, label, None, secret) + .unwrap() + .expose_secret() + .to_vec() + } + + /// A backend under test plus the raw-write hook that plays the + /// backend-write attacker. + struct Backend { + store: SecretStore, + _dir: Option, + mock: Option>, + name: &'static str, + } + + impl Backend { + /// Write `blob` to `(w, label)` as opaque backend bytes (the + /// attacker's primitive / the protected-enrol setup). On Os this is + /// a raw SPI `set_secret` on the shared mock store, bypassing the + /// `SecretStore` envelope layer exactly as a breached keychain write + /// would. 
+ fn place_raw(&self, w: &WalletId, label: &str, blob: &[u8]) { + match (&self.store, &self.mock) { + (SecretStore::File(fs), _) => fs + .put_bytes(w, label, &SecretBytes::from_slice(blob)) + .unwrap(), + (SecretStore::Os(_), Some(mock)) => { + let service = format!("{SERVICE_PREFIX}{}", w.to_hex()); + mock.build(&service, label, None) + .unwrap() + .set_secret(blob) + .unwrap(); + } + _ => unreachable!("os backend must carry its mock"), + } + } + } + + fn file_backend() -> Backend { + let dir = tempfile::tempdir().unwrap(); + let store = file_store(dir.path()); + Backend { + store, + _dir: Some(dir), + mock: None, + name: "File", + } + } + + fn os_backend() -> Backend { + // The upstream in-memory mock store. The clone handed to + // `SecretStore::Os` and the handle kept for raw attacker writes + // share the same backing credentials by `Arc`. + let mock = mock::Store::new().unwrap(); + let store = SecretStore::Os(mock.clone()); + Backend { + store, + _dir: None, + mock: Some(mock), + name: "Os", + } + } + + /// The strict-read quadrant. + fn run_quadrant(b: &Backend) { + let w = wid(1); + let pw = SecretString::new("object-pw"); + + // scheme-0 + None → bytes (the ONLY byte-returning quadrant). + b.place_raw(&w, "u0", &unprotected(&w, "u0", b"plain-seed")); + assert_eq!( + b.store + .get_secret(&w, "u0", None) + .unwrap() + .unwrap() + .expose_secret(), + b"plain-seed", + "[{}] scheme-0 + None", + b.name + ); + + // scheme-1 + None → NeedsPassword (never ciphertext). + b.place_raw(&w, "p1", &protected(&w, "p1", "object-pw", b"real-seed")); + assert!( + matches!( + b.store.get_secret(&w, "p1", None).unwrap_err(), + SecretStoreError::NeedsPassword + ), + "[{}] scheme-1 + None", + b.name + ); + + // scheme-1 + Some(correct) → secret. + assert_eq!( + b.store + .get_secret(&w, "p1", Some(&pw)) + .unwrap() + .unwrap() + .expose_secret(), + b"real-seed", + "[{}] scheme-1 + Some(correct)", + b.name + ); + + // scheme-1 + Some(wrong) → WrongPassword. 
+ assert!( + matches!( + b.store + .get_secret(&w, "p1", Some(&SecretString::new("nope"))) + .unwrap_err(), + SecretStoreError::WrongPassword + ), + "[{}] scheme-1 + Some(wrong)", + b.name + ); + + // scheme-0 + Some(pw) → ExpectedProtectedButUnsealed (fail closed). + assert!( + matches!( + b.store.get_secret(&w, "u0", Some(&pw)).unwrap_err(), + SecretStoreError::ExpectedProtectedButUnsealed + ), + "[{}] scheme-0 + Some", + b.name + ); + + // magic-present-but-truncated + None → Corruption. + let mut trunc = envelope::MAGIC.to_vec(); + trunc.push(envelope::ENVELOPE_VERSION); // no scheme byte + b.place_raw(&w, "broken", &trunc); + assert!( + matches!( + b.store.get_secret(&w, "broken", None).unwrap_err(), + SecretStoreError::Corruption + ), + "[{}] truncated-with-magic + None", + b.name + ); + + // magic-less legacy raw + None → returns the bytes (legacy + // tolerance); + Some(pw) → fails closed, so the strict rule holds. + b.place_raw(&w, "legacy", b"raw-legacy-seed-no-magic"); + assert_eq!( + b.store + .get_secret(&w, "legacy", None) + .unwrap() + .unwrap() + .expose_secret(), + b"raw-legacy-seed-no-magic", + "[{}] legacy magic-less + None", + b.name + ); + assert!( + matches!( + b.store.get_secret(&w, "legacy", Some(&pw)).unwrap_err(), + SecretStoreError::ExpectedProtectedButUnsealed + ), + "[{}] legacy magic-less + Some", + b.name + ); + + // absent entry → Ok(None) under either arg (deletion = DoS). + assert!(b.store.get_secret(&w, "absent", None).unwrap().is_none()); + assert!(b + .store + .get_secret(&w, "absent", Some(&pw)) + .unwrap() + .is_none()); + } + + #[test] + fn l1_quadrant_file() { + run_quadrant(&file_backend()); + } + + #[test] + fn l1_quadrant_os() { + run_quadrant(&os_backend()); + } + + /// The non-vacuous strip-injection regression. The single + /// test the whole feature exists to make pass. 
+ fn run_strip_injection(b: &Backend) { + let w = wid(2); + let pw = SecretString::new("object-pw"); + + // Enrol protected: stored = a valid scheme-1 envelope of S_real. + b.place_raw( + &w, + "seed", + &protected(&w, "seed", "object-pw", b"REAL-SEED-S_real"), + ); + assert_eq!( + b.store + .get_secret(&w, "seed", Some(&pw)) + .unwrap() + .unwrap() + .expose_secret(), + b"REAL-SEED-S_real", + "[{}] legit protected read", + b.name + ); + + // Attacker overwrites the slot with a fresh, internally-valid + // scheme-0 envelope carrying a DIFFERENT seed S_evil. + let attacker_blob = unprotected(&w, "seed", b"EVIL-SEED-S_evil"); + b.place_raw(&w, "seed", &attacker_blob); + + // A password-supplied read of the stripped slot fails closed; + // S_evil is NEVER returned. + let err = b.store.get_secret(&w, "seed", Some(&pw)).unwrap_err(); + assert!( + matches!(err, SecretStoreError::ExpectedProtectedButUnsealed), + "[{}] strip must fail closed, got {err:?}", + b.name + ); + + // Non-vacuity: the attacker blob IS a valid unprotected envelope + // that WOULD decode to S_evil under `None` — so the refusal above is + // caused SOLELY by the Some(pw)+scheme-0 strict rule, not by any + // malformation (without the strict rule, S_evil would be returned). + let would_be = envelope::unwrap(&w, "seed", None, &attacker_blob).unwrap(); + assert_eq!( + would_be.expose_secret(), + b"EVIL-SEED-S_evil", + "[{}] non-vacuity: blob decodes to S_evil under None", + b.name + ); + } + + #[test] + fn l1_strip_injection_file() { + run_strip_injection(&file_backend()); + } + + #[test] + fn l1_strip_injection_os() { + run_strip_injection(&os_backend()); + } + + /// A consumer bug alone fails closed in BOTH directions. + fn run_both_det_bug_directions(b: &Backend) { + let w = wid(3); + let pw = SecretString::new("pw"); + // (a) over-supply a password on a genuinely unprotected object. 
+ b.place_raw(&w, "u", &unprotected(&w, "u", b"x")); + assert!(matches!( + b.store.get_secret(&w, "u", Some(&pw)).unwrap_err(), + SecretStoreError::ExpectedProtectedButUnsealed + )); + // (b) under-supply on a genuinely protected object. + b.place_raw(&w, "p", &protected(&w, "p", "pw", b"y")); + assert!(matches!( + b.store.get_secret(&w, "p", None).unwrap_err(), + SecretStoreError::NeedsPassword + )); + } + + #[test] + fn l1_both_det_bug_directions_file() { + run_both_det_bug_directions(&file_backend()); + } + + #[test] + fn l1_both_det_bug_directions_os() { + run_both_det_bug_directions(&os_backend()); + } + + /// The expectation is NEVER inferred from the blob's scheme + /// byte — identical scheme-1 blobs diverge solely on the password arg. + fn run_expectation_not_inferred(b: &Backend) { + let w = wid(4); + let pw = SecretString::new("pw"); + let blob = protected(&w, "a", "pw", b"seed"); + b.place_raw(&w, "a", &blob); + b.place_raw(&w, "b", &blob); + assert_eq!( + b.store + .get_secret(&w, "a", Some(&pw)) + .unwrap() + .unwrap() + .expose_secret(), + b"seed" + ); + assert!(matches!( + b.store.get_secret(&w, "b", None).unwrap_err(), + SecretStoreError::NeedsPassword + )); + } + + #[test] + fn l1_expectation_not_inferred_file() { + run_expectation_not_inferred(&file_backend()); + } + + #[test] + fn l1_expectation_not_inferred_os() { + run_expectation_not_inferred(&os_backend()); + } + + /// Unprotected→protected upgrade confusion is availability- + /// only, fail-closed (NeedsPassword), no leak / no injection. 
+ fn run_upgrade_confusion(b: &Backend) { + let w = wid(5); + b.place_raw(&w, "x", &protected(&w, "x", "attacker-pw", b"whatever")); + assert!(matches!( + b.store.get_secret(&w, "x", None).unwrap_err(), + SecretStoreError::NeedsPassword + )); + } + + #[test] + fn l1_upgrade_confusion_file() { + run_upgrade_confusion(&file_backend()); + } + + #[test] + fn l1_upgrade_confusion_os() { + run_upgrade_confusion(&os_backend()); + } + + /// An in-place scheme-byte flip (protected→unprotected). Some(pw) is caught by + /// the strict rule regardless. None reads the body as scheme-0 opaque + /// bytes (never the real seed) — a known residual, dominated by the + /// consumer-DB residual; pinned, not "fixed". + fn run_scheme_flip(b: &Backend) { + let w = wid(6); + let pw = SecretString::new("pw"); + let mut blob = protected(&w, "x", "pw", b"real-seed"); + let scheme_off = envelope::MAGIC.len() + 1; + assert_eq!(blob[scheme_off], envelope::SCHEME_PASSWORD); + blob[scheme_off] = envelope::SCHEME_UNPROTECTED; + b.place_raw(&w, "x", &blob); + + assert!(matches!( + b.store.get_secret(&w, "x", Some(&pw)).unwrap_err(), + SecretStoreError::ExpectedProtectedButUnsealed + )); + let got = b.store.get_secret(&w, "x", None).unwrap().unwrap(); + assert_ne!( + got.expose_secret(), + b"real-seed", + "the real seed must never surface from a flipped scheme byte" + ); + } + + #[test] + fn l1_scheme_flip_file() { + run_scheme_flip(&file_backend()); + } + + #[test] + fn l1_scheme_flip_os() { + run_scheme_flip(&os_backend()); + } + + // ===== Add / change / remove password + arm matrix ===== + // + // These exercise the PUBLIC set_secret/get_secret/reprotect API, so the + // protected writes/reads run the real (default 64 MiB) Argon2 — kept to + // a small number of derivations per test. + + /// The full enrol → change → remove lifecycle, each + /// step verified through the strict read. 
+ fn run_pw_lifecycle(b: &Backend) { + let w = wid(10); + let pw1 = SecretString::new("pw-one"); + let pw2 = SecretString::new("pw-two"); + + // ADD: start unprotected, enrol a password. + b.store + .set(&w, "seed", &SecretBytes::from_slice(b"SEED")) + .unwrap(); + assert_eq!( + b.store.get(&w, "seed").unwrap().unwrap().expose_secret(), + b"SEED" + ); + b.store.reprotect(&w, "seed", None, Some(&pw1)).unwrap(); + assert!( + matches!( + b.store.get(&w, "seed").unwrap_err(), + SecretStoreError::NeedsPassword + ), + "[{}] after add, None read needs a password", + b.name + ); + assert_eq!( + b.store + .get_secret(&w, "seed", Some(&pw1)) + .unwrap() + .unwrap() + .expose_secret(), + b"SEED" + ); + + // CHANGE: rotate to a new password (unwrap-old → rewrap-new). + b.store + .reprotect(&w, "seed", Some(&pw1), Some(&pw2)) + .unwrap(); + assert_eq!( + b.store + .get_secret(&w, "seed", Some(&pw2)) + .unwrap() + .unwrap() + .expose_secret(), + b"SEED" + ); + assert!( + matches!( + b.store.get_secret(&w, "seed", Some(&pw1)).unwrap_err(), + SecretStoreError::WrongPassword + ), + "[{}] old password no longer unlocks after change", + b.name + ); + + // REMOVE: back to unprotected. + b.store.reprotect(&w, "seed", Some(&pw2), None).unwrap(); + assert_eq!( + b.store.get(&w, "seed").unwrap().unwrap().expose_secret(), + b"SEED" + ); + assert!( + matches!( + b.store.get_secret(&w, "seed", Some(&pw2)).unwrap_err(), + SecretStoreError::ExpectedProtectedButUnsealed + ), + "[{}] after remove, a password read fails closed until the consumer updates its DB", + b.name + ); + } + + #[test] + fn pw_lifecycle_file() { + run_pw_lifecycle(&file_backend()); + } + + #[test] + fn pw_lifecycle_os() { + run_pw_lifecycle(&os_backend()); + } + + /// Losing the object password bricks the object — no recovery + /// path exists, every read fails closed. 
+ fn run_pw_no_recovery(b: &Backend) { + let w = wid(11); + let pw = SecretString::new("the-only-pw"); + b.store + .set_secret(&w, "seed", &SecretBytes::from_slice(b"SEED"), Some(&pw)) + .unwrap(); + assert!(matches!( + b.store + .get_secret(&w, "seed", Some(&SecretString::new("guess"))) + .unwrap_err(), + SecretStoreError::WrongPassword + )); + assert!(matches!( + b.store.get(&w, "seed").unwrap_err(), + SecretStoreError::NeedsPassword + )); + } + + #[test] + fn pw_no_recovery_file() { + run_pw_no_recovery(&file_backend()); + } + + #[test] + fn pw_no_recovery_os() { + run_pw_no_recovery(&os_backend()); + } + + /// `set`/`get` are additive `..,None` wrappers — `set` + /// writes a scheme-0 envelope, `get` reads it byte-exact, and a + /// password-supplied read of that unprotected object fails closed. + fn run_set_get_wrappers(b: &Backend) { + let w = wid(12); + b.store + .set(&w, "seed", &SecretBytes::from_slice(b"plain")) + .unwrap(); + assert_eq!( + b.store.get(&w, "seed").unwrap().unwrap().expose_secret(), + b"plain" + ); + assert!(matches!( + b.store + .get_secret(&w, "seed", Some(&SecretString::new("pw"))) + .unwrap_err(), + SecretStoreError::ExpectedProtectedButUnsealed + )); + } + + #[test] + fn set_get_wrappers_file() { + run_set_get_wrappers(&file_backend()); + } + + #[test] + fn set_get_wrappers_os() { + run_set_get_wrappers(&os_backend()); + } + + /// The Os arm has no passphrase concept; the Tier-1 blank + /// guard never fires and the round-trip is byte-exact. 
+ #[test] + fn os_arm_roundtrip_no_blank_guard() { + let b = os_backend(); + let w = wid(13); + b.store + .set(&w, "seed", &SecretBytes::from_slice(b"abc")) + .unwrap(); + assert_eq!( + b.store.get(&w, "seed").unwrap().unwrap().expose_secret(), + b"abc" + ); + b.store.delete(&w, "seed").unwrap(); + assert!(b.store.get(&w, "seed").unwrap().is_none()); + } + + /// [File]: a crash (disk-write failure) between the unwrap + /// and the overwrite-commit leaves the OLD protected value intact and + /// readable — no half-rotated / unprotected state. + #[cfg(unix)] + #[test] + fn pw_change_crash_safety_leaves_old_intact_file() { + use std::os::unix::fs::PermissionsExt; + + let dir = tempfile::tempdir().unwrap(); + let s = file_store(dir.path()); + let w = wid(14); + let old = SecretString::new("old-pw"); + let new = SecretString::new("new-pw"); + + s.set_secret(&w, "seed", &SecretBytes::from_slice(b"REAL"), Some(&old)) + .unwrap(); + + // Make the vault's parent read-only so the atomic temp-write fails + // mid-change (mirrors rekey_does_not_corrupt_on_disk_temp_failure). + std::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o500)).unwrap(); + let err = s.reprotect(&w, "seed", Some(&old), Some(&new)).unwrap_err(); + assert!(matches!(err, SecretStoreError::Io(_)), "got {err:?}"); + + // Restore write so the resident store can sync/clean up at drop. + std::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o700)).unwrap(); + + // The OLD value is still readable under the OLD password; the new + // password does not unlock it (no half-rotation). + assert_eq!( + s.get_secret(&w, "seed", Some(&old)) + .unwrap() + .unwrap() + .expose_secret(), + b"REAL" + ); + assert!(matches!( + s.get_secret(&w, "seed", Some(&new)).unwrap_err(), + SecretStoreError::WrongPassword + )); + } + + /// [Os]: a backend failure during the rewrite's write (after the read + /// succeeds) leaves the OLD value intact — no half-rotation. 
The mock's + /// one-shot error injection fails the next write, simulating a crash + /// mid-rewrite. `reprotect` is read-then-`set_secret`, split here so the + /// error lands on the write. + #[test] + fn os_rewrite_mid_write_failure_leaves_old_intact() { + let mock = mock::Store::new().unwrap(); + let store = SecretStore::Os(mock.clone()); + let w = wid(15); + let old = SecretString::new("old-pw"); + let new = SecretString::new("new-pw"); + store + .set_secret(&w, "seed", &SecretBytes::from_slice(b"REAL"), Some(&old)) + .unwrap(); + + // Read succeeds (the rewrite's first step) … + let secret = store.get_secret(&w, "seed", Some(&old)).unwrap().unwrap(); + // … then inject a one-shot backend error so the write fails. + let service = format!("{SERVICE_PREFIX}{}", w.to_hex()); + let entry = mock.build(&service, "seed", None).unwrap(); + let cred: &mock::Cred = entry.as_any().downcast_ref().unwrap(); + cred.set_error(KeyringError::PlatformFailure(Box::new( + std::io::Error::other("simulated backend write failure"), + ))); + let err = store + .set_secret(&w, "seed", &secret, Some(&new)) + .unwrap_err(); + assert!( + matches!(err, SecretStoreError::OsKeyring { .. }), + "got {err:?}" + ); + + // The OLD value is still readable; nothing rotated to `new`. + assert_eq!( + store + .get_secret(&w, "seed", Some(&old)) + .unwrap() + .unwrap() + .expose_secret(), + b"REAL" + ); + assert!(matches!( + store.get_secret(&w, "seed", Some(&new)).unwrap_err(), + SecretStoreError::WrongPassword + )); + } + + /// [Os]: the read-size guard rejects an oversized backend blob (a + /// malicious keychain returning more than a legitimate envelope ever + /// could) BEFORE it reaches the envelope parse/derive path. The bound is + /// `MAX_SECRET_LEN + MAX_ENVELOPE_OVERHEAD`; both the `get_secret` and + /// legacy `get` read paths enforce it. 
+ #[test] + fn os_read_rejects_oversized_blob() { + let b = os_backend(); + let w = wid(16); + let cap = MAX_SECRET_LEN + envelope::MAX_ENVELOPE_OVERHEAD; + // Attacker writes a blob one byte over the cap straight to the slot. + b.place_raw(&w, "seed", &vec![0u8; cap + 1]); + let err = b.store.get_secret(&w, "seed", None).unwrap_err(); + assert!( + matches!(err, SecretStoreError::SecretTooLarge { found, max } if found == cap + 1 && max == cap), + "get_secret got {err:?}" + ); + // The legacy `get` path is bounded too. + assert!(matches!( + b.store.get(&w, "seed").unwrap_err(), + SecretStoreError::SecretTooLarge { found, max } if found == cap + 1 && max == cap + )); + } } diff --git a/packages/rs-platform-wallet-storage/src/sqlite/backup.rs b/packages/rs-platform-wallet-storage/src/sqlite/backup.rs index 064ee3a23a2..7eaca6fe6aa 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/backup.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/backup.rs @@ -12,11 +12,9 @@ use crate::sqlite::error::WalletStorageError; use crate::sqlite::persister::{PruneReport, RetentionPolicy}; use crate::sqlite::util::permissions::apply_secure_permissions; -/// Fsync the parent directory of `path` on Unix so the rename entry -/// that materialised `path` is durable across power loss. -/// `persist` only fsyncs the file inode; on most Unix filesystems the -/// dentry update is journalled separately and can be lost on crash -/// without this step. No-op on non-Unix platforms. +/// Fsync `path`'s parent dir on Unix so the rename's dentry update is +/// durable across power loss (`persist` only fsyncs the file inode; the +/// dentry is journalled separately). No-op on non-Unix. #[cfg(unix)] fn fsync_parent_dir(path: &Path) -> Result<(), WalletStorageError> { if let Some(parent) = path.parent() { @@ -69,41 +67,27 @@ pub fn auto_backup_filename(kind: BackupKind) -> String { } } -/// Take an online backup of `src` to `dest`. 
Uses the -/// `rusqlite::backup::Backup::run_to_completion` page-stepping API -/// so writers aren't blocked. +/// Take an online backup of `src` to `dest` via the page-stepping +/// `Backup::run_to_completion` API so writers aren't blocked. /// /// # Atomicity /// -/// The page-stepping copy runs against a `NamedTempFile` staged in -/// `dest`'s parent directory. The temp is `persist_noclobber`-ed over -/// `dest` only on success — any failure (open, chmod, backup-stream) -/// drops the temp without ever materialising a partial `.db` file at -/// the caller's path. A pre-existing `dest` is rejected atomically by -/// `persist_noclobber` (no TOCTOU window). On Unix, the parent -/// directory is `fsync`-ed after the rename so the dentry update -/// survives power loss; on non-Unix this fsync step is a no-op. +/// The copy is staged in a `NamedTempFile` next to `dest` and +/// `persist_noclobber`-ed over `dest` only on success, so a failure never +/// materialises a partial `.db`. A pre-existing `dest` is rejected +/// atomically (no TOCTOU window), and the parent dir is fsynced afterward. pub fn run_to(src: &Connection, dest: &Path) -> Result<(), WalletStorageError> { if let Some(parent) = dest.parent() { if !parent.as_os_str().is_empty() && !parent.exists() { std::fs::create_dir_all(parent)?; } } - // Pre-existing-destination rejection happens at the - // `persist_noclobber` site below — that's atomic against the rename - // (no TOCTOU window between `dest.exists()` and persist). The - // CLI's `backup_to(file_path)` still gets the typed - // `BackupDestinationExists` error; auto-backup callers can't trip - // it because the filename carries a unique timestamp suffix. - - // Stage the backup into an unguessable temp file in the same - // directory. Same-FS guarantee makes `persist` an atomic rename. + // Stage in an unguessable temp file in the same dir; the same-FS + // guarantee makes `persist` an atomic rename. 
let parent = dest.parent().unwrap_or(Path::new(".")); let tmp = tempfile::NamedTempFile::new_in(parent)?; - // Tighten the temp's mode to 0o600 BEFORE persist so the - // destination inherits owner-only permissions via the atomic - // rename. Running chmod after persist would leave a brief - // umask-default window where the destination is observable. + // chmod 0o600 BEFORE persist so the destination inherits owner-only + // mode via the rename; chmod after would leave an observable window. #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; @@ -111,8 +95,6 @@ pub fn run_to(src: &Connection, dest: &Path) -> Result<(), WalletStorageError> { .set_permissions(std::fs::Permissions::from_mode(0o600))?; } - // Page-stepping copy against the temp. The dest Connection has to - // own its own file handle; rusqlite opens it from a path. let mut backup_conn = crate::sqlite::conn::open_conn(tmp.path(), crate::sqlite::conn::Access::ReadWrite)?; { @@ -120,15 +102,12 @@ pub fn run_to(src: &Connection, dest: &Path) -> Result<(), WalletStorageError> { // 100 pages × 4 KiB = 400 KiB per step on default SQLite page size. backup.run_to_completion(100, Duration::from_millis(5), None)?; } - // Close the backup Connection before persisting so SQLite flushes - // its own WAL/SHM siblings against the temp path — those go away - // with the rename since `persist` atomically renames the temp file. + // Close before persisting so SQLite flushes its WAL/SHM siblings + // against the temp path; the rename then sweeps them away. drop(backup_conn); - // `persist_noclobber` is the atomic check-and-rename — SQLite-free, - // no TOCTOU window between an `exists()` probe and the rename. - // `AlreadyExists` maps to the typed `BackupDestinationExists` for - // the CLI's overwrite-refusal contract. + // Atomic check-and-rename with no TOCTOU window; `AlreadyExists` maps + // to the typed `BackupDestinationExists` overwrite-refusal contract. 
tmp.persist_noclobber(dest).map_err(|e| { if e.error.kind() == std::io::ErrorKind::AlreadyExists { WalletStorageError::BackupDestinationExists { @@ -138,86 +117,41 @@ pub fn run_to(src: &Connection, dest: &Path) -> Result<(), WalletStorageError> { WalletStorageError::Io(e.error) } })?; - // Fsync the parent directory so the atomic rename's dentry update is - // durable across power loss. On non-Unix this is a no-op. fsync_parent_dir(dest)?; - // Re-tighten in case a non-Unix build (or a future platform-specific - // tweak) needs to refresh sibling perms after SQLite materialised - // them. No-op on Unix where the temp already landed at 0o600. + // Re-tighten for non-Unix builds; no-op on Unix where the temp + // already landed at 0o600. apply_secure_permissions(dest)?; Ok(()) } -/// Restore a `.db` backup over `dest_db_path`. Associated function; -/// caller must guarantee the destination is not held open by this -/// process. The caller (the persister's `restore_from_inner`) handles -/// the pre-restore auto-backup gate. +/// Restore a `.db` backup over `dest_db_path`. The caller must guarantee +/// the destination is not held open by this process and owns the +/// pre-restore auto-backup gate. /// /// # Atomicity /// -/// The restore is staged in two phases bounded by a SQLite-native -/// `BEGIN EXCLUSIVE` transaction on `dest_db_path` (kept across the -/// entire restore body): -/// -/// 1. Open the source read-only; run `PRAGMA integrity_check` + -/// schema-history + max-version sniffs. Any failure here aborts -/// before the live destination is touched. -/// 2. Open a short-lived writer connection on the destination and -/// `BEGIN EXCLUSIVE`. This blocks every other SQLite peer -/// (other `SqlitePersister` handles in this or sibling processes, -/// bare `rusqlite::Connection`s, the CLI) from writing the file -/// until restore completes. Peers waiting for the lock back off -/// via SQLite's own busy_timeout. 
The lock conn is DROPPED right -/// before `persist` so SQLite releases its file handle on the old -/// inode before the atomic rename takes its place. -/// 3. Stream the source into a `NamedTempFile` in `dest_db_path`'s -/// parent directory; re-run integrity + schema gates against the -/// STAGED bytes (catches a torn `io::copy`); unlink the existing -/// `-wal` / `-shm` siblings; chmod the temp to 0o600; then -/// `persist` over `dest_db_path` as an atomic rename. -/// -/// Either both the main DB and its WAL/SHM siblings are replaced, or -/// — on any pre-persist failure — none of them are touched. The -/// SQLite-native lock prevents a racing peer from committing rows -/// between the staged validation and the rename, which the prior -/// flock-based approach could not do (flock doesn't see SQLite peers). -/// -/// On Unix, the parent directory is `fsync`-ed after the rename so the -/// dentry update is durable across power loss; on non-Unix this is a -/// no-op. +/// Validation runs against the source and again against the STAGED bytes, +/// under a SQLite-native `BEGIN EXCLUSIVE` on `dest_db_path` that blocks +/// every other SQLite peer (which advisory flock could not). The staged +/// temp is `persist`-ed as an atomic rename only after all gates pass, so +/// either the DB and its WAL/SHM siblings are replaced together or nothing +/// is touched; the parent dir is fsynced afterward. See the numbered steps +/// in the body for the per-phase rationale. /// /// # Lock-release-before-rename trade-off /// -/// The EXCLUSIVE lock is released BEFORE the atomic rename, on -/// purpose. SQLite keeps a kernel file handle on the destination's -/// (old) inode for as long as the lock conn is alive; holding that -/// handle across the rename would leave it pointing at the unlinked -/// old inode while peers opening the new path would race the rename -/// itself (on some filesystems the rename can outright fail). 
-/// Releasing the lock first lets SQLite drop its old-inode handle -/// before the rename swaps it. -/// -/// The trade-off: a microsecond window opens between lock release and -/// rename in which a peer can acquire its own SQLite lock on the -/// destination's old inode. Any writes it makes within that window -/// land in the old inode, which the rename immediately unlinks — the -/// peer's writes are effectively dropped on the floor (the peer keeps -/// a handle on an inode that no longer has any directory entry; once -/// it closes, the bytes are reclaimed). That is acceptable for the -/// restore contract: callers serialize their own restore intent at -/// the application layer; the window is too short for a non-malicious -/// peer to land more than a transient miss, and a malicious peer -/// cannot escalate beyond losing its own write. Correct file-handle -/// semantics across the rename matter more than absolute lock -/// coverage. +/// The EXCLUSIVE lock is dropped just BEFORE the rename: SQLite holds a +/// kernel handle on the old inode while the lock conn is alive, and +/// holding it across the rename would point it at the unlinked inode and +/// can make the rename fail on some filesystems. The cost is a microsecond +/// window where a peer could write into the old inode the rename then +/// unlinks — its own write is lost, nothing escalates. Correct file-handle +/// semantics across the rename outweigh absolute lock coverage. pub fn restore_from(dest_db_path: &Path, src_backup: &Path) -> Result<(), WalletStorageError> { - // 1. Confirm the source is openable, then run cheap pre-staging - // integrity + schema-history + max-version sniffs against the - // source itself so an obviously-incompatible input fails before - // we stream the whole file into the destination's partition. 
- // The authoritative schema-history / version gate still re-runs - // on the STAGED copy (step 4) — that's the TOCTOU-safe check - // bound to the exact bytes about to be persisted. + // 1. Cheap early-out: sniff integrity + schema-history + version + + // wallet-identity against the source so an incompatible input fails + // before we stream the whole file. The authoritative, TOCTOU-safe + // gate re-runs on the STAGED bytes (step 4). let src = crate::sqlite::conn::open_conn(src_backup, crate::sqlite::conn::Access::ReadOnly) .map_err(map_source_open_err)?; run_integrity_check(&src, |report| WalletStorageError::IntegrityCheckFailed { @@ -227,30 +161,22 @@ pub fn restore_from(dest_db_path: &Path, src_backup: &Path) -> Result<(), Wallet return Err(WalletStorageError::SchemaHistoryMissing); } crate::sqlite::migrations::assert_schema_version_supported(&src)?; + crate::sqlite::conn::assert_wallet_application_id(&src)?; + crate::sqlite::migrations::assert_schema_history_well_formed(&src)?; drop(src); - // 2. SQLite-native exclusion. `BEGIN EXCLUSIVE` against a short- - // lived writer connection on the destination blocks every other - // SQLite peer (rusqlite Connection, sibling `SqlitePersister`) - // until the tx is committed/rolled-back or the conn drops. The - // prior flock approach was a false promise: advisory locks - // don't interlock with SQLite's own locking, so a peer mid-write - // could race the swap. The lock conn is dropped (`take()` + end - // of scope) BEFORE `tmp.persist` so SQLite releases its file - // handle on the old inode before the atomic rename — otherwise - // we'd leave a dangling handle on the unlinked inode. + // 2. SQLite-native exclusion: `BEGIN EXCLUSIVE` on a short-lived + // writer conn blocks every other SQLite peer until it drops (which + // advisory flock could not — it doesn't interlock with SQLite). The + // conn is dropped before `persist` (see lock-release trade-off). 
let mut dest_lock_conn: Option = if dest_db_path.exists() { let conn = crate::sqlite::conn::open_conn(dest_db_path, crate::sqlite::conn::Access::ReadWrite)?; - // Reuse a sensible busy_timeout so peers don't immediately - // surface BUSY without a backoff window. The destination DB - // may not have a persister attached yet (the persister is the - // CALLER), so this conn applies its own. + // The destination has no persister yet (the persister is the + // caller), so apply our own busy_timeout for a backoff window. conn.busy_timeout(std::time::Duration::from_secs(5))?; - // Take EXCLUSIVE up-front by promoting an immediate tx. If a - // peer holds the DB, SQLite waits for busy_timeout then - // returns BUSY — we surface that as `RestoreDestinationLocked` - // so callers keep their existing branch. + // BUSY after busy_timeout becomes `RestoreDestinationLocked` so + // callers keep their existing branch. match conn.execute_batch("BEGIN EXCLUSIVE") { Ok(()) => Some(conn), Err(rusqlite::Error::SqliteFailure(err, _)) @@ -267,19 +193,19 @@ pub fn restore_from(dest_db_path: &Path, src_backup: &Path) -> Result<(), Wallet None }; - // 3. Stage the source into a NamedTempFile in the destination's - // parent dir (unguessable name, no symlink-plant TOCTOU). + // 3. Stage the source into a NamedTempFile in the destination's parent + // dir (unguessable name, no symlink-plant TOCTOU). let parent = dest_db_path.parent().unwrap_or(Path::new(".")); let mut tmp = tempfile::NamedTempFile::new_in(parent)?; let mut src_file = std::fs::File::open(src_backup)?; std::io::copy(&mut src_file, tmp.as_file_mut())?; tmp.as_file().sync_all()?; - // 4. Re-run integrity_check on the STAGED file before - // persisting. A torn `std::io::copy` or transient FS error - // that escaped `sync_all`'s notice would otherwise persist a - // corrupted database. If the recheck fails, the temp file - // drops naturally and the live destination stays untouched. + // 4. 
Re-validate the STAGED bytes before persisting: a torn + // `io::copy` that escaped `sync_all` would otherwise persist a + // corrupt DB, and the recheck failing just drops the temp. Bound to + // the staged bytes (not the source handle) so a swap during the + // restore window can't slip a forward-version or foreign DB through. { let staged = crate::sqlite::conn::open_conn(tmp.path(), crate::sqlite::conn::Access::ReadOnly) @@ -287,19 +213,17 @@ pub fn restore_from(dest_db_path: &Path, src_backup: &Path) -> Result<(), Wallet run_integrity_check(&staged, |report| WalletStorageError::IntegrityCheckFailed { report, })?; - // Schema-history presence + max-version gate, bound to the - // staged bytes (not the first source handle) so a swap during - // the restore window can't slip a forward-version DB through. if !crate::sqlite::migrations::has_schema_history(&staged)? { return Err(WalletStorageError::SchemaHistoryMissing); } crate::sqlite::migrations::assert_schema_version_supported(&staged)?; + crate::sqlite::conn::assert_wallet_application_id(&staged)?; + crate::sqlite::migrations::assert_schema_history_well_formed(&staged)?; } - // 5. chmod 600 on the temp BEFORE persist so the destination - // inherits owner-only mode via the atomic rename. Chmodding - // post-persist would leave the new DB live at the destination on - // a chmod failure, contradicting the rolled-back error. + // 5. chmod 0o600 on the temp BEFORE persist so the destination + // inherits owner-only mode via the rename (post-persist chmod could + // fail with the new DB already live). #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; @@ -307,33 +231,20 @@ pub fn restore_from(dest_db_path: &Path, src_backup: &Path) -> Result<(), Wallet .set_permissions(std::fs::Permissions::from_mode(0o600))?; } - // 6. Release the SQLite-native EXCLUSIVE lock BEFORE touching the - // on-disk WAL/SHM siblings or running the rename. 
On Windows / - // some FUSE / AV-scanned mounts, `remove_file` against a file - // still held open by another handle on the same process returns - // `PermissionDenied`; on Unix the unlinked inodes remain - // reachable through the open fd but the rename window still - // benefits from a clean close. + // 6. Release the EXCLUSIVE lock before touching siblings/rename: on + // Windows / some FUSE mounts `remove_file` on a still-open file + // returns `PermissionDenied`, and the rename window wants a clean + // close (see lock-release trade-off above). if let Some(conn) = dest_lock_conn.take() { - // Best-effort rollback of the empty EXCLUSIVE tx; an error here - // means SQLite is already in trouble and `drop(conn)` covers - // the rest. Silent because the conn is about to drop anyway. let _ = conn.execute_batch("ROLLBACK"); drop(conn); } - // 7. Atomicity gate: every staged-file validation has now passed - // and our writer handle is closed, so it's safe to clear WAL/SHM - // siblings the replaced DB might have left behind. Doing this - // BEFORE persist ensures that either both the main DB and its - // siblings get replaced/cleared, or — if any earlier check - // failed — none of them are touched. - // - // Build sibling paths via `OsString::push` so non-UTF-8 bytes - // round-trip intact; `remove_file` runs unconditionally and - // `ErrorKind::NotFound` is a silent no-op (closes the `exists()` - // TOCTOU gate). Ordering requires the dest lock conn to be dropped - // first so cross-platform unlink semantics hold. + // 7. Clear any WAL/SHM siblings BEFORE persist so the DB and its + // siblings are replaced atomically (all-or-nothing). Sibling paths + // use `OsString::push` so non-UTF-8 bytes round-trip; `NotFound` is + // a silent no-op. Requires the lock conn dropped first for + // cross-platform unlink semantics. 
if let Some(file_name) = dest_db_path.file_name() { for ext in ["-wal", "-shm"] { let mut sibling_name = file_name.to_os_string(); @@ -351,28 +262,20 @@ pub fn restore_from(dest_db_path: &Path, src_backup: &Path) -> Result<(), Wallet tmp.persist(dest_db_path) .map_err(|e| WalletStorageError::Io(e.error))?; - // 9. Fsync the destination's parent directory so the atomic rename's - // dentry update is durable across power loss (no-op on non-Unix). + // 9. Make the rename's dentry update durable. fsync_parent_dir(dest_db_path)?; - // 10. Re-tighten siblings (SQLite may materialise -wal/-shm on next - // open; this is idempotent at restore-completion time). + // 10. Re-tighten perms (idempotent; SQLite may re-materialise -wal/-shm). apply_secure_permissions(dest_db_path)?; Ok(()) } -/// Run `PRAGMA integrity_check` and return `Ok(())` when SQLite reports -/// the single row `"ok"`. Any other result becomes a typed -/// `IntegrityCheckFailed` via the caller-supplied builder; an -/// underlying rusqlite error surfaces as `IntegrityCheckRunFailed`. -/// -/// SQLite returns one row per detected problem (capped at -/// `PRAGMA integrity_check(N)`; default 100). All rows are collected -/// and joined with `\n` so the typed report carries every diagnostic -/// instead of just the first line. -/// -/// `pub(crate)` so the persister's open-time A-8 probe shares the -/// same helper rather than reimplementing the report-rendering rule. +/// Run `PRAGMA integrity_check` and return `Ok(())` only on the single +/// row `"ok"`. Any other result becomes a typed `IntegrityCheckFailed` via +/// the caller-supplied builder; an underlying rusqlite error surfaces as +/// `IntegrityCheckRunFailed`. SQLite returns one row per detected problem +/// (default cap 100); all rows are `\n`-joined so the report carries every +/// diagnostic, not just the first. 
pub(crate) fn run_integrity_check( conn: &Connection, on_failure: F, @@ -392,12 +295,9 @@ where match item { Ok(s) => rows.push(s), Err(e) => { - // Severe corruption can cause SQLite to surface a - // `DatabaseCorrupt` SqliteFailure partway through the - // integrity_check stream. Treat it as end-of-stream - // when we already have diagnostics (the rows we have - // are still valid); if we have NOTHING, surface the - // typed `IntegrityCheckRunFailed`. + // SQLite can surface a `DatabaseCorrupt` partway through + // the stream; treat it as end-of-stream when we already + // have diagnostic rows, else surface it below. trailing_err = Some(e); break; } @@ -458,6 +358,10 @@ pub fn prune(dir: &Path, policy: RetentionPolicy) -> Result = Vec::new(); let mut kept = 0; for (idx, (ts, path)) in files.into_iter().enumerate() { + // `keep_last_n` is a FLOOR: the N newest are always kept even if + // `max_age` would evict them, so an age+count policy can't delete + // every backup. It also remains a cap (`pass_count`), so with + // `Some(n)` the keep/prune outcome no longer depends on `max_age`. + // `None` gives no floor (age-only may prune all). + let within_floor = matches!(policy.keep_last_n, Some(n) if idx < n); let pass_count = match policy.keep_last_n { Some(n) => idx < n, None => true, @@ -466,16 +370,14 @@ pub fn prune(dir: &Path, policy: RetentionPolicy) -> Result now.duration_since(ts).map(|d| d <= max).unwrap_or(true), None => true, }; - if pass_count && pass_age { + if within_floor || (pass_count && pass_age) { kept += 1; } else { match std::fs::remove_file(&path) { Ok(()) => removed.push(path), Err(e) => { - // A failed `remove_file` leaves the file on disk, so - // it MUST be counted in `kept`. The invariant - // `kept + removed.len() == total` then holds and - // `failed_removals` is a subset of `kept`. + // A failed removal leaves the file on disk, so count it + // as kept to preserve `kept + removed == total`.
failed_removals.push((path, e)); kept += 1; } @@ -559,6 +461,46 @@ mod tests { assert_eq!(secs, 1767225600); } + /// `backup_timestamp` must extract the embedded timestamp (not fall + /// back to mtime) for every `BackupKind` shape, including ones with + /// inner `-`. Guards the `rsplit('-')` coupling against a future label + /// that shifts the trailing token. + #[test] + fn backup_timestamp_extracts_embedded_token_for_all_kinds() { + let want = parse_compact_timestamp("20260101T000000Z").unwrap(); + let real_wallet_id = hex::encode([0xABu8; 32]); + let names = [ + "wallet-20260101T000000Z.db".to_string(), + // Multiple `-` from the from/to version segments. + "pre-migration-1-to-2-20260101T000000Z.db".to_string(), + // 64 lowercase hex chars: hex::encode never emits `-`, so the + // timestamp stays the last `-`-delimited token. + format!("pre-delete-{real_wallet_id}-20260101T000000Z.db"), + "pre-restore-20260101T000000Z.db".to_string(), + ]; + for name in names { + let got = backup_timestamp(Path::new(&name)); + assert_eq!( + got, + Some(want), + "backup_timestamp must parse the embedded token, not fall back to mtime, for {name}" + ); + } + } + + /// A label with a trailing non-timestamp segment must return `None` + /// (prune falls back to mtime) rather than misread a wrong token as a + /// valid time — a detectable regression if a future `BackupKind` + /// appends a `-`-bearing suffix after the timestamp. 
+ #[test] + fn backup_timestamp_rejects_trailing_non_timestamp_segment() { + assert_eq!( + backup_timestamp(Path::new("pre-delete-20260101T000000Z-label.db")), + None, + "a trailing non-timestamp segment must not parse as a timestamp" + ); + } + #[test] fn is_backup_file_recognises_prefixes() { assert!(is_backup_file(Path::new("/tmp/wallet-20260101T000000Z.db"))); diff --git a/packages/rs-platform-wallet-storage/src/sqlite/config.rs b/packages/rs-platform-wallet-storage/src/sqlite/config.rs index 1beb7c2c021..90832bfdc68 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/config.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/config.rs @@ -40,11 +40,19 @@ impl JournalMode { } /// SQLite synchronous mode. +/// +/// `Normal` (the default, paired with WAL) is **app-crash durable**: a +/// committed write survives a process crash but NOT necessarily a power +/// loss / OS crash, where the most recent transactions in the WAL (not +/// yet synced to disk) can be lost; the database itself stays consistent. +/// Choose `Full` for power-loss durability at the cost of an fsync per +/// commit. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum Synchronous { Off, + /// WAL default: durable across application crash, not power loss. #[default] Normal, + /// fsync on every commit: durable across power loss / OS crash. Full, Extra, } @@ -109,10 +117,8 @@ impl SqlitePersisterConfig { /// `/backups/auto/` (or `./backups/auto/` if the DB path has no parent). /// -/// Public so the CLI binary (a separate compilation unit) can share the -/// same resolution as the library's `SqlitePersisterConfig::new`. The -/// preferred narrower visibility would be `pub(super)`, but `pub use` -/// re-exports up to the crate root cannot expose a `pub(super)` item. +/// Public so the CLI binary (a separate compilation unit) shares the same +/// resolution as `SqlitePersisterConfig::new`.
pub fn default_auto_backup_dir(db_path: &Path) -> PathBuf { let parent = db_path .parent() diff --git a/packages/rs-platform-wallet-storage/src/sqlite/conn.rs b/packages/rs-platform-wallet-storage/src/sqlite/conn.rs index de8e182f208..e136e34a6c5 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/conn.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/conn.rs @@ -1,23 +1,40 @@ //! Single connection-open choke-point. //! -//! `PRAGMA foreign_keys` is per-connection and resets to OFF on every -//! open — it is not persisted in the database file, and no compile-time -//! knob in `libsqlite3-sys`'s bundled build forces it on. Enforcement is -//! therefore a runtime discipline: every connection that mutates rows -//! must enable it, and we must *prove* it took, because the pragma -//! silently no-ops on a SQLite built without FK support. -//! -//! Every library connection-open site routes through [`open_conn`] so -//! there is exactly one place that owns flags + FK enforcement. The CLI -//! binary's read-only `peek_schema_version` probe opens directly — it -//! never mutates rows, so FK enforcement is moot, and `open_conn` is -//! `pub(crate)` (not reachable from the separate bin target). +//! `PRAGMA foreign_keys` is per-connection, defaults to OFF on every open, +//! and silently no-ops on a SQLite built without FK support — so every +//! writer connection must enable it and read it back to prove it took. +//! All library opens route through [`open_conn`]; the CLI's read-only +//! `peek_schema_version` probe opens directly (no mutations, and +//! `open_conn` is `pub(crate)`, unreachable from the bin target). use rusqlite::{Connection, OpenFlags}; use std::path::Path; use crate::sqlite::error::WalletStorageError; +/// Magic stamped into the SQLite header `application_id` (offset 68) by +/// `V001__initial`. ASCII `"PLWT"` (Platform Wallet) big-endian. 
A +/// refinery-versioned DB whose `application_id` does not equal this is a +/// foreign SQLite database, not a wallet-storage DB. +pub(crate) const APPLICATION_ID: i32 = 0x504C_5754; + +/// Read the header `application_id` and assert it equals +/// [`APPLICATION_ID`]. Returns [`WalletStorageError::NotAWalletDb`] on +/// mismatch. The caller decides WHEN to run this — `open()` runs it +/// pre-migration on a refinery-versioned DB; `restore_from` runs it on +/// the staged copy. A brand-new (unmigrated) DB reports `0` and is the +/// caller's responsibility to skip (V001 stamps the real value). +pub(crate) fn assert_wallet_application_id(conn: &Connection) -> Result<(), WalletStorageError> { + let found: i32 = conn.pragma_query_value(None, "application_id", |row| row.get(0))?; + if found != APPLICATION_ID { + return Err(WalletStorageError::NotAWalletDb { + expected: APPLICATION_ID, + found, + }); + } + Ok(()) +} + /// How the opened connection will be used. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum Access { @@ -35,10 +52,9 @@ pub(crate) enum Access { /// For [`Access::ReadWrite`], enables `PRAGMA foreign_keys = ON` and /// reads it back, returning [`WalletStorageError::ForeignKeysNotEnforced`] /// if the result is not `1`. For [`Access::ReadOnly`], opens with -/// `SQLITE_OPEN_READ_ONLY` and performs no pragma. URI filename parsing -/// is deliberately not enabled: the crate never constructs `file:` URIs, -/// and leaving it off keeps a path from ever smuggling query parameters -/// (e.g. `?mode=rwc`) that could defeat the read-only intent. +/// `SQLITE_OPEN_READ_ONLY` and performs no pragma. URI filename parsing is +/// deliberately left off so a path can't smuggle query parameters (e.g. +/// `?mode=rwc`) that defeat the read-only intent. 
pub(crate) fn open_conn(path: &Path, access: Access) -> Result { let conn = match access { Access::ReadWrite => Connection::open(path)?, @@ -77,10 +93,8 @@ mod tests { assert_eq!(on, 1, "read-back must observe FK enforcement is on"); } - /// The hard-error variant the read-back returns when the pragma is a - /// no-op is wired and reachable. We can't build a FK-less SQLite in - /// the bundled build, so assert the typed error renders the intended - /// message rather than truncating the contract to "untestable". + /// The bundled build can't produce a FK-less SQLite, so assert the + /// read-back error variant at least renders its intended message. #[test] fn foreign_keys_not_enforced_variant_renders() { let err = WalletStorageError::ForeignKeysNotEnforced; diff --git a/packages/rs-platform-wallet-storage/src/sqlite/error.rs b/packages/rs-platform-wallet-storage/src/sqlite/error.rs index c1767d5b7e3..48647095012 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/error.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/error.rs @@ -1,18 +1,13 @@ //! Typed errors for `platform-wallet-storage`. //! -//! Every variant carries the upstream error via `#[source]` (or -//! `#[from]` where the conversion is the only thing the trait does), -//! never via a stringified copy. Variants never store user-facing -//! prose — the `#[error("...")]` attribute provides the renderable -//! `Display` form; the typed fields carry diagnostics. +//! Variants carry the upstream error via `#[source]`/`#[from]`, never a +//! stringified copy; the `#[error("...")]` attribute provides `Display`. //! -//! At the `PlatformWalletPersistence` trait boundary, this type -//! converts into `PersistenceError`: `LockPoisoned` keeps its -//! dedicated variant; everything else flows through -//! `PersistenceError::Backend { kind, source }` — `kind` is classified -//! by [`WalletStorageError::persistence_kind`] (Transient / Constraint / -//! 
Fatal) and `source` carries the boxed typed error so consumers can -//! walk `Error::source()` to the underlying `rusqlite` payload. +//! At the `PlatformWalletPersistence` boundary this converts into +//! `PersistenceError`: `LockPoisoned` keeps its dedicated variant, and +//! everything else flows through `Backend { kind, source }` where `kind` +//! comes from [`WalletStorageError::persistence_kind`] and `source` +//! preserves the typed error for `Error::source()` walking. use std::path::PathBuf; @@ -99,7 +94,7 @@ pub enum WalletStorageError { }, /// `delete_wallet` (or another wallet-id-keyed operation) was - /// called with an id that has no matching `wallet_metadata` row. + /// called with an id that has no matching `wallets` row. #[error("wallet not found: {}", hex::encode(wallet_id))] WalletNotFound { wallet_id: [u8; 32] }, @@ -200,11 +195,8 @@ pub enum WalletStorageError { IdentityEntryIdMismatch, /// An `asset_locks` row's typed-column `(outpoint, account_index)` - /// disagreed with the lifecycle blob's `(out_point, account_index)`. - /// Mirrors `IdentityKeyEntryMismatch` — a torn write, partial - /// migration, or restored corruption that survives the per-row - /// `integrity_check` is still rejected at decode time rather than - /// mis-bucketing the lock under the wrong account. + /// disagreed with the lifecycle blob's. Rejected at decode time rather + /// than mis-bucketing the lock under the wrong account. #[error( "asset_lock entry fields disagree with typed columns \ (typed outpoint={typed_outpoint}, blob outpoint={blob_outpoint}, \ @@ -217,26 +209,15 @@ pub enum WalletStorageError { blob_account_index: u32, }, - /// A blob payload exceeded the configured allocation cap during - /// decode. Surfaced separately from generic [`Self::BlobDecode`] so - /// operators can distinguish a hostile or corrupted oversize blob - /// from a structural decode failure. Defaults to 16 MiB — well - /// above any legitimate per-row payload. 
+ /// A blob exceeded the decode allocation cap (default 16 MiB). + /// Separate from [`Self::BlobDecode`] so operators can distinguish an + /// oversize blob from a structural decode failure. #[error("blob exceeded decode size limit ({len_bytes} bytes > {limit_bytes} byte cap)")] BlobTooLarge { len_bytes: usize, limit_bytes: usize, }, - /// An unspent UTXO named an address absent from - /// `core_derived_addresses`, so its owning account index can't be - /// resolved. Persisting it would mis-file live funds under account - /// 0 with no path back to the real account, so the write is refused. - /// Spent-only placeholder rows tolerate a missing mapping (they're - /// excluded from the unspent set) and do not raise this. - #[error("unspent utxo address {address} is not in core_derived_addresses")] - UtxoAddressNotDerived { address: String }, - /// `PRAGMA foreign_keys = ON` was issued on open but the read-back /// reported the constraint enforcement is still off — the linked /// SQLite build silently ignores the pragma (no FK support compiled @@ -244,6 +225,42 @@ pub enum WalletStorageError { #[error("SQLite foreign-key enforcement could not be enabled on this connection")] ForeignKeysNotEnforced, + /// The requested `journal_mode` read back as a different mode — + /// SQLite silently fell back (e.g. WAL→DELETE on some FUSE mounts). + /// With `synchronous=NORMAL` that risks corruption on power loss, so + /// open hard-errors instead of running downgraded. + #[error("journal_mode {requested} could not be applied (SQLite reports {actual})")] + JournalModeNotApplied { + requested: &'static str, + actual: String, + }, + + /// A pre-existing / restored DB passed `integrity_check` but its + /// `refinery_schema_history` carries a malformed row (non-RFC3339 + /// `applied_on` or non-numeric `checksum`). Probed BEFORE refinery + /// runs so a foreign or corrupted-but-integrity-valid input returns + /// a typed error instead of refinery panicking on the parse. 
+ #[error("refinery_schema_history is malformed: {reason}")] + SchemaHistoryMalformed { reason: &'static str }, + + /// A restore source / opened DB carries a `refinery_schema_history` + /// (so it is refinery-versioned) but its `application_id` header does + /// not match the wallet-storage magic — it is a foreign SQLite DB, + /// not a wallet database. Rejected before it can be persisted over + /// the live wallet DB or migrated in place. + #[error( + "not a platform-wallet-storage database: application_id {found:#010x} != expected {expected:#010x}" + )] + NotAWalletDb { expected: i32, found: i32 }, + + /// A second [`SqlitePersister`](crate::SqlitePersister) `open()` on a + /// path already open in THIS process. Each handle has its own + /// `Mutex` and write buffer, so buffered writes on one are + /// invisible to the other — silent state divergence. Refused until the + /// first persister drops. + #[error("a SqlitePersister is already open on {} in this process", path.display())] + AlreadyOpen { path: PathBuf }, + /// A value couldn't be cast to the database's native i64 /// representation without losing magnitude. #[error("integer overflow casting `{field}` (value={value}) to {target}")] @@ -253,20 +270,11 @@ pub enum WalletStorageError { target: SafeCastTarget, }, - /// Flush failed transiently (e.g. `SQLITE_BUSY` / `SQLITE_LOCKED`) - /// for `wallet_id`. The buffered changeset has been restored — the - /// next `flush(wallet_id)` will retry the same data merged with - /// anything stored in between. Callers should back off and retry - /// rather than dropping state. - /// - /// **Use exponential backoff; do NOT tight-loop on this error** — - /// hammering the persister at full speed turns a transient lock - /// contention into a hot CPU spin and delays whoever holds the - /// lock from releasing it. 
- /// - /// The variant name `FlushRetryable` is intentionally embedded in - /// the `Display` output so operators grepping production logs can - /// match on the variant directly. + /// Flush failed transiently (e.g. `SQLITE_BUSY` / `SQLITE_LOCKED`) for + /// `wallet_id`. The buffered changeset is restored, so the next + /// `flush(wallet_id)` retries it merged with anything stored in + /// between. Use **exponential backoff** — tight-looping turns lock + /// contention into a CPU spin that starves the lock holder. #[error( "FlushRetryable: flush failed transiently for wallet {}; buffer preserved for retry", hex::encode(wallet_id) @@ -291,31 +299,21 @@ impl From for PersistenceError { } impl WalletStorageError { - /// Construct a typed `BlobDecode` error from a static reason. - /// Used by schema modules that hit a structural decode error - /// (e.g. a 32-byte id column with the wrong length, or trailing - /// bytes after a payload). + /// Construct a `BlobDecode` error from a static reason. Used by schema + /// modules on a structural decode error (wrong-length id, trailing + /// bytes). pub(crate) fn blob_decode(reason: &'static str) -> Self { Self::BlobDecode { reason } } - /// `true` when the underlying failure is safe to retry — the - /// caller should preserve in-flight state and call again. - /// Transient codes: - /// - `DatabaseBusy` / `DatabaseLocked`: contention. - /// - `DiskFull`: operator clears disk space. - /// - `SystemIoFailure`: kernel-level I/O blip (NFS, raid rebuild). - /// - `OutOfMemory`: transient memory pressure. - /// - /// All four classes are recoverable environmental conditions — - /// dropping buffered state on them would be data loss for a - /// problem the operator (or kernel) clears on its own. + /// `true` when the failure is safe to retry — the caller should + /// preserve in-flight state and call again. 
Transient codes are the + /// recoverable environmental ones: `DatabaseBusy`/`DatabaseLocked` + /// (contention), `DiskFull`, `SystemIoFailure`, `OutOfMemory`. /// - /// The OUTER match on `WalletStorageError` is intentionally - /// wildcard-free: the enum MUST NOT gain `#[non_exhaustive]` so a - /// future variant forces the author to classify it here. The - /// INNER match on `rusqlite::ErrorCode` uses a wildcard because - /// `ErrorCode` is `#[non_exhaustive]` upstream. + /// The OUTER match is intentionally wildcard-free so a future variant + /// forces explicit classification here; the INNER `ErrorCode` match + /// needs a wildcard because that enum is upstream `#[non_exhaustive]`. pub fn is_transient(&self) -> bool { use rusqlite::ErrorCode; match self { @@ -343,13 +341,10 @@ impl WalletStorageError { | Self::AutoBackupDirUnwritable { .. } | Self::WalletNotFound { .. } | Self::WalletIdMismatch { .. } - // TODO(qa): `LockPoisoned` is classified as fatal here, but - // the end-to-end mutex-poison flow has no automated test (a - // panicking thread + join is hard to reproduce - // deterministically). Manual verification only via the - // table-driven test in `tests/sqlite_error_classification`. - // If you change this classification, re-derive - // `handle_flush_error`'s fatal-branch behavior to match. + // TODO(qa): `LockPoisoned` fatal classification has no e2e + // mutex-poison test; verified manually via + // `tests/sqlite_error_classification`. Re-check + // `handle_flush_error`'s fatal branch if you change it. | Self::LockPoisoned | Self::RestoreDestinationLocked | Self::InvalidWalletIdHex { .. } @@ -362,28 +357,26 @@ impl WalletStorageError { | Self::ConsensusCodec { .. } | Self::BackupDestinationExists { .. } | Self::ForeignKeysNotEnforced + | Self::JournalModeNotApplied { .. } + | Self::SchemaHistoryMalformed { .. } + | Self::NotAWalletDb { .. } + | Self::AlreadyOpen { .. 
} | Self::IdentityKeyEntryMismatch | Self::IdentityEntryIdMismatch | Self::AssetLockEntryMismatch { .. } | Self::BlobTooLarge { .. } - | Self::UtxoAddressNotDerived { .. } | Self::IntegerOverflow { .. } => false, } } - /// Trait-boundary classification for the - /// [`PersistenceError::Backend`] kind field. Three classes: + /// Trait-boundary classification for [`PersistenceError::Backend`]: /// - /// - [`PersistenceErrorKind::Transient`] — every variant where - /// [`Self::is_transient`] is `true`. Caller MAY retry. - /// - [`PersistenceErrorKind::Constraint`] — SQL constraint / - /// FK / NOT NULL / UNIQUE / PK / CHECK violations. Schema / - /// integrity failure; caller bug, not infra. + /// - [`PersistenceErrorKind::Transient`] — [`Self::is_transient`] true; caller MAY retry. + /// - [`PersistenceErrorKind::Constraint`] — SQL constraint/FK/CHECK violation; caller bug. /// - [`PersistenceErrorKind::Fatal`] — everything else. /// - /// [`Self::LockPoisoned`] is handled by the `From` impl directly - /// (it maps to [`PersistenceError::LockPoisoned`] rather than - /// flowing through `Backend`). + /// [`Self::LockPoisoned`] never reaches here; the `From` impl maps it + /// straight to [`PersistenceError::LockPoisoned`]. pub fn persistence_kind(&self) -> PersistenceErrorKind { use rusqlite::ErrorCode; if self.is_transient() { @@ -395,10 +388,8 @@ impl WalletStorageError { { PersistenceErrorKind::Constraint } - // Refinery surfaces FK / constraint problems through - // rusqlite; if that path leaks through here the typed - // variant lives in `Self::Migration`, which we leave as - // `Fatal` since a migration failure isn't a caller bug. + // A migration failure (`Self::Migration`) isn't a caller bug, + // so it stays `Fatal` rather than `Constraint`. _ => PersistenceErrorKind::Fatal, } } @@ -442,11 +433,14 @@ impl WalletStorageError { Self::ConsensusCodec { .. } => "consensus_codec", Self::BackupDestinationExists { .. 
} => "backup_destination_exists", Self::ForeignKeysNotEnforced => "foreign_keys_not_enforced", + Self::JournalModeNotApplied { .. } => "journal_mode_not_applied", + Self::SchemaHistoryMalformed { .. } => "schema_history_malformed", + Self::NotAWalletDb { .. } => "not_a_wallet_db", + Self::AlreadyOpen { .. } => "already_open", Self::IdentityKeyEntryMismatch => "identity_key_entry_mismatch", Self::IdentityEntryIdMismatch => "identity_entry_id_mismatch", Self::AssetLockEntryMismatch { .. } => "asset_lock_entry_mismatch", Self::BlobTooLarge { .. } => "blob_too_large", - Self::UtxoAddressNotDerived { .. } => "utxo_address_not_derived", Self::IntegerOverflow { .. } => "integer_overflow", } } diff --git a/packages/rs-platform-wallet-storage/src/sqlite/kv.rs b/packages/rs-platform-wallet-storage/src/sqlite/kv.rs index bdf502e5b1a..865b351f269 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/kv.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/kv.rs @@ -1,24 +1,14 @@ //! SQLite-backed [`KvStore`] implementation for [`SqlitePersister`]. //! -//! One dedicated table per [`ObjectId`] variant (`meta_global`, -//! `meta_wallet`, `meta_identity`, `meta_token`, `meta_contact`, -//! `meta_platform_address`). Each table has a composite PRIMARY KEY of -//! its id column(s) plus `key`, so uniqueness comes straight from the PK -//! — no partial indexes, no nullable scope column. None of the tables -//! carry an FK: a `put` succeeds before its parent object exists. The -//! `AFTER DELETE` triggers in `V001__initial.rs` clean a scope's -//! metadata up when its parent is deleted. +//! One dedicated `meta_*` table per [`ObjectId`] variant, each with a +//! composite PRIMARY KEY (id columns + `key`) for uniqueness and no FK +//! (a `put` may precede its parent; `AFTER DELETE` triggers in +//! `V001__initial.rs` reap metadata when the parent is deleted). //! -//! `match scope` resolves each operation to its table and id-column -//! 
bindings; the SQL body (length-precheck read, upsert, delete, -//! prefix-list) is factored once per op and parameterised by table name -//! and id predicate. Table and column names come from the matched -//! variant — compile-time constants, never caller input — so they are -//! `format!`-spliced; the id *values* are bound parameters. -//! -//! All operations reuse `SqlitePersister`'s single `Mutex` -//! via the crate-private `conn()` accessor; no separate connection is -//! opened. +//! `format!`-spliced table/column names are always compile-time constants +//! from the matched variant, never caller input; id *values* and keys are +//! bound parameters. Operations reuse the persister's single +//! `Mutex` via `conn()`. use rusqlite::{OptionalExtension, ToSql}; @@ -133,10 +123,9 @@ impl From for KvError { WalletStorageError::LockPoisoned => KvError::LockPoisoned, WalletStorageError::Sqlite(e) => KvError::Sqlite(e), other => { - // Other variants don't arise from the `conn()` accessor - // — the accessor either yields `LockPoisoned` or hands - // back the guard. Stuff anything else into `Sqlite` - // via its Display, preserving the source chain. + // `conn()` only ever yields `LockPoisoned` or the guard, + // so other variants are unreachable here; preserve the + // source chain anyway by wrapping into `Sqlite`. KvError::Sqlite(rusqlite::Error::ToSqlConversionFailure(Box::new(other))) } } @@ -152,13 +141,10 @@ impl KvStore for SqlitePersister { // Bind the id values then the key in placeholder order. let mut params: Vec<&dyn ToSql> = sql.id_vals.iter().map(|v| v as &dyn ToSql).collect(); params.push(&key); - // Single-snapshot read: select `length(value)` and `value` in one - // row. The length (column 0) is checked against `MAX_VALUE_LEN` - // before `row.get(1)` materialises the BLOB — rusqlite reads the - // BLOB lazily on that call, so the cap gates the allocation with - // no cross-snapshot TOCTOU window. 
The inner `Result` carries the - // over-cap length out of the closure without ever touching - // column 1. + // Select `length(value)` and `value` in one row: rusqlite reads + // the BLOB lazily on `row.get(1)`, so checking the length first + // gates the allocation with no TOCTOU window. The inner `Result` + // carries an over-cap length out without touching column 1. let row: Option, usize>> = conn .query_row( &format!("SELECT length(value), value FROM {} {where_key}", sql.table), @@ -185,9 +171,8 @@ impl KvStore for SqlitePersister { fn put(&self, scope: &ObjectId, key: &str, value: &[u8]) -> Result<(), KvError> { validate_key(key)?; - // Cap the value before it reaches SQL so a `put` can never plant - // a row that a later `get` would refuse to materialise. The read - // path gates on the same `MAX_VALUE_LEN`. + // Cap before SQL so a `put` can't plant a row a later `get` would + // refuse to materialise (same `MAX_VALUE_LEN` on both paths). if value.len() > MAX_VALUE_LEN { return Err(KvError::ValueTooLarge { found: value.len(), @@ -196,9 +181,8 @@ impl KvStore for SqlitePersister { } let sql = ScopeSql::resolve(scope); let conn = self.conn().map_err(KvError::from)?; - // Column list / placeholders / conflict target all include the - // id columns ahead of `key`; the plain composite PK is the - // conflict target. Upsert refreshes `updated_at` on overwrite. + // Columns/placeholders/conflict-target put id columns ahead of + // `key` (the composite PK); upsert refreshes `updated_at`. 
let mut cols: Vec<&str> = sql.id_cols.to_vec(); cols.push("key"); let col_list = cols.join(", "); @@ -289,11 +273,11 @@ mod tests { let wid: WalletId = [id; 32]; let conn = p.lock_conn_for_test(); conn.execute( - "INSERT OR IGNORE INTO wallet_metadata (wallet_id, network, birth_height) \ + "INSERT OR IGNORE INTO wallets (wallet_id, network, birth_height) \ VALUES (?1, 'testnet', 0)", params![wid.as_slice()], ) - .expect("seed wallet_metadata"); + .expect("seed wallets"); wid } @@ -333,8 +317,7 @@ mod tests { #[test] fn global_composite_pk_rejects_duplicate() { - // Direct INSERTs (no ON CONFLICT) must be rejected because the - // composite PRIMARY KEY enforces per-key uniqueness. + // A direct INSERT (no ON CONFLICT) must hit the composite PK. let (p, _tmp) = open_persister(); let conn = p.lock_conn_for_test(); conn.execute( @@ -388,9 +371,8 @@ mod tests { #[test] fn get_rejects_oversized_value_before_materialising() { - // A row larger than MAX_VALUE_LEN (planted via direct SQL — - // bypassing `put`'s cap) must surface as ValueTooLarge instead - // of OOMing the process. + // A row over MAX_VALUE_LEN (planted directly, bypassing `put`) + // must surface ValueTooLarge instead of OOMing. let (p, _tmp) = open_persister(); let oversize = vec![0u8; MAX_VALUE_LEN + 1]; { @@ -442,7 +424,7 @@ mod tests { unreachable!() }; conn.execute( - "DELETE FROM wallet_metadata WHERE wallet_id = ?1", + "DELETE FROM wallets WHERE wallet_id = ?1", params![wid.as_slice()], ) .expect("delete wallet"); diff --git a/packages/rs-platform-wallet-storage/src/sqlite/migrations.rs b/packages/rs-platform-wallet-storage/src/sqlite/migrations.rs index 1163fe62044..d8600c55c6d 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/migrations.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/migrations.rs @@ -8,9 +8,8 @@ use rusqlite::OptionalExtension; use crate::sqlite::error::WalletStorageError; -// `embed_migrations!` generates a `migrations` module with a `runner()` -// function. 
The path is relative to the crate root (where `Cargo.toml` -// lives). +// Generates a `migrations` module with `runner()`; path is relative to +// the crate root. refinery::embed_migrations!("./migrations"); /// Apply every pending migration to `conn`. @@ -20,10 +19,8 @@ pub fn run(conn: &mut rusqlite::Connection) -> Result Result { @@ -65,13 +62,15 @@ pub(crate) fn has_schema_history(conn: &rusqlite::Connection) -> Result Result<(), WalletStorageError> { @@ -98,6 +97,41 @@ pub fn assert_schema_version_supported( Ok(()) } +/// Probe `refinery_schema_history` rows BEFORE handing the connection to +/// refinery, which parses `applied_on` (RFC3339) and `checksum` (`u64`) +/// with `unwrap()` — a malformed value would abort the process. Surfaces +/// a typed [`WalletStorageError::SchemaHistoryMalformed`] instead. +/// Quietly succeeds when the table is absent. +pub(crate) fn assert_schema_history_well_formed( + conn: &rusqlite::Connection, +) -> Result<(), WalletStorageError> { + if !has_schema_history(conn)? { + return Ok(()); + } + let mut stmt = conn.prepare("SELECT applied_on, checksum FROM refinery_schema_history")?; + let rows = stmt.query_map([], |row| { + let applied_on: String = row.get(0)?; + let checksum: String = row.get(1)?; + Ok((applied_on, checksum)) + })?; + for row in rows { + let (applied_on, checksum) = row?; + // Validate the way refinery will parse, so a malformed value fails + // typed here rather than panicking inside the runner. + if chrono::DateTime::parse_from_rfc3339(&applied_on).is_err() { + return Err(WalletStorageError::SchemaHistoryMalformed { + reason: "applied_on is not a valid RFC3339 timestamp", + }); + } + if checksum.parse::().is_err() { + return Err(WalletStorageError::SchemaHistoryMalformed { + reason: "checksum is not a valid u64", + }); + } + } + Ok(()) +} + /// List `(version, name)` of every embedded migration. Used by tests and /// the migration-drift hash check. 
pub fn embedded_migrations() -> Vec<(i32, String)> { @@ -109,8 +143,10 @@ pub fn embedded_migrations() -> Vec<(i32, String)> { } /// SHA-256 over `(version, name)` of every embedded migration in version -/// order. Pinning this in tests catches edits to committed migrations -/// (forbidden by the append-only migration policy). +/// order. Deliberately content-blind: it hashes the migration set's +/// identity, not the SQL bodies, so it catches an added/removed/renamed +/// migration but ignores in-place DDL edits (a content-pinning guard +/// belongs with the schema freeze at release). #[cfg(any(test, feature = "__test-helpers"))] pub fn embedded_migrations_fingerprint() -> [u8; 32] { use sha2::{Digest, Sha256}; diff --git a/packages/rs-platform-wallet-storage/src/sqlite/persister.rs b/packages/rs-platform-wallet-storage/src/sqlite/persister.rs index 1cecf885308..9369fce02d9 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/persister.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/persister.rs @@ -1,7 +1,8 @@ //! [`SqlitePersister`] — the canonical `PlatformWalletPersistence` impl. +use std::collections::HashSet; use std::path::{Path, PathBuf}; -use std::sync::{Arc, Mutex, MutexGuard}; +use std::sync::{Arc, Mutex, MutexGuard, OnceLock}; use rusqlite::{Connection, OptionalExtension}; @@ -19,44 +20,45 @@ use crate::sqlite::schema; use crate::sqlite::util::permissions::{apply_secure_permissions, precreate_secure}; use crate::sqlite::util::safe_cast; -/// Sub-areas of `ClientStartState` that `load()` does not yet -/// reconstruct (blocked on upstream `Wallet::from_persisted`). +/// Persisted-but-not-rehydrated areas, surfaced in the structured +/// `tracing::info!` summary on every `load()`. /// -/// Surfaced via the structured `tracing::info!` summary on every -/// `load()` (`unimplemented` + `wallets_pending_rehydration` fields). 
-pub(crate) const LOAD_UNIMPLEMENTED: &[&str] = &["ClientStartState::wallets"]; +/// - `core::last_applied_chain_lock`: no V001 column; re-warms on the +/// first post-load SPV chainlock. +/// - `token_balances`: written by the `token_balances` slot but not read +/// back by `load()` (no reader wired in yet). +/// - `dashpay::overlay`: the `dashpay_profiles` / +/// `dashpay_payments_overlay` tables are a write-only indexed overlay; +/// DashPay state rehydrates from the identities blob, not these tables. +pub(crate) const LOAD_UNIMPLEMENTED: &[&str] = &[ + "core::last_applied_chain_lock", + "token_balances", + "dashpay::overlay", +]; /// Outcome of a `prune_backups` call. /// -/// Invariant: `kept == total_eligible - removed.len()`. A file is -/// counted as `kept` if it survived the policy (retained-by-rule) OR -/// if `remove_file` failed (`failed_removals` is a subset of `kept`). -/// Either way, the file is still on disk after this call. +/// Invariant: `kept == total_eligible - removed.len()`; a file is `kept` +/// if the policy retained it OR `remove_file` failed (so `failed_removals` +/// is a subset of `kept`). Either way it's still on disk. #[derive(Debug)] pub struct PruneReport { - /// Paths that were unlinked, sorted oldest-first by filename - /// timestamp. + /// Unlinked paths, oldest-first by filename timestamp. pub removed: Vec, - /// Files still on disk after this call. Equals - /// `total_eligible - removed.len()` and includes every - /// `failed_removals` entry — a file that couldn't be unlinked is - /// still on disk and therefore "kept". + /// Count still on disk (`total_eligible - removed.len()`), including + /// every `failed_removals` entry. pub kept: usize, - /// Files we tried to remove but couldn't, paired with the - /// underlying `io::Error`. Returned as part of `Ok(report)` so a - /// partial failure surfaces every removed AND every failed entry - /// — the caller can re-invoke `prune_backups` to retry just the - /// stragglers. 
+ /// Files we couldn't remove, paired with the `io::Error`. Returned in + /// `Ok(report)` so the caller can re-invoke to retry the stragglers. pub failed_removals: Vec<(PathBuf, std::io::Error)>, } /// Retention policy for `prune_backups`. /// -/// **AND-semantics**: a file is kept iff it satisfies BOTH rules. A -/// policy with `keep_last_n = Some(3)` and `max_age = Some(30d)` keeps -/// at most the three newest backups AND only those younger than 30 -/// days — a four-day-old backup that's the fifth-newest is removed. -/// `RetentionPolicy::default()` (both `None`) keeps every file. +/// `keep_last_n` is a **floor**: the N newest backups are always kept even +/// if `max_age` would evict them, so a policy setting both can never delete +/// everything. `keep_last_n = None` gives no floor (age-only may prune +/// all); `default()` (both `None`) keeps every file. #[derive(Debug, Clone, Copy, Default)] pub struct RetentionPolicy { pub keep_last_n: Option, @@ -78,24 +80,51 @@ impl RetentionPolicy { } } +/// Canonicalized paths held by a live [`SqlitePersister`] in this process. +/// Refusing a second in-process open ([`WalletStorageError::AlreadyOpen`]) +/// prevents two handles with independent buffers diverging; cross-process +/// peers are handled by SQLite's own EXCLUSIVE locking. +fn open_path_registry() -> &'static Mutex> { + static REGISTRY: OnceLock>> = OnceLock::new(); + REGISTRY.get_or_init(|| Mutex::new(HashSet::new())) +} + +/// Insert `path`, returning [`WalletStorageError::AlreadyOpen`] if held. +/// Recover from a poisoned registry mutex rather than wedging every open. +fn register_open_path(path: PathBuf) -> Result<(), WalletStorageError> { + let mut set = open_path_registry() + .lock() + .unwrap_or_else(|p| p.into_inner()); + if set.contains(&path) { + return Err(WalletStorageError::AlreadyOpen { path }); + } + set.insert(path); + Ok(()) +} + +/// Remove `path` from the open-path registry on persister drop. 
+fn release_open_path(path: &Path) { + let mut set = open_path_registry() + .lock() + .unwrap_or_else(|p| p.into_inner()); + set.remove(path); +} + /// SQLite-backed `PlatformWalletPersistence`. pub struct SqlitePersister { config: SqlitePersisterConfig, - // Single connection serializes reads through the write lock. - // Acceptable for the current workload (per-wallet operations, small - // read footprint); a read-only pool over the same WAL-mode file is + /// Canonicalized DB path held in the process-wide open-path registry. + /// Removed from the registry when this persister drops. + registered_path: PathBuf, + // Single connection serializes reads through the write lock — + // acceptable for the current per-wallet workload; a read-only pool is // the planned follow-up if read contention becomes measurable. conn: Arc>, buffer: Buffer, - /// Test-only one-shot injector for `flush_inner`. Lives on the - /// struct so `force_next_flush_to_fail` can survive across `&self` - /// calls. Production builds keep the slot but never write to it - /// (no public setter outside `#[cfg(any(test, feature = "__test-helpers"))]`). + /// Test-only one-shot injector for `flush_inner`. #[cfg(any(test, feature = "__test-helpers"))] primed_flush_error: Mutex>, - /// Test-only one-shot injection consumed by `delete_wallet`'s - /// pre-flush phase. Lets a test assert the buffer-restore and - /// skip-backup semantics without provoking a real SQL error. + /// Test-only one-shot injector for `delete_wallet`'s pre-flush phase. #[cfg(any(test, feature = "__test-helpers"))] primed_pre_flush_error: Mutex>, } @@ -139,47 +168,41 @@ impl SqlitePersister { } } - // Pre-create the DB file owner-only (0600) with O_EXCL BEFORE - // rusqlite opens it: the file is born at 0600 (no umask window) - // and an attacker-planted symlink at the path makes the create - // fail rather than redirect (no chmod-by-path TOCTOU). A no-op - // when the DB already exists. 
Brings the SQLite path to parity - // with the secrets-vault file path. + // Pre-create owner-only (0600) with O_EXCL before rusqlite opens: + // no umask window, and a planted symlink makes the create fail + // rather than redirect (no chmod-by-path TOCTOU). No-op if it + // already exists. precreate_secure(&config.path)?; - // Open the connection AND apply pragmas before checking for - // pending migrations so the integrity probe sees the configured - // journal mode and busy timeout. `open_conn` enables foreign-key - // enforcement and asserts the read-back before any write lands. + // Open + apply pragmas before checking pending migrations so the + // integrity probe sees the configured journal mode / busy timeout. let mut conn = crate::sqlite::conn::open_conn(&config.path, crate::sqlite::conn::Access::ReadWrite)?; - // Re-tighten to 0600 on Unix (idempotent on re-open) and sweep - // the WAL/SHM sidecars that SQLite creates after open. + // Re-tighten to 0600 and sweep the WAL/SHM sidecars SQLite created. apply_secure_permissions(&config.path)?; apply_pragmas(&mut conn, &config)?; - // Determine whether `schema_history` exists *before* we run - // migrations — that's the signal for "is this DB pre-existing or - // brand-new?". Errors from the underlying query are propagated, - // not silently treated as "no history". + // `schema_history` presence is the pre-existing-vs-brand-new + // signal; query errors propagate rather than masking as "none". let had_schema_history = crate::sqlite::migrations::has_schema_history(&conn)?; - // Run integrity_check on a pre-existing DB BEFORE migrations alter - // it. Bit-rot or escaped-WAL corruption detected here surfaces as - // the typed `IntegrityCheckFailed` before any schema mutation - // lands. The pre-migration auto-backup snapshots the live state, - // so without this gate a corrupt DB gets backed up and migrated in - // the same pass — making the auto-backup useless for rollback. 
+ // Integrity-check a pre-existing DB BEFORE migrations alter it, + // else a corrupt DB gets backed up and migrated in one pass, + // making the pre-migration auto-backup useless for rollback. if had_schema_history { crate::sqlite::backup::run_integrity_check(&conn, |report| { WalletStorageError::IntegrityCheckFailed { report } })?; } - // Refuse to open a DB produced by a newer binary — refinery's - // run() would no-op on pending_count==0, after which blob decoders - // would see forward-schema bytes. Symmetric with restore_from's - // max-version gate (both call the same helper). + // Refuse a newer-binary DB: refinery's run() no-ops at + // pending==0, after which blob decoders would read forward-schema + // bytes. Then assert the wallet application_id and a well-formed + // schema_history BEFORE refinery, so a foreign or + // corrupted-but-integrity-valid DB fails typed instead of being + // migrated in place or panicking the runner. if had_schema_history { crate::sqlite::migrations::assert_schema_version_supported(&conn)?; + crate::sqlite::conn::assert_wallet_application_id(&conn)?; + crate::sqlite::migrations::assert_schema_history_well_formed(&conn)?; } let pending = crate::sqlite::migrations::embedded_migrations(); let pending_count = if had_schema_history { @@ -199,11 +222,20 @@ impl SqlitePersister { )?; } - // Apply migrations through the typed-error chokepoint. let _report = crate::sqlite::migrations::run_for_open(&mut conn)?; + // Claim the path LAST so a failed open leaves no stale claim; + // canonicalize so symlinks / `.`-segments key the same as a + // sibling open would. 
+ let registered_path = config + .path + .canonicalize() + .unwrap_or_else(|_| config.path.clone()); + register_open_path(registered_path.clone())?; + Ok(Self { config, + registered_path, conn: Arc::new(Mutex::new(conn)), buffer: Buffer::new(), #[cfg(any(test, feature = "__test-helpers"))] @@ -244,11 +276,10 @@ impl SqlitePersister { /// /// # Cross-process rollback caveat /// - /// The pre-restore auto-backup is taken BEFORE the SQLite-native - /// `BEGIN EXCLUSIVE` that guards the restore body. Under concurrent - /// cross-process access the rollback point may therefore miss writes - /// a peer committed between the snapshot and the lock. Serializing - /// restore intent across processes is the caller's responsibility. + /// The pre-restore auto-backup is taken BEFORE the restore body's + /// `BEGIN EXCLUSIVE`, so under concurrent cross-process access the + /// rollback point may miss writes a peer committed in between. Callers + /// must serialize restore intent across processes. pub fn restore_from( dest_db_path: &Path, src_backup: &Path, @@ -281,8 +312,7 @@ impl SqlitePersister { let dir = auto_backup_dir.ok_or(WalletStorageError::AutoBackupDisabled { operation: AutoBackupOperation::Restore, })?; - // Open the destination read-only just long enough to - // page-stream a snapshot to disk under auto_backup_dir. + // Open read-only just long enough to snapshot under auto_backup_dir. let dest_conn = crate::sqlite::conn::open_conn( dest_db_path, crate::sqlite::conn::Access::ReadOnly, @@ -295,13 +325,10 @@ impl SqlitePersister { )?; drop(dest_conn); } - // No row-count fingerprint guards the snapshot → EXCLUSIVE - // window: `backup::restore_from` holds a SQLite-native `BEGIN - // EXCLUSIVE` over the whole restore body, so peers that race the - // snapshot are excluded from there on. 
A count fingerprint would - // miss in-place UPDATEs on single-row tables and give operators - // false confidence; callers needing a quiesced rollback point - // must serialize restore intent at the application layer. + // No row-count fingerprint guards the snapshot→EXCLUSIVE window: + // `backup::restore_from`'s `BEGIN EXCLUSIVE` covers the body, and a + // count would miss in-place UPDATEs and give false confidence. + // Callers needing a quiesced point serialize restore intent. backup::restore_from(dest_db_path, src_backup) } @@ -326,22 +353,17 @@ impl SqlitePersister { /// /// # Cross-process rollback caveat /// - /// The pre-delete auto-backup is taken BEFORE the SQLite-native - /// `BEGIN EXCLUSIVE` that guards the cascade. Under concurrent - /// cross-process access the rollback point may therefore miss writes - /// a peer committed between the snapshot and the lock. Serializing - /// delete intent across processes is the caller's responsibility. + /// The pre-delete auto-backup is taken BEFORE the cascade's + /// `BEGIN EXCLUSIVE`, so under concurrent cross-process access the + /// rollback point may miss writes a peer committed in between. Callers + /// must serialize delete intent across processes. /// /// # Racing stores /// - /// Calls to `store(wallet_id, ...)` for the same wallet while - /// `delete_wallet` is in progress will be **discarded** after the - /// delete commits. The store call may return `Ok(())` (in - /// `FlushMode::Manual` it lands in the buffer), but its data does - /// not survive the delete — the post-commit re-drain inside - /// `delete_wallet` removes any buffered changeset that arrived - /// during the delete window. Synchronize at the caller layer if - /// you need different semantics. + /// A `store(wallet_id, ...)` racing this call is **discarded** after + /// the delete commits — it may return `Ok(())` (Manual mode buffers + /// it) but a post-commit re-drain removes it. 
Synchronize at the + /// caller layer if you need other semantics. pub fn delete_wallet( &self, wallet_id: WalletId, @@ -369,24 +391,20 @@ impl SqlitePersister { wallet_id: WalletId, skip_backup: bool, ) -> Result { - // Acquire the connection mutex FIRST so concurrent in-process - // `store()` calls block on it. Cross-process peers (other - // rusqlite Connections / sibling `SqlitePersister`s) are excluded - // by `BEGIN EXCLUSIVE` below — the in-process mutex alone never - // gave that guarantee. + // Take the conn mutex first so in-process `store()` blocks; + // cross-process peers are excluded by `BEGIN EXCLUSIVE` below. let mut conn = self.conn()?; - // Drain the buffered changeset so a later flush can't - // resurrect the wallet, and so the wallet counts as existing - // even when its only state is buffered. Hold the drained value - // in `drained_slot` and only consume it AFTER tx.commit(). + // Drain the buffer so a later flush can't resurrect the wallet and + // so a buffer-only wallet still counts as existing. Held in + // `drained_slot` and consumed only after commit. let drained = self.buffer.take_for_flush(&wallet_id)?; let had_buffered = drained.is_some(); let drained_slot: std::cell::Cell> = std::cell::Cell::new(drained); - // Helper: any pre-commit failure must restore the changeset so - // we don't lose pending writes on a delete that didn't happen. + // Any pre-commit failure must restore the changeset so a delete + // that didn't happen doesn't lose pending writes. let restore_buffer = |slot: &std::cell::Cell>| { if let Some(cs) = slot.take() { if let Err(e) = self.buffer.restore(wallet_id, cs) { @@ -400,11 +418,11 @@ impl SqlitePersister { }; let result: Result = (|| { - // Pre-flight existence check on the bare conn (no tx) so - // we don't waste a backup file on an unknown wallet. + // Existence check before backup so we don't snapshot for an + // unknown wallet. 
let exists_pre_flush = conn .query_row( - "SELECT 1 FROM wallet_metadata WHERE wallet_id = ?1", + "SELECT 1 FROM wallets WHERE wallet_id = ?1", rusqlite::params![wallet_id.as_slice()], |_| Ok(()), ) @@ -414,36 +432,30 @@ impl SqlitePersister { return Err(WalletStorageError::WalletNotFound { wallet_id }); } - // Test-only injector — force the pre-flush below to fail with - // the primed error without depending on a real SQL failure. - // Keeps the test free of FK-poisoning scaffolding. + // Test-only injector to fail the pre-flush below. #[cfg(any(test, feature = "__test-helpers"))] let primed_pre_flush_error = self.consume_primed_pre_flush_error(); - // Flush the drained buffer to disk BEFORE `run_auto_backup` - // so the pre-delete snapshot includes every pending write. - // Without this the backup captures only already-persisted - // state and rollback-from-backup cannot recover the buffered - // (lost) data. - // - // The flush opens its own EXCLUSIVE tx and commits; - // `run_auto_backup` then runs against the freshly-flushed - // DB. On flush failure we restore the buffer via the outer - // `restore_buffer` helper and abort the delete. - // - // The cascade-side backup runs BEFORE the cascade's - // `BEGIN EXCLUSIVE` because rusqlite's `Backup::new` can't - // establish a backup whose source connection holds an - // active write tx on its own DB — `sqlite3_backup_step` - // would deadlock against the in-flight EXCLUSIVE. + // Flush the drained buffer (its own EXCLUSIVE tx) BEFORE + // `run_auto_backup` so the snapshot includes pending writes; + // otherwise rollback-from-backup can't recover them. The backup + // must precede the cascade's `BEGIN EXCLUSIVE` because + // `Backup::new` deadlocks if the source holds an active write tx. 
if let Some(cs) = drained_slot.take() { #[cfg(any(test, feature = "__test-helpers"))] if let Some(primed) = primed_pre_flush_error { drained_slot.set(Some(cs)); return Err(primed); } - let pre_flush_tx = - conn.transaction_with_behavior(rusqlite::TransactionBehavior::Exclusive)?; + let pre_flush_tx = match conn + .transaction_with_behavior(rusqlite::TransactionBehavior::Exclusive) + { + Ok(tx) => tx, + Err(e) => { + drained_slot.set(Some(cs)); + return Err(WalletStorageError::Sqlite(e)); + } + }; if let Err(e) = apply_changeset_to_tx(&pre_flush_tx, &wallet_id, &cs) { let _ = pre_flush_tx.rollback(); drained_slot.set(Some(cs)); @@ -455,12 +467,6 @@ impl SqlitePersister { } } - // Concurrent-peer detection relies on the auto-backup taken - // before the cascade plus the SQLite-native `BEGIN EXCLUSIVE` - // below — not a row-count fingerprint, which an in-place - // UPDATE on a single-row table would evade. Pre-flushing - // before the backup ensures the snapshot captures every - // buffered write. let backup_path = if skip_backup { None } else { @@ -472,27 +478,20 @@ impl SqlitePersister { )? }; - // SQLite-native EXCLUSIVE for the cascade window. Excludes - // cross-process peers (other rusqlite Connections, sibling - // `SqlitePersister`s) that would otherwise commit rows for - // `wallet_id` during the cascade. The in-process mutex on - // `conn` alone never gave that guarantee. Peers waiting on - // the lock back off via SQLite's `busy_timeout`. + // EXCLUSIVE for the cascade window excludes cross-process peers + // that the in-process conn mutex can't; they back off via + // `busy_timeout`. let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Exclusive)?; - // Deleting the parent `wallet_metadata` row drives the whole - // cleanup: native `ON DELETE CASCADE` removes every FK-bearing - // per-wallet/per-identity table, and the AFTER DELETE triggers - // broom every wallet/identity-scoped `meta_*` row (parentless - // included). 
No per-table accounting is needed — the - // cascade-completeness test asserts no row survives. - crate::sqlite::schema::wallet_meta::delete(&tx, &wallet_id)?; + // Deleting the parent `wallets` row drives all cleanup: native + // `ON DELETE CASCADE` clears FK-bearing tables and AFTER DELETE + // triggers reap the `meta_*` rows (the completeness test + // asserts nothing survives). + crate::sqlite::schema::wallets::delete(&tx, &wallet_id)?; tx.commit()?; - // Commit succeeded — drop the original drained changeset. drop(drained_slot.take()); - // Re-drain any changeset a Manual-mode store dropped into the - // buffer while we held conn. The wallet is gone — these - // writes are intentionally void. + // Discard any changeset a Manual-mode store buffered during the + // delete window — the wallet is gone. if let Ok(Some(_late)) = self.buffer.take_for_flush(&wallet_id) { tracing::warn!( wallet_id = %hex::encode(wallet_id), @@ -511,24 +510,16 @@ impl SqlitePersister { result } - /// Attempt to flush every dirty wallet, regardless of flush mode. + /// Flush every dirty wallet regardless of flush mode — the only way + /// `Manual` writes become durable, and the retry path for transient + /// `Immediate`-mode failures left in the buffer. "Durable" means across + /// application crash (WAL + `synchronous=NORMAL`); use + /// [`Synchronous::Full`](crate::Synchronous) for power-loss durability. /// - /// In `Manual` mode this is the only way pending writes become - /// durable. In `Immediate` mode the buffer is normally empty (each - /// `store` flushes inline) but a transient failure during `store` - /// leaves the changeset in the buffer — `commit_writes` is the - /// retry path that drains those leftovers. - /// - /// Continues past per-wallet failures instead of fails-fast. - /// Each wallet's flush outcome lands on the returned - /// [`CommitReport`]: `succeeded` for durable writes, `failed` for - /// the classified `PersistenceError`. 
`still_pending` only fills - /// when a `LockPoisoned` short-circuit prevents the loop from - /// attempting the remaining wallets. - /// - /// Returns `Err` ONLY when even enumerating the dirty set fails - /// (e.g. the buffer mutex is poisoned). Once the loop starts, - /// every dirty wallet has a slot in the report. + /// Continues past per-wallet failures: each outcome lands on the + /// [`CommitReport`] (`succeeded` / `failed`), and `still_pending` fills + /// only when a `LockPoisoned` short-circuit skips the rest. Returns + /// `Err` only when enumerating the dirty set itself fails. pub fn commit_writes(&self) -> Result { self.commit_writes_inner() } @@ -539,12 +530,9 @@ impl SqlitePersister { failed: Vec::new(), still_pending: Vec::new(), }; - // Even in `FlushMode::Immediate` the buffer can be non-empty: - // a transient failure during `store()` re-merges the changeset - // back into the buffer via `handle_flush_error`. The retry path - // — `commit_writes()` — has to drain that leftover regardless - // of flush mode, otherwise transient-failure data sits there - // until the next per-wallet `store` happens to retry it. + // Even in `Immediate` mode the buffer can be non-empty: a transient + // `store()` failure re-merges the changeset, and only this drains + // it regardless of flush mode. let dirty = self .buffer .dirty_wallets() @@ -554,10 +542,8 @@ impl SqlitePersister { match self.flush_inner(&id) { Ok(()) => report.succeeded.push(id), Err(PersistenceError::LockPoisoned) => { - // Mutex is gone — no point hammering the remaining - // wallets. Record this one as failed and shovel the - // rest into still_pending so the caller knows what - // was never attempted. + // Mutex is gone; record this as failed and the rest as + // never-attempted instead of hammering them. 
report.failed.push((id, PersistenceError::LockPoisoned)); report.still_pending.extend(iter); return Ok(report); @@ -575,18 +561,10 @@ impl SqlitePersister { .map_err(|_| WalletStorageError::LockPoisoned) } - // The feature is named with Cargo's `__` prefix convention to - // signal "not part of the public API; downstream MUST NOT enable - // it" (https://doc.rust-lang.org/cargo/reference/features.html). - // The methods themselves are `#[doc(hidden)]` so they don't show - // up on docs.rs even when the feature is on. - /// Test-only: borrow the write connection. - /// - /// Tests use this to seed `wallet_metadata` rows directly, run - /// SELECTs against tables that aren't part of the public surface, - /// or probe `PRAGMA foreign_keys` / `PRAGMA journal_mode`. Gated - /// behind `cfg(test)` and the `__test-helpers` feature — - /// downstream crates MUST NOT enable it. + // The `__test-helpers` feature uses Cargo's `__` prefix convention: + // not public API, downstream MUST NOT enable it. + /// Test-only: borrow the write connection to seed rows or probe + /// non-public tables/pragmas. Downstream MUST NOT enable the feature. #[doc(hidden)] #[cfg(any(test, feature = "__test-helpers"))] pub fn lock_conn_for_test(&self) -> MutexGuard<'_, Connection> { @@ -608,8 +586,7 @@ impl SqlitePersister { .map_err(PersistenceError::from)?; let Some(cs) = cs else { return Ok(()) }; - // Test-only injector: surface a primed failure without ever - // touching SQL so take/restore semantics are exercised end-to-end. + // Test-only injector: surface a primed failure without touching SQL. #[cfg(any(test, feature = "__test-helpers"))] if let Some(injected) = self.consume_primed_flush_error() { return self.handle_flush_error(wallet_id, cs, injected); @@ -637,15 +614,11 @@ impl SqlitePersister { } /// Classify the failure: transient errors restore the buffer and - /// surface as `FlushRetryable`; everything else drops the - /// changeset and returns the original variant. 
+ /// surface as `FlushRetryable`; everything else drops the changeset + /// and returns the original variant. // - // TODO(qa): the fatal branch below covers `LockPoisoned`, but no - // end-to-end mutex-poison test exists (a panicking thread plus a join - // is hard to reproduce deterministically). It is verified by hand via - // `Mutex::lock` failure injection at the typed-error layer; anyone - // touching the classification policy or this branch must reconfirm - // by hand. + // TODO(qa): the fatal `LockPoisoned` branch has no e2e mutex-poison + // test; verified by hand — reconfirm if you touch the classification. fn handle_flush_error( &self, wallet_id: &WalletId, @@ -655,9 +628,8 @@ impl SqlitePersister { let field_count = populated_field_count(&cs); let kind = err.error_kind_str(); if err.is_transient() { - // A failed restore (e.g. poisoned buffer mutex) means the - // buffered changeset is gone — that is itself fatal and - // must surface, not be masked by the transient signal. + // A failed restore loses the changeset — itself fatal, so + // surface it instead of the transient signal. if let Err(restore_err) = self.buffer.restore(*wallet_id, cs) { tracing::error!( wallet_id = %hex::encode(wallet_id), @@ -667,16 +639,13 @@ impl SqlitePersister { ); return Err(PersistenceError::from(restore_err)); } - // Narrow the error to its rusqlite source — only - // `Sqlite(SqliteFailure(BUSY|LOCKED, _))` qualifies for - // surfacing as `FlushRetryable`. + // Narrow to the rusqlite source for `FlushRetryable`. let source = match err { WalletStorageError::Sqlite(rusq) => rusq, WalletStorageError::FlushRetryable { source, .. } => source, other => { - // Defensive: classifier said "transient" but source - // isn't rusqlite. Surface unwrapped — better than - // lying about the source type. + // Defensive: "transient" but non-rusqlite source — + // surface raw rather than mislabel the source type. 
tracing::warn!( wallet_id = %hex::encode(wallet_id), error_kind = kind, @@ -703,16 +672,13 @@ impl SqlitePersister { dropped_field_count = field_count, "flush failed fatally — buffer wiped" ); - // `cs` dropped here. drop(cs); Err(PersistenceError::from(err)) } } - /// Test-only: arm a one-shot injection consumed by the next - /// `flush_inner`. Higher-level than `FailingConnection`; useful - /// when the test doesn't care which SQL error fires, only how the - /// wrapper reacts. + /// Test-only: arm a one-shot injection for the next `flush_inner`, + /// for tests that care only how the wrapper reacts to the error. #[doc(hidden)] #[cfg(any(test, feature = "__test-helpers"))] pub fn force_next_flush_to_fail(&self, err: WalletStorageError) { @@ -728,9 +694,7 @@ impl SqlitePersister { } /// Test-only: arm a one-shot pre-flush failure for the next - /// `delete_wallet` call. The injection fires only when there is - /// a drained buffered changeset to flush — i.e. when `delete_wallet` - /// actually exercises the pre-flush branch. + /// `delete_wallet`; fires only when there's a drained changeset to flush. #[doc(hidden)] #[cfg(any(test, feature = "__test-helpers"))] pub fn force_next_pre_flush_to_fail(&self, err: WalletStorageError) { @@ -748,9 +712,8 @@ impl SqlitePersister { .take() } - /// Test-only: probe whether the wallet has a buffered changeset. - /// Used to assert the buffer survives a failed pre-flush without - /// consuming it. + /// Test-only: whether the wallet has a buffered changeset (asserts the + /// buffer survives a failed pre-flush without consuming it). #[doc(hidden)] #[cfg(any(test, feature = "__test-helpers"))] pub fn buffer_has_changeset_for_test(&self, wallet_id: &WalletId) -> bool { @@ -761,22 +724,20 @@ impl SqlitePersister { } } -/// When a `Manual`-mode persister is dropped while dirty wallets remain, -/// log a structured `tracing::error!` so the silent-data-loss footgun -/// (the buffer dies with the persister) surfaces in operator logs. 
-/// -/// We intentionally do NOT auto-flush from `Drop` — `flush_inner` -/// can fail and `Drop` cannot propagate errors, so a swallow there -/// would be a worse failure mode than the loud log. `Immediate`-mode -/// persisters are durable on every `store` so they never trip this. +/// On drop of a `Manual`-mode persister with dirty wallets, log an error +/// so the silent-data-loss footgun surfaces. We do NOT auto-flush from +/// `Drop`: `flush_inner` can fail and `Drop` can't propagate, so swallowing +/// would be worse than a loud log. `Immediate` mode never trips this. impl Drop for SqlitePersister { fn drop(&mut self) { + // Release the path claim FIRST so it happens regardless of flush + // mode (the warning below early-returns for Immediate). + release_open_path(&self.registered_path); if self.config.flush_mode != FlushMode::Manual { return; } - // `dirty_wallets` only fails on a poisoned buffer mutex. A - // poisoned mutex on Drop already means the process is wedged; - // we still try to surface the lost state where we can. + // `dirty_wallets` only fails on a poisoned buffer mutex; surface + // the lost state where we can. let dirty = match self.buffer.dirty_wallets() { Ok(d) => d, Err(e) => { @@ -791,11 +752,9 @@ impl Drop for SqlitePersister { if dirty.is_empty() { return; } - // `take_for_flush` mutates the buffer (drains the changeset). - // That is intentional here: the persister is being dropped, no - // future caller can observe the buffer, and `populated_field_count` - // needs to inspect the changeset to produce the diagnostic. Do - // NOT treat `impl Drop` as side-effect-free. + // `take_for_flush` drains the buffer — intentional in `Drop`: no + // future caller can observe it, and we need the changeset to count + // fields for the diagnostic. let total_fields: usize = dirty .iter() .filter_map(|id| { @@ -819,16 +778,14 @@ impl PlatformWalletPersistence for SqlitePersister { /// Merge `changeset` into the per-wallet buffer. 
/// /// Durability matrix: - /// - In [`FlushMode::Immediate`] the call is **durable on `Ok`** — - /// one SQLite transaction wraps every populated per-table apply, - /// so either all sub-changesets land or none do. A transient - /// failure restores the buffer and surfaces - /// [`WalletStorageError::FlushRetryable`] wrapped in - /// `PersistenceError::Backend`. - /// - In [`FlushMode::Manual`] the call only merges into the - /// in-memory buffer. Durability requires - /// [`flush`](Self::flush) (per-wallet) or - /// [`commit_writes`](Self::commit_writes) (every dirty wallet). + /// - [`FlushMode::Immediate`]: on `Ok`, durable across application + /// crash — one transaction wraps every per-table apply (all-or- + /// nothing). A transient failure restores the buffer and surfaces + /// [`WalletStorageError::FlushRetryable`]. Use + /// [`Synchronous::Full`](crate::Synchronous) for power-loss durability. + /// - [`FlushMode::Manual`]: only merges into the buffer; durability + /// needs [`flush`](Self::flush) or + /// [`commit_writes`](Self::commit_writes). fn store( &self, wallet_id: WalletId, @@ -849,27 +806,24 @@ impl PlatformWalletPersistence for SqlitePersister { /// Load every wallet's start-state from disk. /// - /// Populates `platform_addresses` per wallet. `wallets` stays empty - /// pending an upstream `key_wallet::Wallet::from_persisted` - /// constructor — the count of wallets that *would* be rehydrated is - /// surfaced as the structured field `wallets_pending_rehydration` - /// on the `tracing::info!` summary. + /// Populates `platform_addresses` and the keyless per-wallet `wallets` + /// payload (network, birth height, account manifest, core state, + /// identities, `Consumed`-filtered asset locks). Carries **no** `Wallet` + /// or key material — the manager rebuilds each wallet watch-only and + /// signs later on demand. The `tracing::info!` summary reports + /// `wallets_rehydrated`. 
/// - /// Fail-hard: any row that fails to decode (or carries a malformed - /// `wallet_id`) aborts the whole load with a typed - /// [`WalletStorageError`]. Corruption is never silently skipped. + /// Fail-hard: any row that fails to decode (or has a malformed + /// `wallet_id`) aborts the whole load — corruption is never skipped. /// - /// **Query budget.** Constant w.r.t. wallet count: one `SELECT` over - /// `wallet_metadata` for the wallet-id list plus a fixed set of - /// grouped scans (sync state, addresses, platform-payment - /// registrations), not a per-wallet fan-out. + /// **Query budget.** Constant w.r.t. wallet count: one `SELECT` for the + /// id list plus a fixed set of grouped scans, not a per-wallet fan-out. /// /// # Concurrency /// - /// Holds the connection mutex for the duration of the read. - /// Concurrent `store` / `flush` / `delete_wallet` calls block - /// until `load` returns. Intended for one-shot use at process - /// startup, not interleaved with the hot write path. + /// Holds the connection mutex for the whole read, so concurrent + /// `store` / `flush` / `delete_wallet` block until it returns. Intended + /// for one-shot startup use, not the hot write path. /// /// # Examples /// @@ -907,23 +861,14 @@ impl PlatformWalletPersistence for SqlitePersister { /// # } /// ``` fn load(&self) -> Result { - // TODO: repopulate ClientStartState.wallets. The - // identity/contacts/asset-lock readers exist, but the - // `Wallet::from_persisted` wiring lands with the rehydration work; - // until then `load()` only rebuilds `platform_addresses` and emits - // a structured-log summary so operators see the gap. 
let conn = self.conn().map_err(PersistenceError::from)?; let mut state = ClientStartState::default(); let addrs_all = schema::platform_addrs::load_all(&conn).map_err(PersistenceError::from)?; - let wallets_seen = addrs_all.len(); let mut addresses_loaded: usize = 0; - for (wallet_id, (addrs, count)) in addrs_all { - // Omit a wallet that carries no platform state at all: no - // per-account registrations, no addresses, and all sync - // watermarks zero. Such a wallet contributes nothing to a - // restored provider. + // Skip a wallet with no platform state at all (no addresses, + // no registrations, all sync watermarks zero). if count > 0 || !addrs.per_account.is_empty() || addrs.sync_height > 0 @@ -935,11 +880,61 @@ impl PlatformWalletPersistence for SqlitePersister { } } + // Per-wallet keyless rehydration payload; the manager rebuilds each + // wallet watch-only and derives signing keys later on demand. + let wallet_ids = schema::wallets::list_ids(&conn).map_err(PersistenceError::from)?; + let wallets_seen = wallet_ids.len(); + for wallet_id in wallet_ids { + let (network_str, birth_height) = schema::wallets::fetch(&conn, &wallet_id) + .map_err(PersistenceError::from)? 
+ .ok_or_else(|| { + PersistenceError::backend(format!( + "wallets row vanished mid-load for {}", + hex::encode(wallet_id) + )) + })?; + let network = schema::wallets::parse_network(&network_str).ok_or_else(|| { + PersistenceError::backend(format!( + "unknown persisted network {:?} for wallet {}", + network_str, + hex::encode(wallet_id) + )) + })?; + + let account_manifest = + schema::accounts::load_state(&conn, &wallet_id).map_err(PersistenceError::from)?; + let core_state = schema::core_state::load_state(&conn, &wallet_id, network) + .map_err(PersistenceError::from)?; + let identity_manager = schema::identities::load_state(&conn, &wallet_id) + .map_err(PersistenceError::from)?; + let unused_asset_locks = schema::asset_locks::load_unconsumed(&conn, &wallet_id) + .map_err(PersistenceError::from)?; + let contacts = schema::contacts::load_changeset(&conn, &wallet_id) + .map_err(PersistenceError::from)?; + let identity_keys = schema::identity_keys::load_state(&conn, &wallet_id) + .map_err(PersistenceError::from)?; + + state.wallets.insert( + wallet_id, + platform_wallet::changeset::ClientWalletStartState { + network, + birth_height, + account_manifest, + core_state, + identity_manager, + unused_asset_locks, + contacts, + identity_keys, + }, + ); + } + let wallets_rehydrated = state.wallets.len(); + tracing::info!( wallets_seen, addresses_loaded, - wallets_rehydrated = 0usize, - wallets_pending_rehydration = wallets_seen, + wallets_rehydrated, + wallets_pending_rehydration = 0usize, unimplemented = ?LOAD_UNIMPLEMENTED, "load() summary" ); @@ -959,14 +954,10 @@ impl PlatformWalletPersistence for SqlitePersister { } } -// ----- Helpers ----- - -/// Count of top-level slots that carry any data. Feeds the persister's -/// `restored_field_count` / `dropped_field_count` tracing fields so -/// operators can see how much was kept or dropped on a flush retry / -/// fatal failure. 
Computed here from the public `PlatformWalletChangeSet` -/// fields + `Merge::is_empty()` so no storage-only helper leaks into -/// the `rs-platform-wallet` public API. +/// Count of top-level changeset slots carrying data, for the +/// `restored_field_count` / `dropped_field_count` tracing fields. Computed +/// from the public fields so no storage-only helper leaks into the +/// `rs-platform-wallet` API. fn populated_field_count(cs: &PlatformWalletChangeSet) -> usize { [ cs.core.is_empty(), @@ -995,10 +986,8 @@ fn validate_config(config: &SqlitePersisterConfig) -> Result<(), WalletStorageEr reason: "synchronous=Off is rejected (data-loss footgun)", }); } - // `journal_mode=Memory` keeps the rollback journal in RAM and - // `journal_mode=Off` disables it outright. Either turns crash- - // safety into a coin flip for a wallet DB — reject loudly instead - // of silently corrupting on the next power loss. + // `journal_mode` Memory/Off keeps no on-disk rollback journal, making + // a wallet DB crash-unsafe — reject loudly. match config.journal_mode { crate::sqlite::config::JournalMode::Memory => { return Err(WalletStorageError::ConfigInvalid { @@ -1012,10 +1001,8 @@ fn validate_config(config: &SqlitePersisterConfig) -> Result<(), WalletStorageEr } _ => {} } - // `busy_timeout=0` makes contended writers fail-fast with BUSY - // instead of waiting — non-fatal, but the operator almost certainly - // didn't mean it. Warn rather than reject because a few tests - // legitimately want the fail-fast behaviour. + // `busy_timeout=0` makes contended writers fail-fast with BUSY; + // warn (not reject) since a few tests legitimately want that. 
if config.busy_timeout.is_zero() { tracing::warn!( "SqlitePersisterConfig.busy_timeout=0; contended writers will return BUSY \ @@ -1029,9 +1016,19 @@ fn apply_pragmas( conn: &mut Connection, config: &SqlitePersisterConfig, ) -> Result<(), WalletStorageError> { - // `foreign_keys` is enabled + read-back-asserted in - // `crate::sqlite::conn::open_conn`, the single open choke-point. + // `foreign_keys` is enabled + read-back-asserted in `open_conn`. conn.pragma_update(None, "journal_mode", config.journal_mode.pragma_value())?; + // Read `journal_mode` back: `pragma_update` doesn't error when SQLite + // silently falls back (e.g. WAL→DELETE on FUSE), which with + // synchronous=NORMAL risks corruption on power loss. + let applied_journal: String = + conn.pragma_query_value(None, "journal_mode", |row| row.get(0))?; + if !applied_journal.eq_ignore_ascii_case(config.journal_mode.pragma_value()) { + return Err(WalletStorageError::JournalModeNotApplied { + requested: config.journal_mode.pragma_value(), + actual: applied_journal, + }); + } conn.pragma_update(None, "synchronous", config.synchronous.pragma_value())?; let ms = safe_cast::u64_to_i64( "busy_timeout_ms", @@ -1041,25 +1038,25 @@ fn apply_pragmas( Ok(()) } -/// Apply every populated sub-changeset of `cs` against the supplied -/// SQLite transaction. Does not commit; the caller owns the tx -/// lifecycle. Splitting this out from `write_changeset_in_one_tx` -/// lets `delete_wallet_inner` flush a drained buffer into a bespoke -/// pre-delete tx without re-opening the connection. +/// Apply every populated sub-changeset of `cs` against `tx` without +/// committing (caller owns the tx). Separate from +/// `write_changeset_in_one_tx` so `delete_wallet_inner` can flush a drained +/// buffer into its own pre-delete tx. 
fn apply_changeset_to_tx( tx: &rusqlite::Transaction<'_>, wallet_id: &WalletId, cs: &PlatformWalletChangeSet, ) -> Result<(), WalletStorageError> { if let Some(meta) = cs.wallet_metadata.as_ref() { - schema::wallet_meta::upsert(tx, wallet_id, meta)?; + schema::wallets::upsert(tx, wallet_id, meta)?; } if !cs.account_registrations.is_empty() { schema::accounts::apply_registrations(tx, wallet_id, &cs.account_registrations)?; } - if !cs.account_address_pools.is_empty() { - schema::accounts::apply_pools(tx, wallet_id, &cs.account_address_pools)?; - } + // `account_address_pools` is intentionally NOT applied: UTXO attribution + // is hardcoded to the default account (index 0) in `core_state`, so the + // pool snapshot is no longer a storage input. The changeset field is kept + // for API stability and still feeds non-storage consumers. if let Some(core) = cs.core.as_ref() { schema::core_state::apply(tx, wallet_id, core)?; } @@ -1120,12 +1117,9 @@ fn ensure_dir(dir: &Path) -> Result<(), WalletStorageError> { } })?; } - // Best-effort writability probe via `NamedTempFile` (unguessable - // name, no race against concurrent persister opens). This is TOCTOU - // by construction — the dir CAN flip to unwritable between the probe - // and `backup::run_to` below — but the real write has its own error - // path, so the worst case is the operator gets the typed error from - // the actual backup attempt instead of this fast-fail probe. + // Fast-fail writability probe. TOCTOU by construction (the dir can flip + // before `run_to`), but the real write has its own error path, so the + // worst case is a later typed error instead of this early one. 
match tempfile::NamedTempFile::new_in(dir) { Ok(_probe) => Ok(()), Err(source) => Err(WalletStorageError::AutoBackupDirUnwritable { diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/accounts.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/accounts.rs index 04e251496f0..7a71ad881e1 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/accounts.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/accounts.rs @@ -1,15 +1,21 @@ -//! `account_registrations` + `account_address_pools` writers + readers. +//! `account_registrations` writer + keyless reader (platform-payment +//! registrations and the rehydration account-manifest oracle). use std::collections::BTreeMap; use key_wallet::bip32::ExtendedPubKey; use rusqlite::{params, Connection, Transaction}; -use platform_wallet::changeset::{AccountAddressPoolEntry, AccountRegistrationEntry}; +use platform_wallet::changeset::AccountRegistrationEntry; use platform_wallet::wallet::platform_wallet::WalletId; use crate::sqlite::error::WalletStorageError; use crate::sqlite::schema::blob; +use crate::sqlite::schema::blob::impl_persistable_blob; + +// PUBLIC material only: the account-registration xpub manifest reaching +// the `account_xpub_bytes` blob column. 
+impl_persistable_blob!(AccountRegistrationEntry); /// Decoded `platform_payment` account registration: the DIP-17 account /// index and its extended public key, recovered from the bincode-serde @@ -22,20 +28,16 @@ fn decode_platform_payment_row( account_index: i64, xpub_bytes: &[u8], ) -> Result { - let account_index = - u32::try_from(account_index).map_err(|_| WalletStorageError::IntegerOverflow { - field: "account_registrations.account_index", - value: account_index as u64, - target: crate::sqlite::util::safe_cast::SafeCastTarget::U64, - })?; + let account_index = crate::sqlite::util::safe_cast::i64_to_u32( + "account_registrations.account_index", + account_index, + )?; let entry: AccountRegistrationEntry = blob::decode(xpub_bytes)?; Ok((account_index, entry.account_xpub)) } -/// Every `platform_payment` account registration for one wallet, decoded -/// into `(account_index, xpub)`. The xpub is recovered from the -/// bincode-serde `AccountRegistrationEntry` `apply_registrations` writes -/// into `account_xpub_bytes`. +/// Every `platform_payment` registration for one wallet, decoded into +/// `(account_index, xpub)`. #[cfg(any(test, feature = "__test-helpers"))] pub(crate) fn list_platform_payment_registrations( conn: &Connection, @@ -99,10 +101,8 @@ pub fn apply_registrations( if entries.is_empty() { return Ok(()); } - // `account_xpub_bytes` carries the bincode-serde encoded - // `AccountRegistrationEntry` (xpub + account_type). The - // separate `account_type` / `account_index` columns mirror - // the entry for direct SQL lookups. + // `account_xpub_bytes` holds the encoded `AccountRegistrationEntry`; the + // separate `account_type` / `account_index` columns mirror it for SQL. 
let mut stmt = tx.prepare_cached( "INSERT INTO account_registrations \ (wallet_id, account_type, account_index, account_xpub_bytes) \ @@ -124,52 +124,43 @@ pub fn apply_registrations( Ok(()) } -pub fn apply_pools( - tx: &Transaction<'_>, +/// Read every `account_registrations` row for `wallet_id` into a keyless +/// [`AccountRegistrationEntry`] manifest — the rehydration account-set oracle +/// (which accounts to re-derive + the per-account xpubs the wrong-account gate +/// checks). PUBLIC material only (xpub + account type), no `Wallet` minted. +/// Ordered by `(account_type, account_index)` for determinism; a row that +/// fails to decode is a hard [`WalletStorageError`]. +pub fn load_state( + conn: &Connection, wallet_id: &WalletId, - entries: &[AccountAddressPoolEntry], -) -> Result<(), WalletStorageError> { - if entries.is_empty() { - return Ok(()); - } - let mut stmt = tx.prepare_cached( - "INSERT INTO account_address_pools \ - (wallet_id, account_type, account_index, pool_type, snapshot_blob) \ - VALUES (?1, ?2, ?3, ?4, ?5) \ - ON CONFLICT(wallet_id, account_type, account_index, pool_type) DO UPDATE SET \ - snapshot_blob = excluded.snapshot_blob", +) -> Result, WalletStorageError> { + let mut stmt = conn.prepare( + "SELECT account_xpub_bytes FROM account_registrations \ + WHERE wallet_id = ?1 ORDER BY account_type, account_index", )?; - for entry in entries { - let account_type = account_type_db_label(&entry.account_type); - let account_index = account_index(&entry.account_type); - let pool_type = pool_type_db_label(&entry.pool_type); - let payload = blob::encode(entry)?; - stmt.execute(params![ - wallet_id.as_slice(), - account_type, - i64::from(account_index), - pool_type, - payload, - ])?; + let rows = stmt.query_map(params![wallet_id.as_slice()], |row| { + row.get::<_, Vec>(0) + })?; + let mut out = Vec::new(); + for r in rows { + let payload = r?; + out.push(blob::decode::(&payload)?); } - Ok(()) + Ok(out) } -/// Single source of truth for the 
`account_type` TEXT-column domain -/// across `account_registrations`, `account_address_pools`, and -/// `core_derived_addresses`. +/// Source of truth for the `account_registrations.account_type` TEXT domain, +/// mirroring [`key_wallet::account::AccountType`]. +/// `migrations/V001__initial.rs` interpolates it into the table's +/// `CHECK (account_type IN (...))`; `account_type_labels_match_enum` keeps it +/// in sync with [`account_type_db_label`]. /// -/// Mirrors every variant of [`key_wallet::account::AccountType`] -/// (writer side: [`account_type_db_label`]). The migration in -/// `migrations/V001__initial.rs` interpolates this array into the -/// `CHECK (account_type IN (...))` clause on each of those tables, so -/// an unknown label is rejected at insert time rather than landing as -/// silent garbage. The `account_type_labels_match_enum` unit test -/// below enforces set-equality between this array and the writer's -/// output — drift (a renamed/added variant) becomes a failing test, -/// not a runtime divergence between Rust and SQLite. +/// `Standard` maps to two distinct labels by `StandardAccountType` variant +/// (`"standard_bip44"` / `"standard_bip32"`) so BIP44 and BIP32 standard +/// accounts with the same index never collide on their shared PK columns. pub(crate) const ACCOUNT_TYPE_LABELS: &[&str] = &[ - "standard", + "standard_bip44", + "standard_bip32", "coinjoin", "identity_registration", "identity_topup", @@ -186,30 +177,23 @@ pub(crate) const ACCOUNT_TYPE_LABELS: &[&str] = &[ "platform_payment", ]; -/// Single source of truth for the `account_address_pools.pool_type` -/// TEXT-column domain. +/// Stable database label for an `AccountType` variant (the `Debug` impl is not +/// a stable format; this match is the contract). An added upstream variant +/// fails this match's exhaustiveness check at compile time. 
/// -/// Mirrors every variant of -/// [`key_wallet::managed_account::address_pool::AddressPoolType`] -/// (writer side: [`pool_type_db_label`]). See [`ACCOUNT_TYPE_LABELS`] -/// for the broader rationale and the parity-test contract. -pub(crate) const POOL_TYPE_LABELS: &[&str] = &["external", "internal", "absent", "absent_hardened"]; - -/// Stable database label for an `AccountType` variant. -/// -/// Used for the `account_type` text column on `account_registrations`, -/// `account_address_pools`, and `core_derived_addresses`. The -/// `Debug` impl on `AccountType` is NOT a stable serialisation -/// format; this match is the contract. Variants identical in -/// label are distinguished by the companion `account_index` column. -/// -/// Adding a variant to upstream `AccountType` makes this match -/// exhaustive-check fail at compile time, forcing an explicit label -/// decision rather than silent garbage. +/// `Standard` maps to two distinct labels by `StandardAccountType` so BIP44 +/// and BIP32 accounts with the same `index` never collapse onto the same PK. pub(crate) fn account_type_db_label(at: &key_wallet::account::AccountType) -> &'static str { - use key_wallet::account::AccountType; + use key_wallet::account::{AccountType, StandardAccountType}; match at { - AccountType::Standard { .. } => "standard", + AccountType::Standard { + standard_account_type: StandardAccountType::BIP44Account, + .. + } => "standard_bip44", + AccountType::Standard { + standard_account_type: StandardAccountType::BIP32Account, + .. + } => "standard_bip32", AccountType::CoinJoin { .. } => "coinjoin", AccountType::IdentityRegistration => "identity_registration", AccountType::IdentityTopUp { .. } => "identity_topup", @@ -227,24 +211,8 @@ pub(crate) fn account_type_db_label(at: &key_wallet::account::AccountType) -> &' } } -/// Stable database label for an `AddressPoolType` variant. 
-pub(crate) fn pool_type_db_label( - pool: &key_wallet::managed_account::address_pool::AddressPoolType, -) -> &'static str { - use key_wallet::managed_account::address_pool::AddressPoolType; - match pool { - AddressPoolType::External => "external", - AddressPoolType::Internal => "internal", - AddressPoolType::Absent => "absent", - AddressPoolType::AbsentHardened => "absent_hardened", - } -} - -/// Numeric account index embedded in an `AccountType`. -/// -/// Persisted in the `account_index` column of `account_registrations`, -/// `account_address_pools`, and `core_derived_addresses` (the last of -/// which is the script→account lookup the UTXO writer joins against). +/// Numeric account index embedded in an `AccountType`, persisted in the +/// `account_registrations.account_index` column. pub(crate) fn account_index(at: &key_wallet::account::AccountType) -> u32 { use key_wallet::account::AccountType; match at { @@ -271,11 +239,10 @@ mod tests { use super::*; use std::collections::HashSet; - /// Exhaustive sample of every [`key_wallet::account::AccountType`] - /// variant. The match arm in the loop below uses no wildcard, so - /// an upstream-added variant becomes a compile error here and - /// forces the developer to extend the sample list (and the - /// matching arm in `account_type_db_label` / [`ACCOUNT_TYPE_LABELS`]). + /// Every [`key_wallet::account::AccountType`] variant; the wildcard-free + /// match below fails to compile if upstream adds one. `Standard` appears + /// twice — once per `StandardAccountType` — because both map to distinct + /// labels. 
fn all_account_type_variants() -> Vec { use key_wallet::account::{AccountType, StandardAccountType}; let variants = vec![ @@ -283,6 +250,10 @@ mod tests { index: 0, standard_account_type: StandardAccountType::BIP44Account, }, + AccountType::Standard { + index: 0, + standard_account_type: StandardAccountType::BIP32Account, + }, AccountType::CoinJoin { index: 0 }, AccountType::IdentityRegistration, AccountType::IdentityTopUp { @@ -311,9 +282,6 @@ mod tests { key_class: 0, }, ]; - // Compile-time exhaustiveness gate: an added upstream variant - // makes this match fail to compile and forces the sample list - // (and `account_type_db_label`) to be updated. for v in &variants { match v { AccountType::Standard { .. } @@ -336,25 +304,6 @@ mod tests { variants } - fn all_pool_type_variants() -> Vec { - use key_wallet::managed_account::address_pool::AddressPoolType; - let variants = vec![ - AddressPoolType::External, - AddressPoolType::Internal, - AddressPoolType::Absent, - AddressPoolType::AbsentHardened, - ]; - for v in &variants { - match v { - AddressPoolType::External - | AddressPoolType::Internal - | AddressPoolType::Absent - | AddressPoolType::AbsentHardened => {} - } - } - variants - } - #[test] fn account_type_labels_match_enum() { let from_writer: HashSet<&'static str> = all_account_type_variants() @@ -368,18 +317,4 @@ mod tests { from_const, from_writer ); } - - #[test] - fn pool_type_labels_match_enum() { - let from_writer: HashSet<&'static str> = all_pool_type_variants() - .iter() - .map(pool_type_db_label) - .collect(); - let from_const: HashSet<&'static str> = POOL_TYPE_LABELS.iter().copied().collect(); - assert_eq!( - from_writer, from_const, - "POOL_TYPE_LABELS ({:?}) drifted from pool_type_db_label codomain ({:?})", - from_const, from_writer - ); - } } diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/asset_locks.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/asset_locks.rs index af89cdc1e4d..1a62a6eb2fd 100644 --- 
a/packages/rs-platform-wallet-storage/src/sqlite/schema/asset_locks.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/asset_locks.rs @@ -13,14 +13,17 @@ use platform_wallet::wallet::platform_wallet::WalletId; use crate::sqlite::error::WalletStorageError; use crate::sqlite::schema::blob; -// Imports used only by the test-gated readers below. -#[cfg(any(test, feature = "__test-helpers"))] use { dashcore::OutPoint, platform_wallet::changeset::AssetLockEntry, platform_wallet::wallet::asset_lock::tracked::TrackedAssetLock, rusqlite::Connection, std::collections::BTreeMap, }; +use crate::sqlite::schema::blob::impl_persistable_blob; + +// PUBLIC material only: asset-lock lifecycle reaching `lifecycle_blob`. +impl_persistable_blob!(AssetLockEntry); + pub fn apply( tx: &Transaction<'_>, wallet_id: &WalletId, @@ -66,19 +69,11 @@ pub fn apply( Ok(()) } -/// Single source of truth for the `asset_locks.status` TEXT-column -/// domain. -/// -/// Mirrors every variant of -/// [`platform_wallet::wallet::asset_lock::tracked::AssetLockStatus`] -/// (writer side: [`status_str`]). The migration in -/// `migrations/V001__initial.rs` interpolates this array into the -/// `CHECK (status IN (...))` clause so an unknown label is rejected at -/// insert time rather than landing as silent garbage. The -/// `asset_lock_status_labels_match_enum` unit test below enforces -/// set-equality between this array and the writer's output — drift (a -/// renamed/added variant) becomes a failing test, not a runtime -/// divergence between Rust and SQLite. +/// Source of truth for the `asset_locks.status` TEXT domain, mirroring +/// [`platform_wallet::wallet::asset_lock::tracked::AssetLockStatus`]. +/// `migrations/V001__initial.rs` interpolates it into a `CHECK (status IN +/// (...))`; `asset_lock_status_labels_match_enum` keeps it in sync with +/// [`status_str`]. 
pub(crate) const ASSET_LOCK_STATUS_LABELS: &[&str] = &[ "built", "broadcast", @@ -99,7 +94,6 @@ fn status_str(s: &AssetLockStatus) -> &'static str { /// Per-wallet asset-lock slice as returned by the readers — outer-keyed /// by `account_index`, inner-keyed by outpoint. -#[cfg(any(test, feature = "__test-helpers"))] pub type AssetLocksByAccount = BTreeMap>; /// Decode one raw `(outpoint_bytes, account_index, lifecycle_blob)` @@ -109,7 +103,6 @@ pub type AssetLocksByAccount = BTreeMap Result { + let mut stmt = conn.prepare( + "SELECT outpoint, account_index, lifecycle_blob \ + FROM asset_locks WHERE wallet_id = ?1 AND status NOT IN ('consumed')", + )?; + let rows = stmt.query_map(params![wallet_id.as_slice()], |row| { + let op_bytes: Vec = row.get(0)?; + let account_index: i64 = row.get(1)?; + let blob_bytes: Vec = row.get(2)?; + Ok((op_bytes, account_index, blob_bytes)) + })?; + let mut out: AssetLocksByAccount = BTreeMap::new(); + for r in rows { + let (op_bytes, account_index, blob_bytes) = r?; + let (acct, outpoint, tracked) = decode_row(&op_bytes, account_index, &blob_bytes)?; + out.entry(acct).or_default().insert(outpoint, tracked); + } + Ok(out) +} + +/// Every asset lock bucketed by account index, **including** terminal +/// `Consumed` — history/inspection only; use [`load_unconsumed`] for the +/// rehydration feed. A row that fails to decode is a hard +/// [`WalletStorageError`]. 
+#[cfg(any(test, feature = "__test-helpers"))] +pub fn list_active( + conn: &Connection, + wallet_id: &WalletId, +) -> Result { + let mut stmt = conn.prepare( + "SELECT outpoint, account_index, lifecycle_blob \ + FROM asset_locks WHERE wallet_id = ?1", + )?; + let rows = stmt.query_map(params![wallet_id.as_slice()], |row| { + let op_bytes: Vec = row.get(0)?; + let account_index: i64 = row.get(1)?; + let blob_bytes: Vec = row.get(2)?; + Ok((op_bytes, account_index, blob_bytes)) + })?; + let mut out: AssetLocksByAccount = BTreeMap::new(); + for r in rows { + let (op_bytes, account_index, blob_bytes) = r?; + let (acct, outpoint, tracked) = decode_row(&op_bytes, account_index, &blob_bytes)?; + out.entry(acct).or_default().insert(outpoint, tracked); + } + Ok(out) +} + #[cfg(test)] mod tests { use super::*; use std::collections::HashSet; - /// Exhaustive sample of every [`AssetLockStatus`] variant. The - /// trailing match arm in the loop fails to compile if upstream - /// adds a variant — forcing the developer to extend the list, - /// `status_str`, and [`ASSET_LOCK_STATUS_LABELS`] together. + /// Every [`AssetLockStatus`] variant; the wildcard-free match below fails + /// to compile if upstream adds one. fn all_asset_lock_status_variants() -> Vec { let variants = vec![ AssetLockStatus::Built, diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/blob.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/blob.rs index 502d984116d..3cb375f30dd 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/blob.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/blob.rs @@ -1,27 +1,46 @@ -//! BLOB-column codec helpers. +//! BLOB-column codec helpers: thin `bincode::serde` wrappers so every +//! `_blob` column uses one encoding path. Schema evolution is gated by the +//! refinery migration version — no per-blob revision tag. //! -//! Thin error-mapping wrappers around `bincode::serde` so every -//! 
`_blob` column in the SQLite schema uses one encoding path. Schema -//! evolution is gated by the refinery migration version on the -//! database as a whole — there is no per-blob revision tag. -//! -//! [`encode_outpoint`] / [`decode_outpoint`] encode a `dashcore::OutPoint` -//! the same way — via bincode-serde — for the `outpoint` PRIMARY KEY -//! columns (`core_utxos`, `asset_locks`). The bytes are a stable but not -//! fixed-length key; both columns are used for exact-match PK lookups, so -//! variable width is fine (no range scans or byte-order dependence). +//! [`encode_outpoint`] / [`decode_outpoint`] encode `dashcore::OutPoint` +//! the same way for the `outpoint` PK columns. The key is variable-width, +//! which is fine for the exact-match PK lookups (no range scans). use serde::de::DeserializeOwned; use serde::Serialize; use crate::sqlite::error::WalletStorageError; -/// Hard cap on bincode-serde decode allocations. 16 MiB is two orders -/// of magnitude above any legitimate per-row payload we ship — a -/// hostile or corrupted backup with an inflated length prefix is -/// rejected before the allocator wakes up. Applied symmetrically to -/// encode + decode so we can't write a payload we'd then refuse. -pub const BLOB_SIZE_LIMIT_BYTES: usize = 16 * 1024 * 1024; +/// Sealed-trait machinery enforcing the no-key-material-in-DB invariant at +/// the type level: only types opting in via [`impl_persistable_blob!`] can +/// reach [`encode`]. +pub(crate) mod sealed { + /// `pub(crate)` supertrait of [`PersistableBlob`] — downstream cannot + /// name it, so the trait is sealed. + pub trait Sealed {} +} + +/// Marker for types allowed into a `_blob` column. Sealed via +/// [`sealed::Sealed`] so adding a (possibly key-bearing) type to the +/// persistence path is an explicit, reviewable `impl` rather than a silent +/// `T: Serialize` slip. +pub trait PersistableBlob: Serialize + sealed::Sealed {} + +/// Seal-and-mark a type for [`blob::encode`](encode). 
+macro_rules! impl_persistable_blob { + ($($t:ty),+ $(,)?) => { + $( + impl $crate::sqlite::schema::blob::sealed::Sealed for $t {} + impl $crate::sqlite::schema::blob::PersistableBlob for $t {} + )+ + }; +} +pub(crate) use impl_persistable_blob; + +/// Hard cap on bincode-serde allocations, applied symmetrically to encode + +/// decode so a crafted length prefix can't OOM the host. Shares the crate-root +/// [`SIZE_LIMIT_BYTES`](crate::SIZE_LIMIT_BYTES) with the KV value cap. +pub const BLOB_SIZE_LIMIT_BYTES: usize = crate::SIZE_LIMIT_BYTES; fn bounded_config() -> bincode::config::Configuration< bincode::config::LittleEndian, @@ -31,8 +50,10 @@ fn bounded_config() -> bincode::config::Configuration< bincode::config::standard().with_limit::<BLOB_SIZE_LIMIT_BYTES>() } -/// Encode a serde-derived value into a `BLOB` payload. -pub fn encode<T: Serialize>(value: &T) -> Result<Vec<u8>, WalletStorageError> { +/// Encode a [`PersistableBlob`] value into a `BLOB` payload. The sealed bound +/// (not a bare `T: Serialize`) guards against unreviewed types reaching a +/// `_blob` column. +pub fn encode<T: PersistableBlob>(value: &T) -> Result<Vec<u8>, WalletStorageError> { Ok(bincode::serde::encode_to_vec(value, bounded_config())?) } @@ -66,9 +87,11 @@ pub fn decode<T: DeserializeOwned>(blob: &[u8]) -> Result<T, WalletStorageError> Ok(value) } -/// Encode a `dashcore::OutPoint` for an `outpoint` PRIMARY KEY column. -/// Uses the same bincode-serde path as every other column — a stable -/// (not fixed-length) key, which the exact-match PK lookups don't mind. +// An outpoint is a PUBLIC (txid, vout) reference — never key material. +impl_persistable_blob!(dashcore::OutPoint); + +/// Encode a `dashcore::OutPoint` for an `outpoint` PRIMARY KEY column via the +/// shared [`encode`] path. pub fn encode_outpoint(op: &dashcore::OutPoint) -> Result<Vec<u8>, WalletStorageError> { encode(op) } @@ -76,7 +99,6 @@ pub fn encode_outpoint(op: &dashcore::OutPoint) -> Result<Vec<u8>, WalletStorage /// Decode an outpoint key produced by [`encode_outpoint`].
Rejects /// malformed or trailing bytes with a typed [`WalletStorageError`] via /// the shared [`decode`] path. -#[cfg(any(test, feature = "__test-helpers"))] pub fn decode_outpoint(bytes: &[u8]) -> Result { decode(bytes) } @@ -90,6 +112,7 @@ mod tests { a: u32, b: String, } + impl_persistable_blob!(Dummy); #[test] fn encode_decode_roundtrip() { @@ -161,11 +184,9 @@ mod tests { assert_eq!(decode_outpoint(&bytes).unwrap(), op); } - /// A truncated / malformed outpoint key is a typed decode error, not - /// a panic — replaces the old fixed-36-byte length check. A 4-byte - /// input is too short for the 32-byte txid prefix, so bincode fails - /// deterministically with `BincodeDecode` (UnexpectedEnd) before the - /// trailing-bytes check. + /// A truncated outpoint key is a typed decode error, not a panic: a + /// 4-byte input is too short for the 32-byte txid prefix, so bincode + /// fails deterministically with `BincodeDecode` (UnexpectedEnd). #[test] fn decode_outpoint_rejects_malformed_bytes() { let res = decode_outpoint(&[0x01u8; 4]); diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/contacts.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/contacts.rs index c1314115ebd..aa69e5fd8ae 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/contacts.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/contacts.rs @@ -1,45 +1,36 @@ //! Unified `contacts` table writer and per-wallet reader. //! -//! One row per `(wallet_id, owner_id, contact_id)` relationship. The -//! `state` column records which lifecycle stage the relationship is in -//! (`sent` / `received` / `established`), collapsing what used to be -//! three sibling tables into one. A pending relationship is `sent` XOR -//! `received` and carries only the matching request blob; an -//! `established` relationship carries both request blobs plus the four +//! One row per `(wallet_id, owner_id, contact_id)` relationship; the `state` +//! 
column records its lifecycle stage (`sent` / `received` / `established`). A +//! pending relationship is `sent` XOR `received` and carries only the matching +//! request blob; an `established` one carries both request blobs plus the four //! metadata columns (`alias`, `note`, `is_hidden`, `accepted_accounts`). -use rusqlite::{params, Transaction}; - -use platform_wallet::changeset::ContactChangeSet; -use platform_wallet::wallet::platform_wallet::WalletId; +use std::collections::BTreeMap; -use crate::sqlite::error::WalletStorageError; -use crate::sqlite::schema::blob; +use rusqlite::{params, Connection, Transaction}; -#[cfg(feature = "__test-helpers")] use dpp::prelude::Identifier; -#[cfg(feature = "__test-helpers")] use platform_wallet::changeset::{ - ContactRequestEntry, ReceivedContactRequestKey, SentContactRequestKey, + ContactChangeSet, ContactRequestEntry, ReceivedContactRequestKey, SentContactRequestKey, }; -#[cfg(feature = "__test-helpers")] use platform_wallet::wallet::identity::{ContactRequest, EstablishedContact}; -#[cfg(feature = "__test-helpers")] -use rusqlite::Connection; -#[cfg(feature = "__test-helpers")] -use std::collections::BTreeMap; +use platform_wallet::wallet::platform_wallet::WalletId; -/// Single source of truth for the `contacts.state` TEXT-column domain. -/// -/// One label per lifecycle stage of a DashPay contact relationship -/// (writer side: [`contact_state_db_label`]). The migration in -/// `migrations/V001__initial.rs` interpolates this array into the -/// `CHECK (state IN (...))` clause so an unknown label is rejected at -/// insert time rather than landing as silent garbage. The -/// `contact_state_labels_match_enum` unit test below enforces -/// set-equality between this array and the writer's output — drift (a -/// renamed/added stage) becomes a failing test, not a runtime -/// divergence between Rust and SQLite. 
+use crate::sqlite::error::WalletStorageError; +use crate::sqlite::schema::blob; +use crate::sqlite::schema::blob::impl_persistable_blob; + +// PUBLIC material only: contact-request types + the accepted-account +// index reaching the contacts `_blob` columns (contact requests carry +// public keys/refs; accepted_accounts is a list of account indices). +impl_persistable_blob!(ContactRequest, Vec); + +/// Source of truth for the `contacts.state` TEXT domain, one label per +/// contact-relationship lifecycle stage. `migrations/V001__initial.rs` +/// interpolates it into a `CHECK (state IN (...))`; +/// `contact_state_labels_match_enum` keeps it in sync with +/// [`contact_state_db_label`]. pub(crate) const CONTACT_STATE_LABELS: &[&str] = &["sent", "received", "established"]; /// Lifecycle stage of a `contacts` row. @@ -67,7 +58,6 @@ fn contact_state_db_label(state: ContactState) -> &'static str { /// unknown label is a hard error — the migration's `CHECK` constraint /// already rejects writes outside the domain, so reaching this arm /// means on-disk corruption or a forward-incompatible row. -#[cfg(feature = "__test-helpers")] fn contact_state_from_label(label: &str) -> Result { match label { "sent" => Ok(ContactState::Sent), @@ -79,16 +69,21 @@ fn contact_state_from_label(label: &str) -> Result, + pub incoming_requests: BTreeMap, + pub established: BTreeMap, +} + +/// See the `not(__test-helpers)` definition for the canonical docs. #[derive(Debug, Default, PartialEq)] #[cfg(feature = "__test-helpers")] pub struct ContactsRecords { @@ -97,26 +92,20 @@ pub struct ContactsRecords { pub established: BTreeMap, } -/// Apply a [`ContactChangeSet`] onto the unified `contacts` table. -/// -/// Ordering matches the previous three-table writer: inserts before -/// removes. 
The auto-establishment contract is honoured by `state` -/// precedence — a pending upsert (`sent` / `received`) never downgrades -/// an already-`established` row, and an `established` upsert collapses -/// any prior pending row for the same pair (both request blobs + the -/// four metadata columns are set, `state = 'established'`). +/// Apply a [`ContactChangeSet`] onto the `contacts` table (inserts before +/// removes). Auto-establishment is honoured by `state` precedence: a pending +/// upsert never downgrades an already-`established` row, and an `established` +/// upsert collapses any prior pending row for the pair (both request blobs + +/// the four metadata columns, `state = 'established'`). pub fn apply( tx: &Transaction<'_>, wallet_id: &WalletId, cs: &ContactChangeSet, ) -> Result<(), WalletStorageError> { if !cs.sent_requests.is_empty() { - // Pending-sent upsert. `outgoing_request` is set; `state` becomes - // 'sent' UNLESS the row is already established (don't downgrade) - // or an incoming request blob is already stored — in which case - // both sides are present and the row promotes to 'established' in - // the same statement. The inserted label flows through - // `contact_state_db_label` so it stays the writer's codomain. + // Pending-sent upsert: `state` becomes 'sent' unless the row is + // already established or an incoming blob is present, in which case + // both sides exist and it promotes to 'established' in one statement. let sent = contact_state_db_label(ContactState::Sent); let mut stmt = tx.prepare_cached( "INSERT INTO contacts (wallet_id, owner_id, contact_id, state, outgoing_request) \ @@ -152,11 +141,7 @@ pub fn apply( } } if !cs.incoming_requests.is_empty() { - // Pending-received upsert. 
Symmetric to the sent path: - // `incoming_request` is set, `state` becomes 'received' unless the - // row is already established or an outgoing request blob is already - // stored — in which case both sides are present and the row - // promotes to 'established' in the same statement. + // Pending-received upsert, symmetric to the sent path. let received = contact_state_db_label(ContactState::Received); let mut stmt = tx.prepare_cached( "INSERT INTO contacts (wallet_id, owner_id, contact_id, state, incoming_request) \ @@ -192,9 +177,8 @@ pub fn apply( } } if !cs.established.is_empty() { - // Establishment collapses any prior pending row for the pair: set - // both request blobs + the four metadata columns and force the - // `established` state. + // Collapse any prior pending row: both request blobs + the four + // metadata columns, forcing the `established` state. let established_label = contact_state_db_label(ContactState::Established); let mut stmt = tx.prepare_cached( "INSERT INTO contacts \ @@ -236,7 +220,6 @@ pub fn apply( /// decode (bad blob, non-32-byte id, unknown state, or a pending row /// missing its request blob) is a hard error — corruption is never /// silently dropped. -#[cfg(feature = "__test-helpers")] pub(crate) fn load_state( conn: &Connection, wallet_id: &WalletId, @@ -311,11 +294,29 @@ pub(crate) fn load_state( Ok(state) } +/// Build a keyless [`ContactChangeSet`] for one wallet — the +/// rehydration feed the manager layers onto the restored managed +/// identities. PUBLIC material only; `removed_*` are always empty +/// (deletes never reach storage as rows). Fail-hard on a corrupt row, +/// inherited from [`load_state`]. 
+pub fn load_changeset( + conn: &Connection, + wallet_id: &WalletId, +) -> Result { + let records = load_state(conn, wallet_id)?; + Ok(ContactChangeSet { + sent_requests: records.sent_requests, + incoming_requests: records.incoming_requests, + established: records.established, + removed_sent: Default::default(), + removed_incoming: Default::default(), + }) +} + /// Decode a `ContactRequest` from a nullable request column. A NULL /// column on a state that requires it (a pending row missing its blob, /// or an established row missing either side) is a hard error — the /// shape invariant is part of the on-disk contract. -#[cfg(feature = "__test-helpers")] fn decode_request( column: &'static str, bytes: Option<&[u8]>, @@ -329,7 +330,6 @@ fn decode_request( } } -#[cfg(feature = "__test-helpers")] fn decode_pair_key(a: &[u8], b: &[u8]) -> Result<(Identifier, Identifier), WalletStorageError> { let a32 = <[u8; 32]>::try_from(a) .map_err(|_| WalletStorageError::blob_decode("contacts.id column is not 32 bytes"))?; @@ -338,9 +338,8 @@ fn decode_pair_key(a: &[u8], b: &[u8]) -> Result<(Identifier, Identifier), Walle Ok((Identifier::from(a32), Identifier::from(b32))) } -/// Test-helper wrapper over [`load_state`] so this crate's integration -/// tests can assert on the hardened (fail-hard) contacts reader without -/// promoting the production surface beyond `pub(crate)`. +/// Test-helper wrapper over [`load_state`] without promoting the production +/// surface beyond `pub(crate)`. #[cfg(feature = "__test-helpers")] pub fn load_state_for_test( conn: &Connection, @@ -354,10 +353,8 @@ mod tests { use super::*; use std::collections::HashSet; - /// Exhaustive sample of every [`ContactState`] variant. The match - /// arm uses no wildcard, so an added variant becomes a compile error - /// here and forces the sample list, `contact_state_db_label`, and - /// [`CONTACT_STATE_LABELS`] to be updated together. 
+ /// Every [`ContactState`] variant; the wildcard-free match below fails to + /// compile if one is added. fn all_contact_state_variants() -> Vec { let variants = vec![ ContactState::Sent, diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/core_state.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/core_state.rs index 129819b0bce..567b2e0998d 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/core_state.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/core_state.rs @@ -12,6 +12,11 @@ use platform_wallet::wallet::platform_wallet::WalletId; use crate::sqlite::error::WalletStorageError; use crate::sqlite::schema::blob; +use crate::sqlite::schema::blob::impl_persistable_blob; + +// PUBLIC material only: core-chain state reaching `record_blob` / +// `islock_blob` (transaction records + InstantLocks are public chain data). +impl_persistable_blob!(TransactionRecord, dashcore::InstantLock); /// Apply a `CoreChangeSet` inside a transaction. pub fn apply( @@ -49,40 +54,15 @@ pub fn apply( ])?; } } - // Derived addresses are written BEFORE UTXOs (within the same - // transaction) so the UTXO writer's address→account_index lookup - // sees the freshly recorded rows. 
- if !cs.addresses_derived.is_empty() { - let mut stmt = tx.prepare_cached( - "INSERT INTO core_derived_addresses \ - (wallet_id, account_type, account_index, address, derivation_path, used) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6) \ - ON CONFLICT(wallet_id, account_type, address) DO UPDATE SET \ - account_index = excluded.account_index, \ - derivation_path = excluded.derivation_path", - )?; - for da in &cs.addresses_derived { - let account_type = - crate::sqlite::schema::accounts::account_type_db_label(&da.account_type); - let account_index = crate::sqlite::schema::accounts::account_index(&da.account_type); - let pool_type = crate::sqlite::schema::accounts::pool_type_db_label(&da.pool_type); - let address = da.address.to_string(); - let path = format!("{}/{}", pool_type, da.derivation_index); - stmt.execute(params![ - wallet_id.as_slice(), - account_type, - i64::from(account_index), - address, - path, - false - ])?; - } - } + // `addresses_derived` is intentionally NOT persisted here. The iOS + // address registry is fed by the FFI `addresses_derived` callback (fired + // before the UTXO changeset in the same round), and UTXO attribution is + // hardcoded to the default account (index 0), so the storage layer no + // longer keeps a derived-address lookup table. 
if !cs.new_utxos.is_empty() { let mut stmt = tx.prepare_cached(UPSERT_UTXO_SQL)?; - let mut lookup_stmt = tx.prepare_cached(ACCOUNT_INDEX_BY_ADDRESS_SQL)?; for utxo in &cs.new_utxos { - execute_upsert_utxo(&mut stmt, &mut lookup_stmt, wallet_id, utxo, false)?; + execute_upsert_utxo(&mut stmt, wallet_id, utxo, false)?; } } if !cs.spent_utxos.is_empty() { @@ -92,7 +72,6 @@ pub fn apply( "UPDATE core_utxos SET spent = 1 WHERE wallet_id = ?1 AND outpoint = ?2", )?; let mut upsert_stmt = tx.prepare_cached(UPSERT_UTXO_SQL)?; - let mut lookup_stmt = tx.prepare_cached(ACCOUNT_INDEX_BY_ADDRESS_SQL)?; for utxo in &cs.spent_utxos { let op = blob::encode_outpoint(&utxo.outpoint)?; let exists: bool = exists_stmt @@ -102,12 +81,11 @@ pub fn apply( if exists { mark_spent_stmt.execute(params![wallet_id.as_slice(), &op[..]])?; } else { - // Spent-only synthetic row: best-effort account_index - // from the derived-address map. A spend of an - // externally-funded address we never derived defaults - // to 0 (logged) — harmless, since spent rows are - // excluded from `list_unspent_utxos`. - execute_upsert_utxo(&mut upsert_stmt, &mut lookup_stmt, wallet_id, utxo, true)?; + // Spent-only synthetic row for a UTXO we never saw unspent. + // account_index is the hardcoded default like every row, and + // inert anyway since spent rows are excluded from + // `list_unspent_utxos`. + execute_upsert_utxo(&mut upsert_stmt, wallet_id, utxo, true)?; } } } @@ -132,12 +110,6 @@ pub fn apply( Ok(()) } -/// Resolve the owning account index for a UTXO by its rendered address, -/// joining against the `core_derived_addresses` map written earlier in -/// the same transaction. 
-const ACCOUNT_INDEX_BY_ADDRESS_SQL: &str = - "SELECT account_index FROM core_derived_addresses WHERE wallet_id = ?1 AND address = ?2"; - const UPSERT_UTXO_SQL: &str = "INSERT INTO core_utxos \ (wallet_id, outpoint, value, script, height, account_index, spent, spent_in_txid) \ VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, NULL) \ @@ -148,49 +120,29 @@ const UPSERT_UTXO_SQL: &str = "INSERT INTO core_utxos \ account_index = excluded.account_index, \ spent = excluded.spent"; +/// Account index written for every `core_utxos` row. The product uses only +/// the default account (index 0); a non-default funds account is rejected +/// upstream by the `core_bridge` single-account guard, so the writer never +/// resolves per-UTXO attribution. The one reader (`list_unspent_utxos` +/// per-account grouping) groups everything under 0. +const CORE_UTXO_ACCOUNT_INDEX: i64 = 0; + +/// Upsert one `core_utxos` row. `account_index` is the hardcoded default +/// ([`CORE_UTXO_ACCOUNT_INDEX`]); `spent` marks spent-only synthetic rows. fn execute_upsert_utxo( stmt: &mut rusqlite::CachedStatement<'_>, - lookup_stmt: &mut rusqlite::CachedStatement<'_>, wallet_id: &WalletId, utxo: &Utxo, spent: bool, ) -> Result<(), WalletStorageError> { let op = blob::encode_outpoint(&utxo.outpoint)?; - let address = utxo.address.to_string(); - // `Utxo` carries no account index; recover it from the - // derived-address map written earlier in this transaction. - let looked_up: Option = lookup_stmt - .query_row(params![wallet_id.as_slice(), &address], |row| row.get(0)) - .optional()?; - let account_index: i64 = match looked_up { - Some(idx) => idx, - // An unspent UTXO whose address we never derived would land in - // the wallet's funds under account 0 and never re-derive — silent - // mis-bucketing of live money. Refuse it. The spent-only - // placeholder path tolerates the fallback because spent rows are - // excluded from `list_unspent_utxos`, so a wrong index there is - // inert. 
- None if !spent => { - return Err(WalletStorageError::UtxoAddressNotDerived { - address: address.clone(), - }); - } - None => { - tracing::debug!( - wallet_id = %hex::encode(wallet_id), - address = %address, - "spent-only UTXO address not found in core_derived_addresses; using account_index 0 placeholder" - ); - 0 - } - }; stmt.execute(params![ wallet_id.as_slice(), &op[..], crate::sqlite::util::safe_cast::u64_to_i64("core_utxos.value", utxo.value())?, utxo.txout.script_pubkey.as_bytes(), i64::from(utxo.height), - account_index, + CORE_UTXO_ACCOUNT_INDEX, spent, ])?; Ok(()) @@ -234,22 +186,137 @@ fn upsert_sync_state( Ok(()) } +/// Bulk-reconstruct the keyless [`CoreChangeSet`] projection for one wallet +/// from the `core_*` tables. PUBLIC material only; mints no `Wallet`. `network` +/// (from `wallets`) turns a persisted `script` back into an `Address`. +/// +/// # Reconstructed (safety-critical-correct) +/// +/// - **Unspent UTXOs** (`new_utxos`): every `spent = 0` row — the balance +/// source (no-silent-zero); a row with a block `height` is confirmed. +/// - **Transaction records** / **IS-locks** / **sync watermarks**: decoded +/// bit-exact, fail-hard on a corrupt blob. +/// +/// # Deferred to the first post-load `sync` (safe re-warm) +/// +/// - **`last_applied_chain_lock`**: left `None` — not a V001 column; SPV +/// re-applies a fresh chainlock on the next sync. Persisting it would need a +/// schema migration (outside this reader's no-migration scope). +/// - **Per-account UTXO attribution / `is_coinbase` / `is_instantlocked` / +/// `is_trusted` / `used` flags**: not carried by `core_utxos`; defaulted and +/// refreshed on the next scan. The wallet *total* balance is unaffected. +pub fn load_state( + conn: &Connection, + wallet_id: &WalletId, + network: dashcore::Network, +) -> Result { + let mut cs = CoreChangeSet::default(); + + // Unspent UTXOs → new_utxos (the balance source). 
+ { + let mut stmt = conn.prepare( + "SELECT outpoint, value, script, height FROM core_utxos \ + WHERE wallet_id = ?1 AND spent = 0", + )?; + let rows = stmt.query_map(params![wallet_id.as_slice()], |row| { + let op: Vec = row.get(0)?; + let value: i64 = row.get(1)?; + let script: Vec = row.get(2)?; + let height: Option = row.get(3)?; + Ok((op, value, script, height)) + })?; + for r in rows { + let (op_bytes, value, script_bytes, height) = r?; + let outpoint = blob::decode_outpoint(&op_bytes)?; + let value = crate::sqlite::util::safe_cast::i64_to_u64("core_utxos.value", value)?; + let height_u32 = match height { + None => 0u32, + Some(h) => crate::sqlite::util::safe_cast::i64_to_u32("core_utxos.height", h)?, + }; + let script = dashcore::ScriptBuf::from_bytes(script_bytes); + let address = dashcore::Address::from_script(&script, network) + .map_err(|_| WalletStorageError::blob_decode("core_utxos.script not an address"))?; + let confirmed = height.map(|h| h > 0).unwrap_or(false); + let utxo = Utxo { + outpoint, + txout: dashcore::TxOut { + value, + script_pubkey: script, + }, + address, + height: height_u32, + is_coinbase: false, + is_confirmed: confirmed, + is_instantlocked: false, + is_locked: false, + is_trusted: false, + }; + cs.new_utxos.push(utxo); + } + } + + { + let mut stmt = + conn.prepare("SELECT record_blob FROM core_transactions WHERE wallet_id = ?1")?; + let rows = stmt.query_map(params![wallet_id.as_slice()], |row| { + row.get::<_, Vec>(0) + })?; + for r in rows { + let payload = r?; + cs.records + .push(blob::decode::(&payload)?); + } + } + + { + let mut stmt = + conn.prepare("SELECT txid, islock_blob FROM core_instant_locks WHERE wallet_id = ?1")?; + let rows = stmt.query_map(params![wallet_id.as_slice()], |row| { + let txid: Vec = row.get(0)?; + let blob_bytes: Vec = row.get(1)?; + Ok((txid, blob_bytes)) + })?; + for r in rows { + use dashcore::hashes::Hash; + let (txid_bytes, blob_bytes) = r?; + let txid = dashcore::Txid::from_slice(&txid_bytes) + 
.map_err(|_| WalletStorageError::blob_decode("core_instant_locks.txid"))?; + let islock: dashcore::ephemerealdata::instant_lock::InstantLock = + blob::decode(&blob_bytes)?; + cs.instant_locks_for_non_final_records.insert(txid, islock); + } + } + + // Sync watermarks. + if let Some((lp, sy)) = conn + .query_row( + "SELECT last_processed_height, synced_height FROM core_sync_state WHERE wallet_id = ?1", + params![wallet_id.as_slice()], + |row| { + let lp: Option = row.get(0)?; + let sy: Option = row.get(1)?; + Ok((lp, sy)) + }, + ) + .optional()? + { + // Fail-hard on an out-of-range watermark (corruption is never skipped). + cs.last_processed_height = sync_height_u32("core_sync_state.last_processed_height", lp)?; + cs.synced_height = sync_height_u32("core_sync_state.synced_height", sy)?; + } + + Ok(cs) +} + /// Convert a stored sync-height column to `u32`, erroring on overflow /// rather than silently truncating a corrupt/out-of-range value. fn sync_height_u32( field: &'static str, value: Option, ) -> Result, WalletStorageError> { - match value { - None => Ok(None), - Some(v) => Ok(Some(u32::try_from(v).map_err(|_| { - WalletStorageError::IntegerOverflow { - field, - value: v as u64, - target: crate::sqlite::util::safe_cast::SafeCastTarget::U64, - } - })?)), - } + value + .map(|v| crate::sqlite::util::safe_cast::i64_to_u32(field, v)) + .transpose() } /// Fetch a single transaction record by txid. 
Returns `Ok(None)` if @@ -310,20 +377,13 @@ pub fn list_unspent_utxos( let value = crate::sqlite::util::safe_cast::i64_to_u64("core_utxos.value", value)?; let height = match height { None => None, - Some(h) => Some( - u32::try_from(h).map_err(|_| WalletStorageError::IntegerOverflow { - field: "core_utxos.height", - value: h as u64, - target: crate::sqlite::util::safe_cast::SafeCastTarget::U64, - })?, - ), + Some(h) => Some(crate::sqlite::util::safe_cast::i64_to_u32( + "core_utxos.height", + h, + )?), }; let account_index = - u32::try_from(account_index).map_err(|_| WalletStorageError::IntegerOverflow { - field: "core_utxos.account_index", - value: account_index as u64, - target: crate::sqlite::util::safe_cast::SafeCastTarget::U64, - })?; + crate::sqlite::util::safe_cast::i64_to_u32("core_utxos.account_index", account_index)?; let row = UnspentRow { outpoint, value, diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/dashpay.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/dashpay.rs index b6caa52fb11..e9dd5cdb6ae 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/dashpay.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/dashpay.rs @@ -1,16 +1,19 @@ //! `dashpay_profiles` + `dashpay_payments_overlay` writers. //! +//! # Write-only indexed overlay (NOT a rehydration source) +//! +//! These tables are honored on write but `load()` does NOT read them back: +//! DashPay state is rehydrated from the identities `entry_blob`, which is the +//! authoritative load source. They exist for future per-profile/per-payment +//! indexed queries. Round-trip pinned by +//! `tests/sqlite_dashpay_overlay_contract.rs`. +//! //! # Precondition //! -//! Every `identity_id` in the supplied profile / payment maps MUST -//! already exist in the `identities` table and belong to the flush's -//! `wallet_id`. The writer relies on the -//! `identities(identity_id, wallet_id)` row produced by -//! 
[`super::identities::apply`] (in the same transaction or earlier) -//! for parenting; the FK to `identities(identity_id)` enforces the -//! existence half, but not the wallet match. The precondition check -//! below runs in every build and propagates -//! [`WalletStorageError::WalletIdMismatch`] on a mis-attributed caller. +//! Every `identity_id` MUST already exist in `identities` and belong to the +//! flush's `wallet_id`. The FK enforces existence; the wallet match is checked +//! here and propagates [`WalletStorageError::WalletIdMismatch`] on a +//! mis-attributed caller. use std::collections::BTreeMap; @@ -22,23 +25,20 @@ use platform_wallet::wallet::platform_wallet::WalletId; use crate::sqlite::error::WalletStorageError; use crate::sqlite::schema::blob; +use crate::sqlite::schema::blob::impl_persistable_blob; + +// PUBLIC material only: DashPay overlay types reaching `_blob` columns. +impl_persistable_blob!(DashPayProfile, PaymentEntry); -/// Both dashpay tables are keyed by identity only; their FK targets -/// `identities(identity_id)` so cascade flows through the -/// `wallet_metadata → identities` chain. -/// -/// The `wallet_id` parameter is kept on the signature for symmetry -/// with the persister's `write_changeset_in_one_tx` dispatch table, -/// and feeds the precondition check; it does not feed any column. +/// Both tables are keyed by identity only; their FK to +/// `identities(identity_id)` cascades via the `wallets → identities` chain. +/// `wallet_id` feeds the precondition check only — no column. pub fn apply( tx: &Transaction<'_>, wallet_id: &WalletId, profiles: Option<&BTreeMap>>, payments: Option<&BTreeMap>>, ) -> Result<(), WalletStorageError> { - // Precondition: every identity_id we touch must already belong to - // the flush-scope wallet (or to no wallet if scope is the - // sentinel). Cheap SELECT inside the same tx, run in every build. 
let touched: std::collections::BTreeSet = profiles .iter() .flat_map(|m| m.keys().copied()) diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/identities.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/identities.rs index aa74f87b919..d8ac1ab8373 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/identities.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/identities.rs @@ -5,12 +5,14 @@ use rusqlite::{params, Transaction}; use platform_wallet::changeset::IdentityChangeSet; use platform_wallet::wallet::platform_wallet::WalletId; -// Imports used only by the test-gated readers below. -#[cfg(any(test, feature = "__test-helpers"))] use {platform_wallet::changeset::IdentityEntry, rusqlite::Connection}; use crate::sqlite::error::WalletStorageError; use crate::sqlite::schema::blob; +use crate::sqlite::schema::blob::impl_persistable_blob; + +// PUBLIC material only: identity snapshot reaching the `entry_blob` column. +impl_persistable_blob!(IdentityEntry); pub fn apply( tx: &Transaction<'_>, @@ -18,43 +20,29 @@ pub fn apply( cs: &IdentityChangeSet, ) -> Result<(), WalletStorageError> { if !cs.identities.is_empty() { - // PK is `identity_id` alone; `wallet_id` is nullable and links - // the identity to its parent wallet for cascade. The all-zero - // wallet id is treated as "no parent wallet known" and stored - // as NULL so the FK to `wallet_metadata` doesn't activate. - // - // COALESCE order — `COALESCE(identities.wallet_id, - // excluded.wallet_id)` — preserves an already-parented row's - // wallet_id on re-upsert; the excluded value only fills when - // the on-disk row is still NULL. This is the orphan → parented - // promotion path; the reverse (mismatched re-parent) is caught - // by the per-entry cross-check below. + // COALESCE keeps an already-parented row's wallet_id on re-upsert + // (excluded fills only when on-disk is NULL): the orphan → parented + // promotion path. 
The all-zero sentinel stores NULL (no parent). let scope_is_sentinel = wallet_id.iter().all(|b| *b == 0); let mut stmt = tx.prepare_cached( - "INSERT INTO identities (identity_id, wallet_id, wallet_index, entry_blob, tombstoned) \ + "INSERT INTO identities (identity_id, wallet_id, identity_index, entry_blob, tombstoned) \ VALUES (?1, ?2, ?3, ?4, 0) \ ON CONFLICT(identity_id) DO UPDATE SET \ wallet_id = COALESCE(identities.wallet_id, excluded.wallet_id), \ - wallet_index = excluded.wallet_index, \ + identity_index = excluded.identity_index, \ entry_blob = excluded.entry_blob, \ tombstoned = 0", )?; let wallet_id_param = wallet_id_to_param(wallet_id); for (id, entry) in &cs.identities { - // The map key is bound into the `identity_id` column while - // `entry` is what the serialized blob carries; a disagreement - // would persist a row whose typed id names a different - // identity than its blob. Reject before encoding so the two - // representations can never diverge on disk. + // Typed id column and blob must name the same identity; reject + // before encoding so the two can never diverge on disk. if entry.id != *id { return Err(WalletStorageError::IdentityEntryIdMismatch); } - // Cross-check: the entry's own wallet_id (when set) must - // agree with the flush scope so the typed columns and the - // serialized blob describe the same parenting. Sentinel - // scope ("no parent wallet known") requires the entry's - // wallet_id to also be `None` — otherwise a real wallet's - // identity would be written under the orphan slot. + // The entry's wallet_id (when set) must match the flush scope; + // sentinel scope requires it to be `None`, else a real wallet's + // identity would land in the orphan slot. 
if let Some(entry_wallet_id) = entry.wallet_id { if scope_is_sentinel { return Err(WalletStorageError::WalletIdMismatch { @@ -79,18 +67,22 @@ pub fn apply( } } if !cs.removed.is_empty() { - let mut stmt = - tx.prepare_cached("UPDATE identities SET tombstoned = 1 WHERE identity_id = ?1")?; + // Scope the tombstone to the flush wallet (NULL-safe `IS`) so wallet + // A's `removed` set can't tombstone wallet B's identity; the sentinel + // scope maps to NULL and tombstones only orphan rows. + let wallet_id_param = wallet_id_to_param(wallet_id); + let mut stmt = tx.prepare_cached( + "UPDATE identities SET tombstoned = 1 WHERE identity_id = ?1 AND wallet_id IS ?2", + )?; for id in &cs.removed { - stmt.execute(params![id.as_slice()])?; + stmt.execute(params![id.as_slice(), wallet_id_param])?; } } Ok(()) } -/// Map the caller-supplied `WalletId` (32 bytes) to the nullable -/// `identities.wallet_id` column: the all-zero id is treated as "no -/// parent wallet" and stored as NULL so the FK doesn't activate. +/// Map a `WalletId` to the nullable `identities.wallet_id` column: the +/// all-zero sentinel becomes NULL so the FK doesn't activate. fn wallet_id_to_param(wallet_id: &WalletId) -> Option<&[u8]> { if wallet_id.iter().all(|b| *b == 0) { None @@ -112,11 +104,8 @@ pub fn fetch( identity_id: &[u8; 32], ) -> Result, WalletStorageError> { use rusqlite::OptionalExtension; - // Scope the lookup to the caller's wallet so a peer wallet that - // happens to share the identity-id row can never leak through. - // The sentinel WalletId (all zeros) matches orphan rows (NULL - // wallet_id); a real WalletId matches only that wallet's rows. - // `IS` is NULL-safe equality so the NULL branch works uniformly. + // Scope to the caller's wallet (NULL-safe `IS`) so a peer wallet sharing + // the identity-id row can't leak through; sentinel matches orphan rows. 
let wallet_id_param = wallet_id_to_param(wallet_id); let row: Option<(Vec, i64)> = conn .query_row( @@ -132,29 +121,19 @@ pub fn fetch( } } -/// Build a [`platform_wallet::changeset::IdentityManagerStartState`] -/// for one wallet from the `identities` table. Tombstoned rows are skipped (a logical delete, -/// not corruption); any row that fails to decode is a hard error — -/// corruption is never silently dropped. -/// -/// The bucket selection mirrors `IdentityManager`'s layout: -/// rows with `IdentityEntry.identity_index = Some(_)` go into -/// `wallet_identities[wallet_id]`; rows with `None` go into +/// Build an [`IdentityManagerStartState`](platform_wallet::changeset::IdentityManagerStartState) +/// for one wallet. Tombstoned rows are skipped; a row that fails to decode is +/// a hard error (corruption is never silently dropped). Rows with +/// `identity_index = Some(_)` bucket into `wallet_identities`, `None` into /// `out_of_wallet_identities`. -/// -/// Retained for this crate's integration tests until the -/// `Wallet::from_persisted` rehydration path consumes it in `load()`. -#[cfg(any(test, feature = "__test-helpers"))] pub fn load_state( conn: &Connection, wallet_id: &WalletId, ) -> Result { use platform_wallet::changeset::IdentityManagerStartState; - // `identities.wallet_id` is nullable; this load path wants only the - // rows belonging to the wallet the caller asked for, so the WHERE - // clause matches by wallet_id (orphan identities — wallet_id NULL — - // are out of scope for this per-wallet loader). + // Per-wallet loader: match by wallet_id, so orphan rows (NULL wallet_id) + // are out of scope. let mut stmt = conn.prepare( "SELECT identity_id, entry_blob, tombstoned FROM identities WHERE wallet_id = ?1", )?; @@ -189,7 +168,6 @@ pub fn load_state( /// using a freshly minted V0 [`Identity`] for `(id, balance, revision)`. 
/// Live runtime fields (contacts maps, public-key derivations) are /// recovered separately via the contacts / identity_keys readers. -#[cfg(any(test, feature = "__test-helpers"))] fn managed_identity_from_entry( entry: &IdentityEntry, wallet_id: &WalletId, @@ -220,12 +198,10 @@ fn managed_identity_from_entry( } } -/// Insert a stub identity row so identity_keys / dashpay_profiles can -/// reference it via their native composite FK. Used by tests that exercise -/// identity_keys persistence without going through the full identity -/// flow. The stub row carries a `null`-encoded `IdentityEntry` so the -/// `entry_blob` column always decodes — callers wanting real data -/// overwrite via [`apply`]. +/// Insert a stub identity row (test helper) so identity_keys / +/// dashpay_profiles can reference it via their FK. The stub carries a +/// `null`-encoded `IdentityEntry` so `entry_blob` always decodes; real data +/// overwrites via [`apply`]. #[cfg(any(test, feature = "__test-helpers"))] pub fn ensure_exists( conn: &Connection, @@ -253,7 +229,7 @@ pub fn ensure_exists( let wallet_id_param = wallet_id_to_param(wallet_id); conn.execute( "INSERT OR IGNORE INTO identities \ - (identity_id, wallet_id, wallet_index, entry_blob, tombstoned) \ + (identity_id, wallet_id, identity_index, entry_blob, tombstoned) \ VALUES (?1, ?2, NULL, ?3, 0)", params![&identity_id[..], wallet_id_param, payload], )?; diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/identity_keys.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/identity_keys.rs index fc85a19c6a7..784480b7719 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/identity_keys.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/identity_keys.rs @@ -1,23 +1,16 @@ //! `identity_keys` table writer. Stores PUBLIC key material only — no -//! signing-key bytes ever reach this table. +//! signing-key bytes reach this table. //! -//! 
`IdentityKeyEntry`'s `public_key: dpp::IdentityPublicKey` uses -//! `#[serde(tag = "$formatVersion")]` on the parent enum, which -//! bincode-serde rejects (it requires `deserialize_any`). The other -//! fields are plain serde-compatible types. To keep the -//! "one blob per row" property we transcribe the entry into a wire -//! shape where the public key is bincode-2-native-encoded (the dpp -//! types derive `Encode`/`Decode`) and the surrounding fields ride -//! the bincode-serde encoder. The shape is documented on the -//! `IdentityKeyWire` struct below. +//! `IdentityKeyEntry.public_key`'s `#[serde(tag = ...)]` enum is rejected by +//! bincode-serde (needs `deserialize_any`), so `IdentityKeyWire` pre-encodes +//! the key with bincode's native `Encode`/`Decode` and rides the surrounding +//! fields on the serde encoder, keeping one blob per row. -use rusqlite::{params, Transaction}; +use rusqlite::{params, Connection, Transaction}; use serde::{Deserialize, Serialize}; -use dpp::identity::KeyID; -// Used only by the test-gated `into_entry` and the unit tests below. -#[cfg(any(test, feature = "__test-helpers"))] use dpp::identity::IdentityPublicKey; +use dpp::identity::KeyID; use dpp::prelude::Identifier; use platform_wallet::changeset::{ IdentityKeyDerivationIndices, IdentityKeyEntry, IdentityKeysChangeSet, @@ -27,10 +20,8 @@ use platform_wallet::wallet::platform_wallet::WalletId; use crate::sqlite::error::WalletStorageError; use crate::sqlite::schema::blob; -/// On-disk wire shape for `IdentityKeyEntry`. The `public_key` field -/// is pre-encoded via bincode 2's native `Encode/Decode` impls on -/// `dpp::IdentityPublicKey` so bincode-serde doesn't trip on dpp's -/// `serde(tag = ...)` representation. +/// On-disk wire shape for `IdentityKeyEntry`, with `public_key_bincode` +/// holding the natively-encoded key (see module docs). 
#[derive(Debug, Clone, Serialize, Deserialize)] struct IdentityKeyWire { identity_id: Identifier, @@ -41,6 +32,10 @@ struct IdentityKeyWire { derivation_indices: Option, } +// PUBLIC material only reaching `entry_blob`: the wire shape carries +// bincode-encoded public keys + public-key hashes. No private bytes. +crate::sqlite::schema::blob::impl_persistable_blob!(IdentityKeyWire); + impl IdentityKeyWire { fn from_entry(entry: &IdentityKeyEntry) -> Result { let pk = bincode::encode_to_vec(&entry.public_key, bincode::config::standard())?; @@ -54,14 +49,11 @@ impl IdentityKeyWire { }) } - #[cfg(any(test, feature = "__test-helpers"))] fn into_entry(self) -> Result { let (public_key, consumed): (IdentityPublicKey, usize) = bincode::decode_from_slice(&self.public_key_bincode, bincode::config::standard())?; - // Consistent with the outer blob::decode trailing-byte guard: a - // valid-prefix + trailing-garbage payload that bincode's decoder - // happily accepts (it stops after the typed length) is corruption - // or forward-schema drift — refuse it. + // Reject a valid-prefix + trailing-garbage payload (bincode stops + // after the typed length); mirrors the outer blob::decode guard. if consumed != self.public_key_bincode.len() { return Err(WalletStorageError::blob_decode( "unexpected trailing bytes in identity_keys.public_key_bincode", @@ -78,49 +70,42 @@ impl IdentityKeyWire { } } -/// `identity_keys` is keyed by `(identity_id, key_id)`; the parent FK -/// targets `identities(identity_id)`. The caller-supplied [`WalletId`] -/// scopes cross-checks against the entry's own `wallet_id` field so -/// the entry-blob and the typed columns stay aligned. +/// Keyed by `(wallet_id, identity_id, key_id)` with FKs to `wallets` and +/// `identities`. The typed `wallet_id` column comes from the flush scope; the +/// entry's own `wallet_id` (when set) is cross-checked against it so the typed +/// columns and the blob stay aligned. 
pub fn apply( tx: &Transaction<'_>, wallet_id: &WalletId, cs: &IdentityKeysChangeSet, ) -> Result<(), WalletStorageError> { if !cs.upserts.is_empty() { + // `derivation_blob` is always NULL (reserved); derivation_indices ride + // inside the IdentityKeyWire blob, the source of truth. let mut stmt = tx.prepare_cached( "INSERT INTO identity_keys \ - (identity_id, key_id, public_key_blob, public_key_hash) \ - VALUES (?1, ?2, ?3, ?4) \ - ON CONFLICT(identity_id, key_id) DO UPDATE SET \ + (wallet_id, identity_id, key_id, public_key_blob, public_key_hash, derivation_blob) \ + VALUES (?1, ?2, ?3, ?4, ?5, NULL) \ + ON CONFLICT(wallet_id, identity_id, key_id) DO UPDATE SET \ public_key_blob = excluded.public_key_blob, \ - public_key_hash = excluded.public_key_hash", + public_key_hash = excluded.public_key_hash, \ + derivation_blob = NULL", )?; for ((identity_id, key_id), entry) in &cs.upserts { - // Reject any disagreement between the map key / outer - // wallet_id (informational scope) and the entry fields - // (what the serialized blob carries) so the two - // representations of a row can never diverge on disk. + // Typed columns and blob fields must agree so a row can never + // diverge on disk. if entry.identity_id != *identity_id || entry.key_id != *key_id { return Err(WalletStorageError::IdentityKeyEntryMismatch); } - // Sentinel scope ("no parent wallet known") requires the - // entry's wallet_id to also be `None`; a real entry - // wallet_id under sentinel scope would silently file the - // key under the wrong parenting. Non-sentinel scope - // requires the entry's wallet_id (when set) to match - // exactly. 
- let scope_is_sentinel = wallet_id.iter().all(|b| *b == 0); - match (scope_is_sentinel, entry.wallet_id) { - (true, Some(_)) => return Err(WalletStorageError::IdentityKeyEntryMismatch), - (false, Some(entry_wallet_id)) if entry_wallet_id != *wallet_id => { + if let Some(entry_wallet_id) = entry.wallet_id { + if entry_wallet_id != *wallet_id { return Err(WalletStorageError::IdentityKeyEntryMismatch); } - _ => {} } let wire = IdentityKeyWire::from_entry(entry)?; let entry_blob = blob::encode(&wire)?; stmt.execute(params![ + wallet_id.as_slice(), identity_id.as_slice(), i64::from(*key_id), entry_blob, @@ -129,22 +114,63 @@ pub fn apply( } } if !cs.removed.is_empty() { - let mut stmt = - tx.prepare_cached("DELETE FROM identity_keys WHERE identity_id = ?1 AND key_id = ?2")?; + let mut stmt = tx.prepare_cached( + "DELETE FROM identity_keys \ + WHERE wallet_id = ?1 AND identity_id = ?2 AND key_id = ?3", + )?; for (identity_id, key_id) in &cs.removed { - stmt.execute(params![identity_id.as_slice(), i64::from(*key_id)])?; + stmt.execute(params![ + wallet_id.as_slice(), + identity_id.as_slice(), + i64::from(*key_id), + ])?; } } Ok(()) } /// Decode an `identity_keys.public_key_blob` cell back to the entry. -#[cfg(any(test, feature = "__test-helpers"))] pub fn decode_entry(payload: &[u8]) -> Result { let wire: IdentityKeyWire = blob::decode(payload)?; wire.into_entry() } +/// Read every `identity_keys` row for `wallet_id` back into a keyless +/// [`IdentityKeysChangeSet`] (PUBLIC material only — the blob is an +/// `IdentityPublicKey`; private keys are NOT stored or read here). +/// +/// Keyed by `(identity_id, key_id)`; `removed` is always empty (deletes +/// reach storage as `DELETE`s, never as rows). Any row whose blob fails +/// to decode is a hard, typed [`WalletStorageError`] — corruption is +/// never silently dropped. 
+pub fn load_state( + conn: &Connection, + wallet_id: &WalletId, +) -> Result { + let mut cs = IdentityKeysChangeSet::default(); + let mut stmt = conn.prepare( + "SELECT identity_id, key_id, public_key_blob FROM identity_keys WHERE wallet_id = ?1", + )?; + let mut rows = stmt.query(params![wallet_id.as_slice()])?; + while let Some(row) = rows.next()? { + let identity_id_bytes: Vec = row.get(0)?; + let key_id: i64 = row.get(1)?; + let payload: Vec = row.get(2)?; + let id32 = <[u8; 32]>::try_from(identity_id_bytes.as_slice()).map_err(|_| { + WalletStorageError::blob_decode("identity_keys.identity_id is not 32 bytes") + })?; + let identity_id = Identifier::from(id32); + // KeyID is a u32: narrow through the shared `safe_cast` helper, as the + // other readers in this change do, so an out-of-range stored value is + // rejected on one code path instead of a hand-rolled `as u64` report. + let key_id: KeyID = + crate::sqlite::util::safe_cast::i64_to_u32("identity_keys.key_id", key_id)?; + let entry = decode_entry(&payload)?; + cs.upserts.insert((identity_id, key_id), entry); + } + Ok(cs) +} + #[cfg(test)] mod tests { use super::*; diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/mod.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/mod.rs index bcd8ef00ab9..40a8ed251d2 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/mod.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/mod.rs @@ -1,17 +1,10 @@ -//! Per-area SQLite writers + readers. +//! Per-area SQLite writers + readers, one submodule per table or cluster. //! -//! Each submodule owns one table or a small cluster (e.g. `accounts` -//! owns the registration + address-pool tables). Writers take a -//! `&rusqlite::Transaction` and an already resolved sub-changeset; -//! readers take `&rusqlite::Connection`. -//! -//! Encoding policy: scalars that fan out to per-row indexes go into -//! typed SQLite columns (heights, hashes, outpoints, flags). The -//! `_blob` columns carry the full sub-changeset entry encoded with -//! 
`bincode::serde::encode_to_vec` against the serde-derived types in -//! `platform-wallet` — see [`blob::encode`] / [`blob::decode`]. -//! Schema evolution is gated by the refinery migration version on -//! the database; individual blobs have no inline revision tag. +//! Encoding policy: scalars that fan out to per-row indexes go into typed +//! columns (heights, hashes, outpoints, flags); `_blob` columns carry the +//! full sub-changeset entry via [`blob::encode`] / [`blob::decode`]. Schema +//! evolution is gated by the refinery migration version — blobs carry no +//! inline revision tag. pub mod accounts; pub mod asset_locks; @@ -23,17 +16,12 @@ pub mod identities; pub mod identity_keys; pub mod platform_addrs; pub mod token_balances; -pub mod wallet_meta; +pub mod wallets; -/// Defensive check that every `identity_id` in `touched` exists in -/// `identities` and belongs to `wallet_id` (or has NULL wallet_id when -/// scope is the all-zero sentinel). Used by identity-owned writers -/// (`dashpay`, `token_balances`) to reject mis-attributed callers; the -/// check runs in every build. -/// -/// Returns [`WalletStorageError::WalletIdMismatch`] for the first -/// offending row found. Rows that don't exist in `identities` aren't -/// flagged here — the FK on the child table will reject the write. +/// Reject any `identity_id` in `touched` whose `identities` row does not +/// belong to `wallet_id` (NULL wallet_id matches the all-zero sentinel), +/// returning [`WalletStorageError::WalletIdMismatch`] on the first offender. +/// Absent rows are left to the child-table FK. 
pub(crate) fn assert_identities_belong_to_wallet( tx: &rusqlite::Transaction<'_>, wallet_id: &platform_wallet::wallet::platform_wallet::WalletId, @@ -48,15 +36,11 @@ pub(crate) fn assert_identities_belong_to_wallet( .query_row(rusqlite::params![identity_id.as_slice()], |row| row.get(0)) .optional()?; let Some(found_wallet_id) = row else { - // Row absent — FK on the child table will reject the - // upcoming write with a clearer error than guessing. + // Row absent — let the child-table FK reject the write. continue; }; - // INTENTIONAL: the `Some(found)` arms below zero-pad a stored - // wallet_id whose width is not 32 into the diagnostic `found` field. - // This is diagnostic-only and cosmetic — a malformed stored width - // already triggers a mismatch error; reporting it zero-padded carries - // no security impact, so a typed length error is not warranted. + // INTENTIONAL: arms below zero-pad a non-32-byte stored wallet_id into + // the diagnostic `found` field — cosmetic only, a mismatch still errors. match (scope_is_sentinel, found_wallet_id) { (true, None) => {} // sentinel scope matches NULL parenting (true, Some(found)) => { diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/platform_addrs.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/platform_addrs.rs index 95291ecf3b1..eb12d1d0e55 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/platform_addrs.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/platform_addrs.rs @@ -33,11 +33,8 @@ pub fn apply( nonce = excluded.nonce", )?; for entry in &cs.addresses { - // The row is keyed by the outer `wallet_id`; an entry that - // names a different wallet would otherwise be mis-filed. The - // native FK also rejects an unknown parent, but this typed - // error pinpoints the mismatch instead of surfacing a raw - // FOREIGN KEY failure. + // Reject an entry naming a different wallet; the typed error + // pinpoints the mismatch instead of a raw FOREIGN KEY failure. 
if entry.wallet_id != *wallet_id { return Err(WalletStorageError::WalletIdMismatch { expected: *wallet_id, @@ -141,17 +138,11 @@ pub fn list_per_wallet( Ok(out) } -/// Reassemble the per-account committed state for one wallet from its -/// `platform_payment` registrations (xpub per account index) and its -/// `platform_addresses` rows (the derived address set + known balances). -/// -/// Each registration becomes one [`PerAccountPlatformAddressState`]; the -/// address rows whose `account_index` matches populate its `addresses` -/// bijection and `found` balance map via -/// [`PerAccountPlatformAddressState::insert_persisted_entry`]. Address -/// rows for an account with no registration are skipped — without the -/// xpub the provider can't extend that account's gap window, so there's -/// nothing to restore. +/// Reassemble the per-account committed state from `platform_payment` +/// registrations (one [`PerAccountPlatformAddressState`] each) plus the +/// `platform_addresses` rows whose `account_index` matches. Address rows for an +/// unregistered account are skipped — without the xpub there's nothing to +/// restore. fn build_per_account( registrations: &[accounts::PlatformPaymentRegistration], address_rows: &[PlatformAddressRow], @@ -187,12 +178,9 @@ fn account_state_from_rows( state } -/// Build `PlatformAddressSyncStartState` for one wallet: the -/// network-scoped sync watermark plus the per-account committed state -/// reconstructed from registrations + address rows. -/// -/// `load()` uses the grouped [`load_all`] path; this per-wallet form is -/// retained for this crate's integration tests. +/// Build `PlatformAddressSyncStartState` for one wallet: the sync watermark +/// plus the per-account state from registrations + address rows. `load()` uses +/// the grouped [`load_all`] path; this per-wallet form is for tests. 
#[cfg(any(test, feature = "__test-helpers"))] pub fn load_state( conn: &Connection, @@ -240,24 +228,19 @@ pub fn count_per_wallet( Ok(usize::try_from(n).unwrap_or(usize::MAX)) } -/// One row of [`load_all`] aggregated state per wallet: -/// `(sync_state, address_row_count)`. -/// -/// `address_row_count` is the number of `platform_addresses` rows for the -/// wallet — `load()` uses it (with the watermark and per_account) to -/// decide whether the wallet carries any platform state worth surfacing. +/// One [`load_all`] row per wallet: `(sync_state, +/// reconstructed_address_count)`. The count includes only address rows +/// actually rebuilt into `sync_state.per_account` (those with a matching +/// registration), so it never claims state `load()` did not surface. pub type LoadAllEntry = (PlatformAddressSyncStartState, usize); -/// Bulk reader for `load()`. Cost is a fixed number of grouped scans — -/// one over `platform_address_sync`, one over `platform_addresses`, and -/// one over the `platform_payment` `account_registrations` — regardless -/// of wallet count, rather than a per-wallet fan-out. +/// Bulk reader for `load()`: three grouped scans (over `platform_address_sync`, +/// `platform_addresses`, and the `platform_payment` `account_registrations`) +/// regardless of wallet count, instead of a per-wallet fan-out. /// -/// Driven by [`wallet_meta::list_ids`](crate::sqlite::schema::wallet_meta::list_ids): -/// orphaned `platform_addresses` / `platform_address_sync` rows whose -/// `wallet_id` is absent from `wallet_metadata` are intentionally NOT -/// surfaced. Native foreign keys prevent such orphans; a future re-wire -/// that needs them must restore the id-union over the area tables. +/// Driven by [`wallets::list_ids`](crate::sqlite::schema::wallets::list_ids), so +/// rows whose `wallet_id` is absent from `wallets` are not surfaced (native FKs +/// prevent such orphans anyway). 
pub fn load_all(conn: &Connection) -> Result, WalletStorageError> { let sync_by_wallet = all_sync_state(conn)?; let addresses_by_wallet = all_address_rows(conn)?; @@ -267,7 +250,7 @@ pub fn load_all(conn: &Connection) -> Result, W let empty_regs: Vec = Vec::new(); let mut out: BTreeMap = BTreeMap::new(); - for wallet_id in crate::sqlite::schema::wallet_meta::list_ids(conn)? { + for wallet_id in crate::sqlite::schema::wallets::list_ids(conn)? { let (h, t, r) = sync_by_wallet.get(&wallet_id).copied().unwrap_or((0, 0, 0)); let address_rows = addresses_by_wallet.get(&wallet_id).unwrap_or(&empty_rows); let registrations = registrations_by_wallet @@ -279,11 +262,27 @@ pub fn load_all(conn: &Connection) -> Result, W sync_timestamp: t, last_known_recent_block: r, }; - out.insert(wallet_id, (sync, address_rows.len())); + let reconstructed = reconstructed_address_count(registrations, address_rows); + out.insert(wallet_id, (sync, reconstructed)); } Ok(out) } +/// Count the `platform_addresses` rows [`build_per_account`] rebuilds: those +/// whose `account_index` has a matching registration, keeping the count aligned +/// with `per_account`. +fn reconstructed_address_count( + registrations: &[accounts::PlatformPaymentRegistration], + address_rows: &[PlatformAddressRow], +) -> usize { + let registered: std::collections::BTreeSet = + registrations.iter().map(|(idx, _)| *idx).collect(); + address_rows + .iter() + .filter(|r| registered.contains(&r.account_index)) + .count() +} + /// One grouped scan of `platform_address_sync` → `(sync_height, /// sync_timestamp, last_known_recent_block)` per wallet. 
fn all_sync_state( @@ -367,23 +366,9 @@ fn decode_address_row( let mut hash160 = [0u8; 20]; hash160.copy_from_slice(address_bytes); let balance = safe_cast::i64_to_u64("platform_addresses.balance", balance)?; - let nonce = u32::try_from(nonce).map_err(|_| WalletStorageError::IntegerOverflow { - field: "platform_addresses.nonce", - value: nonce as u64, - target: safe_cast::SafeCastTarget::U64, - })?; - let account_index = - u32::try_from(account_index).map_err(|_| WalletStorageError::IntegerOverflow { - field: "platform_addresses.account_index", - value: account_index as u64, - target: safe_cast::SafeCastTarget::U64, - })?; - let address_index = - u32::try_from(address_index).map_err(|_| WalletStorageError::IntegerOverflow { - field: "platform_addresses.address_index", - value: address_index as u64, - target: safe_cast::SafeCastTarget::U64, - })?; + let nonce = safe_cast::i64_to_u32("platform_addresses.nonce", nonce)?; + let account_index = safe_cast::i64_to_u32("platform_addresses.account_index", account_index)?; + let address_index = safe_cast::i64_to_u32("platform_addresses.address_index", address_index)?; Ok(PlatformAddressRow { account_index, address_index, diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/token_balances.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/token_balances.rs index 8c8d05de68e..25df36e4a4d 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/token_balances.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/token_balances.rs @@ -2,15 +2,11 @@ //! //! # Precondition //! -//! Every `identity_id` in the supplied changeset MUST already exist in -//! the `identities` table and belong to the flush's `wallet_id` (or -//! have a NULL `identities.wallet_id` when the scope is the all-zero -//! sentinel). The writer relies on -//! [`super::identities::apply`] for parenting; the FK to -//! `identities(identity_id)` enforces existence but not the wallet -//! match. 
The precondition check below runs in every build and -//! propagates [`WalletStorageError::WalletIdMismatch`] on a -//! mis-attributed caller. +//! Every `identity_id` MUST already exist in `identities` and belong to the +//! flush's `wallet_id` (or have NULL `wallet_id` for the all-zero sentinel +//! scope). The FK enforces existence; the wallet match is checked here and +//! propagates [`WalletStorageError::WalletIdMismatch`] on a mis-attributed +//! caller. use rusqlite::{params, Transaction}; @@ -20,16 +16,11 @@ use platform_wallet::wallet::platform_wallet::WalletId; use crate::sqlite::error::WalletStorageError; use crate::sqlite::util::safe_cast; -/// `token_balances` is keyed by `(identity_id, token_id)`. The caller -/// supplies a [`WalletId`] for symmetry with sibling writers and to -/// feed the precondition check; it does not feed any column, because -/// cascade flows -/// `wallet_metadata → identities → token_balances` through the -/// nullable `identities.wallet_id` FK. -// -// Orphan-row policy: there is no automatic prune API. Cascade flows -// through `identities`; hosts that delete identities out-of-band must -// prune `token_balances` themselves. +/// Keyed by `(identity_id, token_id)`. `wallet_id` feeds the precondition +/// check only — no column — since cascade flows +/// `wallets → identities → token_balances` via the nullable +/// `identities.wallet_id` FK. No auto-prune: hosts deleting identities +/// out-of-band must prune `token_balances` themselves. 
pub fn apply( tx: &Transaction<'_>, wallet_id: &WalletId, diff --git a/packages/rs-platform-wallet-storage/src/sqlite/schema/wallet_meta.rs b/packages/rs-platform-wallet-storage/src/sqlite/schema/wallets.rs similarity index 65% rename from packages/rs-platform-wallet-storage/src/sqlite/schema/wallet_meta.rs rename to packages/rs-platform-wallet-storage/src/sqlite/schema/wallets.rs index a76b517ca73..66d3dbaed3c 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/schema/wallet_meta.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/schema/wallets.rs @@ -1,4 +1,4 @@ -//! `wallet_metadata` writer + helpers. +//! `wallets` writer + helpers. use rusqlite::{params, Connection, Transaction}; @@ -7,7 +7,7 @@ use platform_wallet::wallet::platform_wallet::WalletId; use crate::sqlite::error::WalletStorageError; -/// Insert / replace a `wallet_metadata` row. +/// Insert / replace a `wallets` row. pub fn upsert( tx: &Transaction<'_>, wallet_id: &WalletId, @@ -15,7 +15,7 @@ pub fn upsert( ) -> Result<(), WalletStorageError> { let network = network_to_str(entry.network); let mut stmt = tx.prepare_cached( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) \ + "INSERT INTO wallets (wallet_id, network, birth_height) \ VALUES (?1, ?2, ?3) \ ON CONFLICT(wallet_id) DO UPDATE SET network = excluded.network, \ birth_height = excluded.birth_height", @@ -24,16 +24,12 @@ pub fn upsert( Ok(()) } -/// Ensure a `wallet_metadata` parent row exists for the given id. Used -/// by tests that exercise persistence without going through registration. -/// -/// Idempotent — silently a no-op when the row already exists. Defaults -/// `network = "testnet"`, `birth_height = 0` (the same fall-back the -/// SPV scan uses when the chain tip is unknown). +/// Ensure a `wallets` parent row exists for the given id (test helper). +/// Idempotent; defaults `network = "testnet"`, `birth_height = 0`. 
#[cfg(any(test, feature = "__test-helpers"))] pub fn ensure_exists(conn: &Connection, wallet_id: &WalletId) -> Result<(), WalletStorageError> { conn.execute( - "INSERT OR IGNORE INTO wallet_metadata (wallet_id, network, birth_height) \ + "INSERT OR IGNORE INTO wallets (wallet_id, network, birth_height) \ VALUES (?1, ?2, ?3)", params![wallet_id.as_slice(), "testnet", 0i64], )?; @@ -42,7 +38,7 @@ pub fn ensure_exists(conn: &Connection, wallet_id: &WalletId) -> Result<(), Wall /// All known wallet ids (used by `delete_wallet`, `load`, `inspect`). pub fn list_ids(conn: &Connection) -> Result, WalletStorageError> { - let mut stmt = conn.prepare("SELECT wallet_id FROM wallet_metadata ORDER BY wallet_id")?; + let mut stmt = conn.prepare("SELECT wallet_id FROM wallets ORDER BY wallet_id")?; let rows = stmt.query_map([], |row| row.get::<_, Vec>(0))?; let mut out = Vec::new(); for r in rows { @@ -58,48 +54,36 @@ pub fn list_ids(conn: &Connection) -> Result, WalletStorageError> } /// Lookup `(network, birth_height)` for a wallet, if known. -#[cfg(any(test, feature = "__test-helpers"))] pub fn fetch( conn: &Connection, wallet_id: &WalletId, ) -> Result, WalletStorageError> { let mut stmt = - conn.prepare("SELECT network, birth_height FROM wallet_metadata WHERE wallet_id = ?1")?; + conn.prepare("SELECT network, birth_height FROM wallets WHERE wallet_id = ?1")?; let mut rows = stmt.query(params![wallet_id.as_slice()])?; if let Some(row) = rows.next()? { let network: String = row.get(0)?; let height: i64 = row.get(1)?; - let height = u32::try_from(height).map_err(|_| WalletStorageError::IntegerOverflow { - field: "wallet_metadata.birth_height", - value: height as u64, - target: crate::sqlite::util::safe_cast::SafeCastTarget::U64, - })?; + let height = crate::sqlite::util::safe_cast::i64_to_u32("wallets.birth_height", height)?; Ok(Some((network, height))) } else { Ok(None) } } -/// Delete a wallet_metadata row (native `ON DELETE CASCADE` fires). 
+/// Delete a wallets row (native `ON DELETE CASCADE` fires). pub fn delete(tx: &Transaction<'_>, wallet_id: &WalletId) -> Result { let n = tx.execute( - "DELETE FROM wallet_metadata WHERE wallet_id = ?1", + "DELETE FROM wallets WHERE wallet_id = ?1", params![wallet_id.as_slice()], )?; Ok(n) } -/// Single source of truth for the `wallet_metadata.network` TEXT-column -/// domain. -/// -/// Mirrors every variant of [`key_wallet::Network`] (writer side: -/// [`network_to_str`]). The migration in `migrations/V001__initial.rs` -/// interpolates this array into a `CHECK (network IN (...))` clause so -/// an unknown label is rejected at insert time rather than landing as -/// silent garbage. The `network_labels_match_enum` unit test below -/// enforces set-equality between this array and the writer's output — -/// drift (a renamed/added variant) becomes a failing test, not a -/// runtime divergence between Rust and SQLite. +/// Source of truth for the `wallets.network` TEXT domain, mirroring +/// [`key_wallet::Network`]. `migrations/V001__initial.rs` interpolates it into +/// a `CHECK (network IN (...))` clause; `network_labels_match_enum` keeps it in +/// sync with [`network_to_str`]. pub(crate) const NETWORK_LABELS: &[&str] = &["mainnet", "testnet", "devnet", "regtest"]; fn network_to_str(net: key_wallet::Network) -> &'static str { @@ -112,7 +96,6 @@ fn network_to_str(net: key_wallet::Network) -> &'static str { } /// Inverse of `network_to_str`. -#[cfg(any(test, feature = "__test-helpers"))] pub fn parse_network(s: &str) -> Option { match s { "mainnet" => Some(key_wallet::Network::Mainnet), @@ -128,13 +111,9 @@ mod tests { use super::*; use std::collections::HashSet; - /// Every [`key_wallet::Network`] variant — kept exhaustive by the - /// `match` arm below, which the compiler's exhaustiveness check - /// turns into a build failure if upstream adds a variant. 
+ /// Every [`key_wallet::Network`] variant; the `match` below fails to + /// compile if upstream adds one, keeping the list in lockstep. fn all_network_variants() -> Vec { - // The match's exhaustiveness fails to compile on a new variant. - // Mapping every existing variant to itself keeps the list and the - // enum in lockstep. let variants = [ key_wallet::Network::Mainnet, key_wallet::Network::Testnet, diff --git a/packages/rs-platform-wallet-storage/src/sqlite/util/safe_cast.rs b/packages/rs-platform-wallet-storage/src/sqlite/util/safe_cast.rs index c02632913b2..1ef1f5823ac 100644 --- a/packages/rs-platform-wallet-storage/src/sqlite/util/safe_cast.rs +++ b/packages/rs-platform-wallet-storage/src/sqlite/util/safe_cast.rs @@ -1,18 +1,10 @@ //! Safe integer conversions for the SQLite `INTEGER` column boundary. //! -//! SQLite's `INTEGER` affinity is `i64`. Rust's wallet types (credits -//! balances, durations cast to milliseconds, monotonic-max heights, -//! token balances) are `u64`. Naively `as i64` casting wraps values -//! ≥ `i64::MAX` to negative numbers and silently sign-extends them -//! back to large `u64` on read. -//! -//! Every cross-boundary cast in the writer / reader paths runs through -//! one of these helpers and produces a typed +//! SQLite's `INTEGER` affinity is `i64`, but wallet types are `u64`; a +//! naive `as i64` wraps values ≥ `i64::MAX` to negatives and sign-extends +//! them back on read. Every durable boundary cast routes through one of +//! these helpers, which return a typed //! [`WalletStorageError::IntegerOverflow`] on out-of-range input. -//! `clippy::cast_possible_wrap` and `cast_sign_loss` warnings stay -//! allowed crate-wide because many in-crate casts are bounded (e.g. -//! `u8` tags, `u32` indices ≤ `i32::MAX`); the contract is that -//! *durable boundary casts* go through this module. 
use crate::sqlite::error::WalletStorageError; @@ -23,6 +15,8 @@ pub enum SafeCastTarget { I64, #[error("u64")] U64, + #[error("u32")] + U32, } /// Cast `value: u64` to `i64`, surfacing @@ -40,24 +34,62 @@ pub fn u64_to_i64(field: &'static str, value: u64) -> Result Result { u64::try_from(value).map_err(|_| WalletStorageError::IntegerOverflow { field, - // For negative inputs the wrapped representation is what we - // surface — the operator looks at the original bits, not the - // post-cast u64 garbage. + // Surface the original bit pattern, not post-cast garbage. value: value as u64, target: SafeCastTarget::U64, }) } +/// Cast a stored `i64` column to `u32`, surfacing +/// [`WalletStorageError::IntegerOverflow`] when the value is negative or +/// exceeds `u32::MAX`. The single boundary helper for the readers that +/// map `INTEGER` columns (heights, account/address indices, nonces) back +/// to their `u32` Rust types. +/// +/// `field` is a compile-time identifier (e.g. +/// `"core_sync_state.synced_height"`) naming the column so the resulting +/// error is actionable. +pub fn i64_to_u32(field: &'static str, value: i64) -> Result { + u32::try_from(value).map_err(|_| WalletStorageError::IntegerOverflow { + field, + value: value as u64, + target: SafeCastTarget::U32, + }) +} + #[cfg(test)] mod tests { use super::*; + #[test] + fn i64_to_u32_happy_path() { + assert_eq!(i64_to_u32("x", 0).unwrap(), 0); + assert_eq!(i64_to_u32("x", u32::MAX as i64).unwrap(), u32::MAX); + } + + #[test] + fn i64_to_u32_overflow_high_and_negative() { + assert!(matches!( + i64_to_u32("h", i64::from(u32::MAX) + 1).unwrap_err(), + WalletStorageError::IntegerOverflow { + target: SafeCastTarget::U32, + .. + } + )); + assert!(matches!( + i64_to_u32("h", -1).unwrap_err(), + WalletStorageError::IntegerOverflow { + target: SafeCastTarget::U32, + .. 
+ } + )); + } + #[test] fn u64_to_i64_happy_path() { assert_eq!(u64_to_i64("x", 0).unwrap(), 0); diff --git a/packages/rs-platform-wallet-storage/tests/common/mod.rs b/packages/rs-platform-wallet-storage/tests/common/mod.rs index cc0f4d1d70d..6f58c0c81df 100644 --- a/packages/rs-platform-wallet-storage/tests/common/mod.rs +++ b/packages/rs-platform-wallet-storage/tests/common/mod.rs @@ -41,18 +41,18 @@ pub fn ro_conn(path: &std::path::Path) -> Connection { .expect("open ro conn") } -/// Insert a stub `wallet_metadata` row so child writes pass the native +/// Insert a stub `wallets` row so child writes pass the native /// FK. Bypasses the buffer/flush layer — tests use this when they /// want to exercise a single sub-changeset writer in isolation. pub fn ensure_wallet_meta(persister: &SqlitePersister, wallet_id: &WalletId) { use rusqlite::params; let conn = persister.lock_conn_for_test(); conn.execute( - "INSERT OR IGNORE INTO wallet_metadata (wallet_id, network, birth_height) \ + "INSERT OR IGNORE INTO wallets (wallet_id, network, birth_height) \ VALUES (?1, 'testnet', 0)", params![wallet_id.as_slice()], ) - .expect("ensure wallet_metadata"); + .expect("ensure wallets"); } /// Insert a stub `identities` row so identity-owned table writes @@ -66,16 +66,14 @@ pub fn ensure_identity( identity_id: &[u8; 32], parent_wallet_id: Option<&WalletId>, ) { - use rusqlite::params; let conn = persister.lock_conn_for_test(); - let wid_param: Option<&[u8]> = parent_wallet_id.map(|w| w.as_slice()); - conn.execute( - "INSERT OR IGNORE INTO identities \ - (identity_id, wallet_id, wallet_index, entry_blob, tombstoned) \ - VALUES (?1, ?2, NULL, X'00', 0)", - params![&identity_id[..], wid_param], - ) - .expect("ensure identity"); + // Delegate to the production stub writer so `entry_blob` holds a + // real, decodable `IdentityEntry` (the wired `load()` decodes every + // identity row). 
The all-zero sentinel WalletId maps to a NULL + // `wallet_id` column, so `None` lands as an orphan identity. + let scope: WalletId = parent_wallet_id.copied().unwrap_or([0u8; 32]); + platform_wallet_storage::sqlite::schema::identities::ensure_exists(&conn, &scope, identity_id) + .expect("ensure identity"); } /// Insert a stub `token_balances` row so `meta_token` writes pass the @@ -100,7 +98,7 @@ pub fn ensure_token_balance( /// Insert a stub `established` row in the unified `contacts` table so /// the `cascade_meta_contact_on_contact_delete` trigger has an /// established-contact parent to fire on for `meta_contact` writes keyed -/// by `(wallet_id, owner_id, contact_id)`. The parent `wallet_metadata` +/// by `(wallet_id, owner_id, contact_id)`. The parent `wallets` /// row must already exist (seed via [`ensure_wallet_meta`]). pub fn ensure_contact_established( persister: &SqlitePersister, @@ -121,7 +119,7 @@ pub fn ensure_contact_established( /// Insert a stub `sent` contact row (pending outgoing request) so a /// `meta_contact` write keyed by `(wallet_id, owner_id, contact_id)` has -/// a non-established parent to exercise. The parent `wallet_metadata` +/// a non-established parent to exercise. The parent `wallets` /// row must already exist. pub fn ensure_contact_sent( persister: &SqlitePersister, @@ -162,7 +160,7 @@ pub fn ensure_contact_received( /// Insert a stub `platform_addresses` row so `meta_platform_address` /// writes pass the composite FK to /// `platform_addresses(wallet_id, address)`. The parent -/// `wallet_metadata` row must already exist (seed via +/// `wallets` row must already exist (seed via /// [`ensure_wallet_meta`]). `address` is an opaque BLOB. 
pub fn ensure_platform_address(persister: &SqlitePersister, wallet_id: &WalletId, address: &[u8]) { use rusqlite::params; diff --git a/packages/rs-platform-wallet-storage/tests/marvin_gate_in_band_ordering.rs b/packages/rs-platform-wallet-storage/tests/marvin_gate_in_band_ordering.rs new file mode 100644 index 00000000000..d604c48aa87 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/marvin_gate_in_band_ordering.rs @@ -0,0 +1,121 @@ +#![allow(clippy::field_reassign_with_default)] + +//! Genesis-rescan regression for the hardcoded `account_index = 0` design +//! (PR #3828). +//! +//! Before: a UTXO landing on a freshly-derived gap-limit-edge address could +//! race address-derivation persistence and be mis-attributed or dropped, so +//! the bridge smuggled a full pool snapshot in-band to resolve it. Now UTXO +//! attribution is hardcoded to the default account (index 0) at the storage +//! writer — no in-band snapshot, no address→account lookup table. This test +//! pins that a UTXO on a real gap-limit-edge address persists directly with +//! `account_index == 0`, contributes the exact balance, and never aborts +//! the flush. + +mod common; + +use common::{ensure_wallet_meta, fresh_persister, wid}; + +use key_wallet::wallet::initialization::WalletAccountCreationOptions; +use key_wallet::wallet::managed_wallet_info::ManagedWalletInfo; +use key_wallet::wallet::Wallet; +use key_wallet::AddressInfo; +use platform_wallet::changeset::{ + CoreChangeSet, PlatformWalletChangeSet, PlatformWalletPersistence, +}; +use platform_wallet::wallet::platform_wallet::WalletId; +use platform_wallet_storage::sqlite::schema::core_state; + +/// The LAST address in the wallet's Standard BIP44 external pool — the +/// gap-limit-edge address, the one most likely to be a fresh extension and +/// thus the worst case for the retired attribution race. 
+fn gap_limit_edge_address(seed_byte: u8) -> AddressInfo { + use key_wallet::account::AccountType; + use key_wallet::managed_account::address_pool::AddressPoolType; + + let wallet = Wallet::from_seed_bytes( + [seed_byte; 64], + key_wallet::Network::Testnet, + WalletAccountCreationOptions::Default, + ) + .unwrap(); + let info = ManagedWalletInfo::from_wallet(&wallet, 0); + + for managed in info.all_managed_accounts() { + let account_type = managed.managed_account_type().to_account_type(); + if !matches!(account_type, AccountType::Standard { index: 0, .. }) { + continue; + } + for pool in managed.managed_account_type().address_pools() { + if pool.pool_type != AddressPoolType::External || pool.addresses.is_empty() { + continue; + } + let infos: Vec = pool.addresses.values().cloned().collect(); + return infos.last().cloned().unwrap(); + } + } + panic!("wallet must expose a non-empty Standard BIP44 external pool"); +} + +fn utxo_at(addr: &dashcore::Address, vout: u32, value: u64) -> key_wallet::Utxo { + use dashcore::hashes::Hash; + key_wallet::Utxo { + outpoint: dashcore::OutPoint { + txid: dashcore::Txid::from_byte_array([0x7E; 32]), + vout, + }, + txout: dashcore::TxOut { + value, + script_pubkey: addr.script_pubkey(), + }, + address: addr.clone(), + height: 7, + is_coinbase: false, + is_confirmed: true, + is_instantlocked: false, + is_locked: false, + is_trusted: false, + } +} + +/// A UTXO on a freshly-derived gap-limit-edge address persists directly with +/// the hardcoded `account_index == 0`: no snapshot, no lookup, no flush +/// abort, and the unspent balance is exact. 
+#[test] +fn utxo_on_fresh_gap_limit_address_persists_under_account_zero() { + let (persister, _tmp, _path) = fresh_persister(); + let w: WalletId = wid(0xD1); + ensure_wallet_meta(&persister, &w); + + let edge = gap_limit_edge_address(0x55); + let addr = edge.address.clone(); + + persister + .store( + w, + PlatformWalletChangeSet { + core: Some(CoreChangeSet { + new_utxos: vec![utxo_at(&addr, 0, 777_000)], + ..Default::default() + }), + ..Default::default() + }, + ) + .expect("a UTXO on a fresh gap-limit address must persist, not abort"); + + let conn = persister.lock_conn_for_test(); + let by_account = core_state::list_unspent_utxos(&conn, &w).unwrap(); + + let total: usize = by_account.values().map(|v| v.len()).sum(); + assert_eq!(total, 1, "the edge-address UTXO must persist"); + + let rows = by_account + .get(&0) + .expect("UTXO must be attributed to the default account (index 0)"); + assert_eq!( + rows.len(), + 1, + "exactly the one edge-address UTXO under account 0" + ); + assert_eq!(rows[0].value, 777_000, "value preserved"); +} diff --git a/packages/rs-platform-wallet-storage/tests/secrets_api.rs b/packages/rs-platform-wallet-storage/tests/secrets_api.rs index aab8ab8d57c..aa9800ec260 100644 --- a/packages/rs-platform-wallet-storage/tests/secrets_api.rs +++ b/packages/rs-platform-wallet-storage/tests/secrets_api.rs @@ -17,6 +17,13 @@ use platform_wallet_storage::secrets::{ }; fn vault_path(dir: &Path) -> PathBuf { + // `open` refuses a group/other-writable parent dir; a umask-0002 + // tempdir lands at 0o775, so tighten it to 0o700 first. 
+ #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = std::fs::set_permissions(dir, std::fs::Permissions::from_mode(0o700)); + } dir.join("vault.pwsvault") } @@ -169,3 +176,75 @@ fn wrapper_debug_is_redacted() { let s = SecretString::new("PLAINTEXTNEEDLE"); assert!(!format!("{s:?}").contains("PLAINTEXT")); } + +/// SECRETS.md (the on-disk vault is "explicitly attacker-controllable", +/// defenses must "fail closed", error doc: "malformed vault file ... +/// truncated header"). A garbage / truncated / empty / non-UTF-8 vault +/// fed through the FULL `EncryptedFileStore::open` integration path +/// (`read_vault_at` -> `Vec::with_capacity(len)` -> `format::deserialize`) +/// must surface a clean typed `MalformedVault` and NEVER panic. The +/// `format.rs` unit tests exercise `deserialize` in isolation; they do +/// not prove the file-open seam (perms check, size cap, allocation, +/// take()) is wired to the same clean-error outcome. +#[cfg(unix)] +#[test] +fn garbage_vault_file_fails_closed_at_open_no_panic() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let cases: &[(&str, &[u8])] = &[ + ("empty", b""), + ("ascii-garbage", b"this is not a vault at all"), + ("truncated-json", b"{\"version\":1,\"kdf\":{\"id\":1,"), + ("non-utf8", &[0xff, 0xfe, 0x00, 0x01, 0x80, 0x80]), + ("json-but-not-a-vault", b"{\"hello\":\"world\"}"), + ]; + for (name, bytes) in cases { + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + fs::write(&path, bytes).unwrap(); + // Match the resident-vault perm precondition so the failure is + // attributable to parsing, not to the (separately tested) perm + // refusal. 
+ fs::set_permissions(&path, fs::Permissions::from_mode(0o600)).unwrap(); + + let err = EncryptedFileStore::open(&path, SecretString::new("pw-correct")) + .expect_err("garbage vault must fail to open"); + assert!( + matches!(err, SecretStoreError::MalformedVault), + "case `{name}`: expected MalformedVault, got {err:?}" + ); + // The clean error must not echo the offending input bytes. + let rendered = format!("{err}"); + assert!( + !rendered.contains("not a vault") && !rendered.contains("hello"), + "case `{name}`: error leaked input bytes: {rendered}" + ); + } +} + +/// SECRETS.md: an unknown/rolled-forward `format_version` is refused +/// fail-closed through the file-open seam, distinct from a malformed +/// body. The format.rs unit test proves the parser; this proves the +/// `open()` path preserves the distinction end to end. +#[cfg(unix)] +#[test] +fn unknown_version_vault_is_refused_at_open() { + use std::fs; + use std::os::unix::fs::PermissionsExt; + + let dir = tempfile::tempdir().unwrap(); + let path = vault_path(dir.path()); + // A structurally JSON document whose `version` is in the future: + // the lax probe reads it, then the version gate rejects it before + // any KDF/AEAD work. 
+ fs::write(&path, br#"{"version":999,"extra":"tolerated-by-probe"}"#).unwrap(); + fs::set_permissions(&path, fs::Permissions::from_mode(0o600)).unwrap(); + + let err = EncryptedFileStore::open(&path, SecretString::new("pw-correct")) + .expect_err("unknown version must fail to open"); + assert!( + matches!(err, SecretStoreError::VersionUnsupported { found: 999 }), + "expected VersionUnsupported{{999}}, got {err:?}" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/secrets_default_on_compiles.rs b/packages/rs-platform-wallet-storage/tests/secrets_default_on_compiles.rs index 23cc10d582e..1f45e2ceaeb 100644 --- a/packages/rs-platform-wallet-storage/tests/secrets_default_on_compiles.rs +++ b/packages/rs-platform-wallet-storage/tests/secrets_default_on_compiles.rs @@ -9,7 +9,7 @@ use platform_wallet_storage::secrets::{ default_credential_store, EncryptedFileStore, SecretBytes, SecretStoreError, SecretString, - WalletId, SERVICE_PREFIX, + WalletId, MAX_PLAINTEXT_LEN, MIN_PASSPHRASE_LEN, SERVICE_PREFIX, }; #[test] @@ -23,6 +23,9 @@ fn default_build_exposes_secrets_surface() { } let _ = _accepts_path as fn(_, _) -> _; let _ = SERVICE_PREFIX.len(); + // The Tier-2 public consts are re-exported on the default build. + let _ = MAX_PLAINTEXT_LEN; + let _ = MIN_PASSPHRASE_LEN; let _ = std::mem::size_of::(); let _ = std::mem::size_of::(); let _ = std::mem::size_of::(); diff --git a/packages/rs-platform-wallet-storage/tests/secrets_scan.rs b/packages/rs-platform-wallet-storage/tests/secrets_scan.rs index 68e0cf48d01..9f1111f2899 100644 --- a/packages/rs-platform-wallet-storage/tests/secrets_scan.rs +++ b/packages/rs-platform-wallet-storage/tests/secrets_scan.rs @@ -10,17 +10,13 @@ //! `mnemonic`, `seed`, `xpriv`, or `secret` breaks the test, forcing //! the author to rename or add an allow-list entry with rationale. //! -//! Out of scope by design: files in `src/sqlite/` outside of -//! `schema/` (`persister.rs`, `backup.rs`, `buffer.rs`, `config.rs`, -//! 
`error.rs`, `migrations.rs`, `util/`) are NOT scanned. They never -//! define database columns and may legitimately reference the -//! forbidden tokens in doc comments. The future `src/secrets/` -//! submodule slot is exempt for the same reason. -//! -//! The check is intentionally string-level: it does not parse SQL or -//! Rust. A column literally named `private_X` is the kind of mistake -//! we want to catch; legitimate uses inside doc comments are -//! allow-listed via the `ALLOWLIST` constant below. +//! Scope and blind spots: this is a column/comment NAMING scan, not a +//! value-content scan — it cannot see the bytes a serialized value +//! carries. Value-level safety is a separate guarantee via the sealed +//! `PersistableBlob` trait in `src/sqlite/schema/blob.rs`. Files outside +//! `schema/` define no columns and are not scanned; `src/secrets/` is +//! exempt by design and covered by its own `tests/secrets_guard.rs`. +//! Legitimate uses inside doc comments are allow-listed via `ALLOWLIST`. use std::path::Path; diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_accounts_reader.rs b/packages/rs-platform-wallet-storage/tests/sqlite_accounts_reader.rs new file mode 100644 index 00000000000..5839b6e0975 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_accounts_reader.rs @@ -0,0 +1,125 @@ +#![allow(clippy::field_reassign_with_default)] + +//! `schema::accounts::load_state` reads `account_registrations` rows back +//! into a keyless [`AccountRegistrationEntry`] manifest, bit-exact, +//! fail-hard on a corrupt blob, and never mints a `Wallet`. 
+ +mod common; + +use common::{ensure_wallet_meta, fresh_persister, wid}; +use key_wallet::account::AccountType; +use platform_wallet::changeset::{AccountRegistrationEntry, PlatformWalletChangeSet}; +use platform_wallet_storage::sqlite::schema::accounts; +use platform_wallet_storage::WalletStorageError; + +fn xpub() -> key_wallet::bip32::ExtendedPubKey { + use key_wallet::wallet::initialization::WalletAccountCreationOptions; + use key_wallet::wallet::Wallet; + let w = Wallet::from_seed_bytes( + [7u8; 64], + key_wallet::Network::Testnet, + WalletAccountCreationOptions::Default, + ) + .expect("wallet"); + w.accounts + .all_accounts() + .first() + .expect("at least one account") + .account_xpub +} + +fn reopen(path: &std::path::Path) -> platform_wallet_storage::SqlitePersister { + platform_wallet_storage::SqlitePersister::open( + platform_wallet_storage::SqlitePersisterConfig::new(path), + ) + .expect("reopen persister") +} + +/// Registrations round-trip bit-exact, in stable order. +#[test] +fn a1_account_registrations_roundtrip() { + let (persister, _tmp, path) = fresh_persister(); + use platform_wallet::changeset::PlatformWalletPersistence; + let w = wid(0xA1); + ensure_wallet_meta(&persister, &w); + + let entries = vec![ + AccountRegistrationEntry { + account_type: AccountType::Standard { + index: 0, + standard_account_type: key_wallet::account::StandardAccountType::BIP44Account, + }, + account_xpub: xpub(), + }, + AccountRegistrationEntry { + account_type: AccountType::IdentityRegistration, + account_xpub: xpub(), + }, + ]; + let cs = PlatformWalletChangeSet { + account_registrations: entries.clone(), + ..Default::default() + }; + persister.store(w, cs).unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let manifest = accounts::load_state(&conn, &w).expect("load_state"); + drop(conn); + + assert_eq!(manifest.len(), 2, "all rows must be returned"); + // Bit-exact xpub round-trip. 
+ for e in &manifest { + assert_eq!(e.account_xpub, xpub()); + } + let has_standard = manifest + .iter() + .any(|e| matches!(e.account_type, AccountType::Standard { index: 0, .. })); + let has_idreg = manifest + .iter() + .any(|e| matches!(e.account_type, AccountType::IdentityRegistration)); + assert!(has_standard && has_idreg); +} + +/// An empty wallet yields an empty manifest, not an error. +#[test] +fn a1_empty_manifest_is_ok() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xA2); + ensure_wallet_meta(&persister, &w); + drop(persister); + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let manifest = accounts::load_state(&conn, &w).expect("load_state"); + drop(conn); + assert!(manifest.is_empty()); +} + +/// A corrupt `account_xpub_bytes` blob is a typed hard error, never a +/// silent skip. +#[test] +fn a1_corrupt_blob_is_hard_error() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xA3); + ensure_wallet_meta(&persister, &w); + { + let conn = persister.lock_conn_for_test(); + conn.execute( + "INSERT INTO account_registrations \ + (wallet_id, account_type, account_index, account_xpub_bytes) \ + VALUES (?1, 'standard_bip44', 0, X'00')", + rusqlite::params![w.as_slice()], + ) + .unwrap(); + } + drop(persister); + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let result = accounts::load_state(&conn, &w); + drop(conn); + assert!( + matches!(result, Err(WalletStorageError::BincodeDecode { .. })), + "corrupt account_xpub_bytes must be a typed BincodeDecode; got {result:?}" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_asset_locks_filter.rs b/packages/rs-platform-wallet-storage/tests/sqlite_asset_locks_filter.rs new file mode 100644 index 00000000000..f187fccdfbe --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_asset_locks_filter.rs @@ -0,0 +1,143 @@ +#![allow(clippy::field_reassign_with_default)] + +//! 
The status-filtered asset-lock reader excludes terminal `Consumed` +//! rows so a spent one-shot lock never resurrects as actionable on +//! rehydration, while the historical row stays on disk. + +mod common; + +use common::{ensure_wallet_meta, fresh_persister, wid}; +use dashcore::hashes::Hash; +use dashcore::{OutPoint, Transaction, Txid}; +use key_wallet::wallet::managed_wallet_info::asset_lock_builder::AssetLockFundingType; +use platform_wallet::changeset::{AssetLockChangeSet, AssetLockEntry, PlatformWalletPersistence}; +use platform_wallet::wallet::asset_lock::tracked::AssetLockStatus; +use platform_wallet_storage::sqlite::schema::asset_locks; + +fn reopen(path: &std::path::Path) -> platform_wallet_storage::SqlitePersister { + platform_wallet_storage::SqlitePersister::open( + platform_wallet_storage::SqlitePersisterConfig::new(path), + ) + .expect("reopen") +} + +fn entry(op: OutPoint, status: AssetLockStatus) -> AssetLockEntry { + AssetLockEntry { + out_point: op, + transaction: Transaction { + version: 3, + lock_time: 0, + input: vec![], + output: vec![], + special_transaction_payload: None, + }, + account_index: 0, + funding_type: AssetLockFundingType::IdentityTopUp, + identity_index: 0, + amount_duffs: 1000, + status, + proof: None, + } +} + +fn op(b: u8) -> OutPoint { + OutPoint { + txid: Txid::from_byte_array([b; 32]), + vout: 0, + } +} + +/// Store a mix including one terminal `Consumed`. After reopen: the +/// `Consumed` row is still on disk, is absent from the filtered +/// rehydration feed, and non-terminal rows survive. 
+#[test] +fn rt4_consumed_excluded_from_rehydration_feed() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xA4); + ensure_wallet_meta(&persister, &w); + + let op_built = op(0x10); + let op_cl = op(0x11); + let op_consumed = op(0x12); + let mut cs = AssetLockChangeSet::default(); + cs.asset_locks + .insert(op_built, entry(op_built, AssetLockStatus::Built)); + cs.asset_locks + .insert(op_cl, entry(op_cl, AssetLockStatus::ChainLocked)); + cs.asset_locks + .insert(op_consumed, entry(op_consumed, AssetLockStatus::Consumed)); + persister + .store( + w, + platform_wallet::changeset::PlatformWalletChangeSet { + asset_locks: Some(cs), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + + // (a) the Consumed row is still physically on disk. + let consumed_rows: i64 = conn + .query_row( + "SELECT COUNT(*) FROM asset_locks WHERE wallet_id = ?1 AND status = 'consumed'", + rusqlite::params![w.as_slice()], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(consumed_rows, 1, "Consumed row must persist on disk"); + + // Unfiltered reader still returns the Consumed entry... + let unfiltered = asset_locks::load_state(&conn, &w).unwrap(); + let all_ops: Vec<_> = unfiltered + .values() + .flat_map(|m| m.keys().copied()) + .collect(); + assert!( + all_ops.contains(&op_consumed), + "unfiltered load_state must still see Consumed (historical)" + ); + + // (b)+(c) the filtered rehydration feed excludes Consumed, keeps + // the rest. 
+ let feed = asset_locks::load_unconsumed(&conn, &w).unwrap(); + drop(conn); + let feed_ops: Vec<_> = feed.values().flat_map(|m| m.keys().copied()).collect(); + assert!( + !feed_ops.contains(&op_consumed), + "Consumed must NOT resurrect in the rehydration feed" + ); + assert!(feed_ops.contains(&op_built), "Built must survive"); + assert!(feed_ops.contains(&op_cl), "ChainLocked must survive"); + assert_eq!(feed_ops.len(), 2); +} + +/// An all-consumed wallet yields an empty rehydration feed, no error. +#[test] +fn a2_all_consumed_yields_empty_feed() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xA5); + ensure_wallet_meta(&persister, &w); + let o = op(0x20); + let mut cs = AssetLockChangeSet::default(); + cs.asset_locks + .insert(o, entry(o, AssetLockStatus::Consumed)); + persister + .store( + w, + platform_wallet::changeset::PlatformWalletChangeSet { + asset_locks: Some(cs), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let feed = asset_locks::load_unconsumed(&conn, &w).unwrap(); + drop(conn); + assert!(feed.is_empty(), "all-consumed wallet → empty feed"); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_auto_backup.rs b/packages/rs-platform-wallet-storage/tests/sqlite_auto_backup.rs index 085169cd2d4..0597725c44a 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_auto_backup.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_auto_backup.rs @@ -70,7 +70,7 @@ fn tc052_delete_wallet_auto_backup_disabled() { let conn = persister.lock_conn_for_test(); let n: i64 = conn .query_row( - "SELECT COUNT(*) FROM wallet_metadata WHERE wallet_id = ?1", + "SELECT COUNT(*) FROM wallets WHERE wallet_id = ?1", rusqlite::params![w.as_slice()], |row| row.get(0), ) @@ -106,7 +106,7 @@ fn tc054_unwritable_auto_backup_dir() { let conn = persister.lock_conn_for_test(); let n: i64 = conn .query_row( - "SELECT COUNT(*) FROM wallet_metadata WHERE 
wallet_id = ?1", + "SELECT COUNT(*) FROM wallets WHERE wallet_id = ?1", rusqlite::params![w.as_slice()], |row| row.get(0), ) @@ -144,3 +144,82 @@ fn tc055_auto_backups_subject_to_retention() { assert_eq!(report.kept, 2); assert_eq!(report.removed.len(), 3); } + +/// Prune orders by the EMBEDDED filename timestamp, not mtime (proven by +/// giving older files newer mtimes). With `keep_last_n = 1` it evicts even +/// a pre-delete safety backup when that backup is not the newest by +/// embedded timestamp: the auto dir is not a protected vault, so operators +/// must size retention above the rollback horizon they care about. +#[test] +fn tc056_aggressive_prune_evicts_safety_backup_and_orders_by_embedded_ts() { + let (persister, _tmp, _path) = fresh_persister(); + let dir = persister.config_for_test().auto_backup_dir.clone().unwrap(); + std::fs::create_dir_all(&dir).unwrap(); + + let stamp = |hours_ago: i64| { + chrono::Utc::now() + .checked_sub_signed(chrono::Duration::hours(hours_ago)) + .unwrap() + .format("%Y%m%dT%H%M%SZ") + .to_string() + }; + + // Newest by embedded timestamp: a manual backup taken AFTER the + // delete. The pre-delete safety backup is older by embedded ts. + let manual = dir.join(format!("wallet-{}.db", stamp(0))); + let safety = dir.join(format!( + "pre-delete-{}-{}.db", + hex::encode([0x11u8; 32]), + stamp(1) + )); + let old_manual = dir.join(format!("wallet-{}.db", stamp(48))); + std::fs::write(&manual, b"m").unwrap(); + std::fs::write(&safety, b"s").unwrap(); + std::fs::write(&old_manual, b"o").unwrap(); + + // Invert mtime vs embedded order: give the OLDEST-by-embedded-ts + // file the NEWEST mtime. If prune (wrongly) sorted by mtime, it + // would keep `old_manual`; sorting by the embedded token keeps + // `manual`. This deterministically exercises the embedded-timestamp + // path rather than the mtime fallback. 
+ let now = std::time::SystemTime::now(); + let hour = std::time::Duration::from_secs(3600); + filetime::set_file_mtime(&old_manual, filetime::FileTime::from_system_time(now)).unwrap(); + filetime::set_file_mtime(&safety, filetime::FileTime::from_system_time(now - hour)).unwrap(); + filetime::set_file_mtime( + &manual, + filetime::FileTime::from_system_time(now - hour * 2), + ) + .unwrap(); + + let report = persister + .prune_backups( + &dir, + platform_wallet_storage::RetentionPolicy { + keep_last_n: Some(1), + max_age: None, + }, + ) + .unwrap(); + + assert_eq!(report.kept, 1, "keep_last_n = 1 keeps exactly one file"); + assert_eq!(report.removed.len(), 2); + // Embedded-ts ordering kept the newest-by-token file (`manual`), + // NOT the newest-by-mtime file (`old_manual`). + assert!( + manual.exists(), + "newest-by-embedded-timestamp file must survive keep_last_n = 1" + ); + assert!( + !old_manual.exists(), + "an old file with a fresh mtime must NOT be treated as newest" + ); + // The safety backup is NOT special-cased: aggressive retention + // evicts it. Operators must size retention above the rollback + // horizon they care about. + assert!( + !safety.exists(), + "pre-delete safety backup is evicted by keep_last_n = 1 when not newest \ + (auto dir is not a protected vault)" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_check_constraints.rs b/packages/rs-platform-wallet-storage/tests/sqlite_check_constraints.rs index 129e76bfdf7..68eadff1d13 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_check_constraints.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_check_constraints.rs @@ -1,8 +1,7 @@ -//! Smoke tests for the enum-domain `CHECK` constraints on the five -//! enum-shaped TEXT columns (`wallet_metadata.network`, -//! `account_registrations.account_type`, -//! `account_address_pools.account_type`/`pool_type`, -//! `core_derived_addresses.account_type`, and `asset_locks.status`). +//! 
Smoke tests for the enum-domain `CHECK` constraints. The schema has +//! four such TEXT columns across four domains: `wallets.network`, +//! `account_registrations.account_type`, `asset_locks.status`, and the +//! synthetic `contacts.state`. These tests exercise each directly. //! //! The per-module parity unit tests in `src/sqlite/schema/*` cover the //! Rust↔const-array equality. These tests cover the runtime half: a @@ -43,10 +42,10 @@ fn check_rejects_bad_network_label() { let (persister, _tmp, _path) = fresh_persister(); let conn = persister.lock_conn_for_test(); let res = conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", params![wid(1).as_slice(), "not-a-network", 0i64], ); - assert_constraint_check(res, "wallet_metadata.network"); + assert_constraint_check(res, "wallets.network"); } #[test] @@ -55,10 +54,10 @@ fn check_rejects_bad_account_type_on_registrations() { let conn = persister.lock_conn_for_test(); // First seed a valid parent row so we don't trip the FK. 
conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", params![wid(2).as_slice(), "testnet", 0i64], ) - .expect("seed wallet_metadata"); + .expect("seed wallets"); let res = conn.execute( "INSERT INTO account_registrations \ (wallet_id, account_type, account_index, account_xpub_bytes) \ @@ -68,39 +67,15 @@ fn check_rejects_bad_account_type_on_registrations() { assert_constraint_check(res, "account_registrations.account_type"); } -#[test] -fn check_rejects_bad_pool_type() { - let (persister, _tmp, _path) = fresh_persister(); - let conn = persister.lock_conn_for_test(); - conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", - params![wid(3).as_slice(), "testnet", 0i64], - ) - .expect("seed wallet_metadata"); - let res = conn.execute( - "INSERT INTO account_address_pools \ - (wallet_id, account_type, account_index, pool_type, snapshot_blob) \ - VALUES (?1, ?2, ?3, ?4, ?5)", - params![ - wid(3).as_slice(), - "standard", - 0i64, - "not_a_pool", - &[0u8; 4][..] 
- ], - ); - assert_constraint_check(res, "account_address_pools.pool_type"); -} - #[test] fn check_rejects_bad_asset_lock_status() { let (persister, _tmp, _path) = fresh_persister(); let conn = persister.lock_conn_for_test(); conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", params![wid(4).as_slice(), "testnet", 0i64], ) - .expect("seed wallet_metadata"); + .expect("seed wallets"); let res = conn.execute( "INSERT INTO asset_locks \ (wallet_id, outpoint, status, account_index, identity_index, amount_duffs, lifecycle_blob) \ @@ -128,9 +103,64 @@ fn check_accepts_every_known_label_network() { { let wid_bytes = [i as u8 + 10; 32]; conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", params![wid_bytes.as_slice(), *label, 0i64], ) .unwrap_or_else(|e| panic!("network={label} should be accepted: {e}")); } } + +#[test] +fn check_rejects_bad_contact_state() { + let (persister, _tmp, _path) = fresh_persister(); + let conn = persister.lock_conn_for_test(); + // Seed a valid parent wallet so the insert trips the state CHECK, not the FK. 
+ conn.execute( + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", + params![wid(7).as_slice(), "testnet", 0i64], + ) + .expect("seed wallets"); + let res = conn.execute( + "INSERT INTO contacts (wallet_id, owner_id, contact_id, state) \ + VALUES (?1, ?2, ?3, ?4)", + params![ + wid(7).as_slice(), + &[0xAAu8; 32][..], + &[0xBBu8; 32][..], + "not_a_contact_state" + ], + ); + assert_constraint_check(res, "contacts.state"); +} + +#[test] +fn check_accepts_every_known_contact_state_label() { + let (persister, _tmp, _path) = fresh_persister(); + let conn = persister.lock_conn_for_test(); + conn.execute( + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, ?2, ?3)", + params![wid(8).as_slice(), "testnet", 0i64], + ) + .expect("seed wallets"); + // Mirrors `sqlite::schema::contacts::CONTACT_STATE_LABELS`; hardcoded + // because that const is `pub(crate)` and unreachable from this separate + // integration-test crate (same constraint as the network test above). + // The per-module `contact_state_labels_match_enum` unit test guards the + // const itself against drift, so a label added there without updating + // this list surfaces in that test, not as a silent gap here. + for (i, label) in ["sent", "received", "established"].iter().enumerate() { + // Same wallet+owner, distinct contact_id per label to keep the + // composite PK (wallet_id, owner_id, contact_id) unique. 
+ conn.execute( + "INSERT INTO contacts (wallet_id, owner_id, contact_id, state) \ + VALUES (?1, ?2, ?3, ?4)", + params![ + wid(8).as_slice(), + &[0xC0u8; 32][..], + &[i as u8; 32][..], + *label + ], + ) + .unwrap_or_else(|e| panic!("contact state={label} should be accepted: {e}")); + } +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_commit_writes_lock_poison_shortcircuit.rs b/packages/rs-platform-wallet-storage/tests/sqlite_commit_writes_lock_poison_shortcircuit.rs new file mode 100644 index 00000000000..e428c1da708 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_commit_writes_lock_poison_shortcircuit.rs @@ -0,0 +1,108 @@ +#![allow(clippy::field_reassign_with_default)] + +//! `commit_writes` LockPoisoned short-circuit accounting: a +//! `PersistenceError::LockPoisoned` from any wallet's flush aborts the +//! loop early — the offending wallet lands in `failed` and every +//! not-yet-attempted wallet is moved to `still_pending`. +//! +//! Driven deterministically via the `force_next_flush_to_fail` injector +//! (a real panicking-thread mutex poison is non-deterministic), which +//! sends the exact same `LockPoisoned` through `flush_inner` -> +//! `handle_flush_error`'s fatal branch. + +mod common; + +use common::{ensure_wallet_meta, fresh_persister_with_mode, wid}; +use platform_wallet::changeset::{ + CoreChangeSet, PlatformWalletChangeSet, PlatformWalletPersistence, +}; +use platform_wallet_storage::{FlushMode, WalletStorageError}; + +fn changeset(synced: u32) -> PlatformWalletChangeSet { + PlatformWalletChangeSet { + core: Some(CoreChangeSet { + synced_height: Some(synced), + last_processed_height: Some(synced), + ..Default::default() + }), + ..Default::default() + } +} + +/// Wallets flush in sorted-id order. 
Priming a `LockPoisoned` to fire on +/// the FIRST flush (wallet A) must: +/// - record A in `failed` (as LockPoisoned), +/// - move the not-yet-attempted wallets B and C into `still_pending`, +/// - leave `succeeded` empty, +/// - and `commit_writes` itself still returns `Ok(report)` (the loop +/// short-circuits cleanly, it does not propagate `Err`). +#[test] +fn lock_poisoned_short_circuit_fills_still_pending() { + let (persister, _tmp, path) = fresh_persister_with_mode(FlushMode::Manual); + let a = wid(0xA0); + let b = wid(0xB0); + let c = wid(0xC0); + for id in [&a, &b, &c] { + ensure_wallet_meta(&persister, id); + } + persister.store(a, changeset(1)).unwrap(); + persister.store(b, changeset(2)).unwrap(); + persister.store(c, changeset(3)).unwrap(); + + // Fires on the first flush_inner -> sorted order -> wallet A. + persister.force_next_flush_to_fail(WalletStorageError::LockPoisoned); + + let report = persister + .commit_writes() + .expect("commit_writes must return Ok(report), not Err, on a LockPoisoned short-circuit"); + + assert_eq!( + report.failed.len(), + 1, + "exactly one wallet (A) must be recorded as failed; report={report:?}" + ); + assert_eq!(report.failed[0].0, a, "the failed wallet must be A"); + assert!( + matches!( + report.failed[0].1, + platform_wallet::changeset::PersistenceError::LockPoisoned + ), + "A's failure must be LockPoisoned, got {:?}", + report.failed[0].1 + ); + + assert!( + report.succeeded.is_empty(), + "no wallet should have flushed after the short-circuit; report={report:?}" + ); + + let mut pending = report.still_pending.clone(); + pending.sort(); + assert_eq!( + pending, + vec![b, c], + "B and C were never attempted and must land in still_pending; report={report:?}" + ); + assert!( + !report.is_ok(), + "a report with failures must not be is_ok()" + ); + + // B and C must NOT be durable — the loop never reached them. 
+ let conn = common::ro_conn(&path); + for id in [&b, &c] { + let n: i64 = conn + .query_row( + "SELECT COUNT(*) FROM core_sync_state WHERE wallet_id = ?1", + rusqlite::params![id.as_slice()], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + n, + 0, + "still_pending wallet {} must not have been flushed", + hex::encode(id) + ); + } +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_compile_time.rs b/packages/rs-platform-wallet-storage/tests/sqlite_compile_time.rs index 9a719306e0b..c294935cbb0 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_compile_time.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_compile_time.rs @@ -30,12 +30,12 @@ fn tc078_object_safety() { /// rarely do. const READ_ONLY_PREPARE_ALLOWED: &[(&str, &str)] = &[ ( - "wallet_meta.rs", - "SELECT wallet_id FROM wallet_metadata ORDER BY wallet_id", + "wallets.rs", + "SELECT wallet_id FROM wallets ORDER BY wallet_id", ), ( - "wallet_meta.rs", - "SELECT network, birth_height FROM wallet_metadata WHERE wallet_id", + "wallets.rs", + "SELECT network, birth_height FROM wallets WHERE wallet_id", ), ("asset_locks.rs", "SELECT outpoint, account_index"), ("platform_addrs.rs", "SELECT account_index, address_index"), @@ -58,6 +58,31 @@ const READ_ONLY_PREPARE_ALLOWED: &[(&str, &str)] = &[ "SELECT wallet_id, account_index, account_xpub_bytes FROM account_registrations", ), ("core_state.rs", "SELECT outpoint, value, script, height"), + ( + "core_state.rs", + "SELECT account_type, account_index, pool_type, derivation_index, address, used", + ), + // Full-rehydration readers — one-shot SELECTs in `load_state`. 
+ ( + "accounts.rs", + "SELECT account_xpub_bytes FROM account_registrations", + ), + ( + "core_state.rs", + "SELECT record_blob FROM core_transactions WHERE wallet_id", + ), + ( + "core_state.rs", + "SELECT txid, islock_blob FROM core_instant_locks WHERE wallet_id", + ), + ( + "core_state.rs", + "SELECT last_processed_height, synced_height FROM core_sync_state WHERE wallet_id", + ), + ( + "identity_keys.rs", + "SELECT identity_id, key_id, public_key_blob FROM identity_keys WHERE wallet_id", + ), // P4 readers — `load_state` per area uses one-shot SELECTs. ( "identities.rs", diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_contacts_keys_rehydration.rs b/packages/rs-platform-wallet-storage/tests/sqlite_contacts_keys_rehydration.rs new file mode 100644 index 00000000000..94ea6f17faf --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_contacts_keys_rehydration.rs @@ -0,0 +1,187 @@ +#![allow(clippy::field_reassign_with_default)] + +//! Contacts + identity-keys rehydrate through the keyless `load()` path: +//! store → drop → reopen → load → assert the +//! `ClientWalletStartState.contacts` / `.identity_keys` slots carry the +//! persisted PUBLIC material bit-exact. 
+ +mod common; + +use common::{ensure_wallet_meta, fresh_persister, wid}; +use dpp::identity::identity_public_key::v0::IdentityPublicKeyV0; +use dpp::identity::{IdentityPublicKey, KeyType, Purpose, SecurityLevel}; +use dpp::platform_value::BinaryData; +use dpp::prelude::Identifier; +use platform_wallet::changeset::{ + ContactChangeSet, ContactRequestEntry, IdentityKeyEntry, IdentityKeysChangeSet, + PlatformWalletChangeSet, PlatformWalletPersistence, ReceivedContactRequestKey, + SentContactRequestKey, +}; +use platform_wallet::wallet::identity::ContactRequest; + +fn reopen(path: &std::path::Path) -> platform_wallet_storage::SqlitePersister { + platform_wallet_storage::SqlitePersister::open( + platform_wallet_storage::SqlitePersisterConfig::new(path), + ) + .expect("reopen persister") +} + +fn req(sender: u8, recipient: u8) -> ContactRequestEntry { + ContactRequestEntry { + request: ContactRequest { + sender_id: Identifier::from([sender; 32]), + recipient_id: Identifier::from([recipient; 32]), + sender_key_index: 1, + recipient_key_index: 2, + account_reference: 3, + encrypted_account_label: None, + encrypted_public_key: vec![9, 9, 9], + auto_accept_proof: None, + core_height_created_at: 42, + created_at: 7, + }, + } +} + +fn key_entry(identity: Identifier, key_id: u32, byte: u8) -> IdentityKeyEntry { + IdentityKeyEntry { + identity_id: identity, + key_id, + public_key: IdentityPublicKey::V0(IdentityPublicKeyV0 { + id: key_id, + purpose: Purpose::AUTHENTICATION, + security_level: SecurityLevel::HIGH, + contract_bounds: None, + key_type: KeyType::ECDSA_SECP256K1, + read_only: false, + data: BinaryData::new(vec![byte; 33]), + disabled_at: None, + }), + public_key_hash: [byte; 20], + wallet_id: None, + derivation_indices: None, + } +} + +/// Contacts (sent + received) rehydrate bit-exact into the keyless +/// `ClientWalletStartState.contacts` slot. 
+#[test] +fn g_rt1_contacts_rehydrate_into_keyless_payload() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xC0); + ensure_wallet_meta(&persister, &w); + + let sent_key = SentContactRequestKey { + owner_id: Identifier::from([0x11; 32]), + recipient_id: Identifier::from([0x22; 32]), + }; + let recv_key = ReceivedContactRequestKey { + owner_id: Identifier::from([0x11; 32]), + sender_id: Identifier::from([0x33; 32]), + }; + let sent_entry = req(0x11, 0x22); + let recv_entry = req(0x33, 0x11); + let mut sent = std::collections::BTreeMap::new(); + sent.insert(sent_key, sent_entry.clone()); + let mut recv = std::collections::BTreeMap::new(); + recv.insert(recv_key, recv_entry.clone()); + + persister + .store( + w, + PlatformWalletChangeSet { + contacts: Some(ContactChangeSet { + sent_requests: sent, + incoming_requests: recv, + ..Default::default() + }), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let state = p2.load().expect("load"); + let slice = state.wallets.get(&w).expect("wallet rehydrated"); + + let got_sent = slice + .contacts + .sent_requests + .get(&sent_key) + .expect("sent request rehydrated"); + assert_eq!( + got_sent.request.core_height_created_at, + sent_entry.request.core_height_created_at + ); + assert_eq!( + got_sent.request.encrypted_public_key, + sent_entry.request.encrypted_public_key + ); + let got_recv = slice + .contacts + .incoming_requests + .get(&recv_key) + .expect("incoming request rehydrated"); + assert_eq!(got_recv.request.sender_id, recv_entry.request.sender_id); + // The rehydration feed never carries deletes. + assert!(slice.contacts.removed_sent.is_empty()); + assert!(slice.contacts.removed_incoming.is_empty()); +} + +/// Identity-key entries rehydrate bit-exact into the keyless +/// `ClientWalletStartState.identity_keys` slot. 
+#[test] +fn g_rt2_identity_keys_rehydrate_into_keyless_payload() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xC1); + ensure_wallet_meta(&persister, &w); + let id = Identifier::from([0x44; 32]); + platform_wallet_storage::sqlite::schema::identities::ensure_exists( + &persister.lock_conn_for_test(), + &w, + id.as_slice().try_into().unwrap(), + ) + .unwrap(); + + let e0 = key_entry(id, 0, 0xAA); + let e1 = key_entry(id, 1, 0xBB); + let mut keys = IdentityKeysChangeSet::default(); + keys.upserts.insert((id, 0), e0.clone()); + keys.upserts.insert((id, 1), e1.clone()); + persister + .store( + w, + PlatformWalletChangeSet { + identity_keys: Some(keys), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let state = p2.load().expect("load"); + let slice = state.wallets.get(&w).expect("wallet rehydrated"); + assert_eq!(slice.identity_keys.upserts.len(), 2); + assert_eq!(slice.identity_keys.upserts.get(&(id, 0)), Some(&e0)); + assert_eq!(slice.identity_keys.upserts.get(&(id, 1)), Some(&e1)); + assert!(slice.identity_keys.removed.is_empty()); +} + +/// A metadata-only wallet has empty (not error) contacts / +/// identity-keys slots. 
+#[test] +fn g_rt3_empty_slots_for_bare_wallet() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xC2); + ensure_wallet_meta(&persister, &w); + drop(persister); + let p2 = reopen(&path); + let state = p2.load().expect("load"); + let slice = state.wallets.get(&w).expect("wallet present"); + assert!(slice.contacts.sent_requests.is_empty()); + assert!(slice.contacts.incoming_requests.is_empty()); + assert!(slice.contacts.established.is_empty()); + assert!(slice.identity_keys.upserts.is_empty()); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_core_state_reader.rs b/packages/rs-platform-wallet-storage/tests/sqlite_core_state_reader.rs new file mode 100644 index 00000000000..ee23b158835 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_core_state_reader.rs @@ -0,0 +1,286 @@ +#![allow(clippy::field_reassign_with_default)] + +//! `schema::core_state::load_state` bulk-reconstructs the keyless +//! `CoreChangeSet` (UTXOs, records, IS-locks, sync watermarks), and the +//! no-silent-zero balance contract holds end-to-end. 
+ +mod common; + +use common::{ensure_wallet_meta, fresh_persister, wid}; +use dashcore::hashes::Hash; +use dashcore::{OutPoint, Txid}; +use key_wallet::wallet::initialization::WalletAccountCreationOptions; +use key_wallet::wallet::managed_wallet_info::wallet_info_interface::WalletInfoInterface; +use key_wallet::wallet::managed_wallet_info::ManagedWalletInfo; +use key_wallet::wallet::Wallet; +use key_wallet::Utxo; +use platform_wallet::changeset::{ + CoreChangeSet, PlatformWalletChangeSet, PlatformWalletPersistence, +}; +use platform_wallet_storage::sqlite::schema::core_state; +use platform_wallet_storage::WalletStorageError; + +fn reopen(path: &std::path::Path) -> platform_wallet_storage::SqlitePersister { + platform_wallet_storage::SqlitePersister::open( + platform_wallet_storage::SqlitePersisterConfig::new(path), + ) + .expect("reopen persister") +} + +/// Build a wallet + a UTXO paying one of its BIP44 addresses, value +/// `value`, confirmed at `height`. +fn wallet_and_utxo(seed: [u8; 64], value: u64, height: u32, vout: u32) -> (Wallet, Utxo) { + let w = Wallet::from_seed_bytes( + seed, + key_wallet::Network::Testnet, + WalletAccountCreationOptions::Default, + ) + .unwrap(); + let info = ManagedWalletInfo::from_wallet(&w, 1); + // Any monitored address of the wallet — what a real UTXO would pay. + let address = WalletInfoInterface::monitored_addresses(&info) + .into_iter() + .next() + .expect("at least one monitored address"); + let script = address.script_pubkey(); + let utxo = Utxo { + outpoint: OutPoint { + txid: Txid::from_byte_array([0x55; 32]), + vout, + }, + txout: dashcore::TxOut { + value, + script_pubkey: script, + }, + address, + height, + is_coinbase: false, + is_confirmed: true, + is_instantlocked: false, + is_locked: false, + is_trusted: false, + }; + (w, utxo) +} + +/// A non-zero balance survives store → drop → reopen → load, guarding +/// against a silent-zero-balance reconstruction. 
+#[test] +fn rt2_nonzero_balance_survives_reopen() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xB1); + ensure_wallet_meta(&persister, &w); + + let seed = [0x42; 64]; + let (wallet, utxo) = wallet_and_utxo(seed, 1_234_500, 100, 0); + + let cs = PlatformWalletChangeSet { + core: Some(CoreChangeSet { + new_utxos: vec![utxo.clone()], + last_processed_height: Some(200), + synced_height: Some(200), + ..Default::default() + }), + ..Default::default() + }; + persister.store(w, cs).unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let core = core_state::load_state(&conn, &w, key_wallet::Network::Testnet).expect("load_state"); + drop(conn); + + // The persisted UTXO round-trips by outpoint + value. + assert_eq!(core.new_utxos.len(), 1); + assert_eq!(core.new_utxos[0].outpoint, utxo.outpoint); + assert_eq!(core.new_utxos[0].value(), 1_234_500); + assert_eq!(core.last_processed_height, Some(200)); + assert_eq!(core.synced_height, Some(200)); + + // End-to-end: apply onto a freshly minted skeleton (the manager's + // rehydration path) and assert the wallet balance is the persisted + // amount — NOT a silent zero. + let mut info = ManagedWalletInfo::from_wallet(&wallet, 1); + platform_wallet::manager::rehydrate::apply_persisted_core_state(&mut info, &core) + .expect("BIP44 reconstruction must not error"); + let bal = WalletInfoInterface::balance(&info); + let total = bal.confirmed() + bal.unconfirmed() + bal.immature() + bal.locked(); + assert_eq!( + total, 1_234_500, + "reconstructed wallet balance must be exact" + ); + assert!(total > 0, "silent zero balance is a FAIL"); + // Height-bearing UTXO lands in the confirmed bucket. + assert_eq!(bal.confirmed(), 1_234_500); +} + +/// Spent UTXOs are excluded from the reconstructed feed. 
+#[test] +fn b2_spent_utxo_excluded() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xB2); + ensure_wallet_meta(&persister, &w); + let seed = [0x07; 64]; + let (_w, u_unspent) = wallet_and_utxo(seed, 1000, 10, 0); + let (_w2, u_spent) = wallet_and_utxo(seed, 9999, 10, 1); + + persister + .store( + w, + PlatformWalletChangeSet { + core: Some(CoreChangeSet { + new_utxos: vec![u_unspent.clone()], + spent_utxos: vec![u_spent.clone()], + ..Default::default() + }), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let core = core_state::load_state(&conn, &w, key_wallet::Network::Testnet).unwrap(); + drop(conn); + let ops: Vec<_> = core.new_utxos.iter().map(|u| u.outpoint).collect(); + assert!(ops.contains(&u_unspent.outpoint)); + assert!( + !ops.contains(&u_spent.outpoint), + "spent UTXO must not resurrect on reload" + ); +} + +/// A corrupt `record_blob` is a typed hard error. +#[test] +fn b3_corrupt_record_blob_is_hard_error() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xB3); + ensure_wallet_meta(&persister, &w); + { + let conn = persister.lock_conn_for_test(); + conn.execute( + "INSERT INTO core_transactions \ + (wallet_id, txid, height, block_hash, block_time, finalized, record_blob) \ + VALUES (?1, ?2, NULL, NULL, NULL, 0, X'00')", + rusqlite::params![w.as_slice(), &[0x11u8; 32][..]], + ) + .unwrap(); + } + drop(persister); + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let result = core_state::load_state(&conn, &w, key_wallet::Network::Testnet); + drop(conn); + assert!( + matches!(result, Err(WalletStorageError::BincodeDecode { .. })), + "corrupt record_blob must be a typed BincodeDecode; got {result:?}" + ); +} + +/// A CoinJoin-only wallet (no BIP44 account) with non-zero persisted +/// UTXOs reconstructs to the correct non-zero total, never a silent +/// `Ok` + 0. 
+#[test] +fn f2_no_bip44_wallet_nonzero_balance_survives_reopen() { + use std::collections::BTreeSet; + + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xBF); + ensure_wallet_meta(&persister, &w); + + // CoinJoin-only topology: empty BIP44/BIP32 sets, one CoinJoin + // account, no special accounts. + let mut coinjoin = BTreeSet::new(); + coinjoin.insert(0u32); + let opts = WalletAccountCreationOptions::SpecificAccounts( + BTreeSet::new(), + BTreeSet::new(), + coinjoin, + BTreeSet::new(), + BTreeSet::new(), + None, + ); + let seed = [0x4F; 64]; + let wallet = Wallet::from_seed_bytes(seed, key_wallet::Network::Testnet, opts).unwrap(); + assert!( + wallet.accounts.standard_bip44_accounts.is_empty(), + "fixture must be BIP44-free to exercise F2" + ); + let info = ManagedWalletInfo::from_wallet(&wallet, 1); + assert!( + info.accounts.standard_bip44_accounts.is_empty() + && !info.accounts.coinjoin_accounts.is_empty(), + "managed info must be CoinJoin-only" + ); + let address = WalletInfoInterface::monitored_addresses(&info) + .into_iter() + .next() + .expect("CoinJoin-only wallet still has monitored addresses"); + + let utxo = Utxo { + outpoint: OutPoint { + txid: Txid::from_byte_array([0x77; 32]), + vout: 0, + }, + txout: dashcore::TxOut { + value: 9_000_000, + script_pubkey: address.script_pubkey(), + }, + address, + height: 50, + is_coinbase: false, + is_confirmed: true, + is_instantlocked: false, + is_locked: false, + is_trusted: false, + }; + persister + .store( + w, + PlatformWalletChangeSet { + core: Some(CoreChangeSet { + new_utxos: vec![utxo.clone()], + last_processed_height: Some(60), + synced_height: Some(60), + ..Default::default() + }), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let core = core_state::load_state(&conn, &w, key_wallet::Network::Testnet).unwrap(); + drop(conn); + assert_eq!(core.new_utxos.len(), 1); + + let mut info = 
ManagedWalletInfo::from_wallet(&wallet, 1); + platform_wallet::manager::rehydrate::apply_persisted_core_state(&mut info, &core) + .expect("CoinJoin-only reconstruction must not error"); + let bal = WalletInfoInterface::balance(&info); + let total = bal.confirmed() + bal.unconfirmed() + bal.immature() + bal.locked(); + assert_eq!( + total, 9_000_000, + "CoinJoin-only wallet must reconstruct the exact non-zero total — \ + a silent zero is a FAIL" + ); +} + +/// Empty wallet → empty core state, no error. +#[test] +fn b4_empty_core_state_is_ok() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xB4); + ensure_wallet_meta(&persister, &w); + drop(persister); + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let core = core_state::load_state(&conn, &w, key_wallet::Network::Testnet).unwrap(); + drop(conn); + assert!(core.new_utxos.is_empty()); + assert!(core.records.is_empty()); + assert_eq!(core.last_processed_height, None); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_dashpay_overlay_contract.rs b/packages/rs-platform-wallet-storage/tests/sqlite_dashpay_overlay_contract.rs new file mode 100644 index 00000000000..dd235e1e4d7 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_dashpay_overlay_contract.rs @@ -0,0 +1,109 @@ +#![allow(clippy::field_reassign_with_default)] + +//! DashPay write-only overlay contract. +//! +//! `dashpay_profiles` / `dashpay_payments_overlay` are a write-only +//! indexed overlay: data written via the dedicated `dashpay_*` changeset +//! slots IS persisted to the tables, but `load()` rehydrates DashPay +//! state from the identities `entry_blob`, NOT from these tables. These +//! tests pin both halves of that contract: +//! +//! 1. A `dashpay_*` write lands in the overlay tables (queryable directly). +//! 2. Writing ONLY the overlay (no identity blob carrying the same data) +//! does not corrupt `load()` — load succeeds and surfaces the wallet's +//! other state intact. 
+ +mod common; + +use std::collections::BTreeMap; + +use common::{ensure_identity, ensure_wallet_meta, fresh_persister, wid}; +use dpp::prelude::Identifier; +use platform_wallet::changeset::{ + CoreChangeSet, PlatformWalletChangeSet, PlatformWalletPersistence, WalletMetadataEntry, +}; +use platform_wallet::wallet::identity::DashPayProfile; + +fn profile(name: &str) -> DashPayProfile { + DashPayProfile { + display_name: Some(name.to_string()), + ..Default::default() + } +} + +#[test] +fn dashpay_overlay_write_is_persisted_to_its_table() { + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xA1); + let identity = [0xA2u8; 32]; + ensure_wallet_meta(&persister, &w); + ensure_identity(&persister, &identity, Some(&w)); + + let mut profiles: BTreeMap<Identifier, Option<DashPayProfile>> = BTreeMap::new(); + profiles.insert(Identifier::from(identity), Some(profile("alice"))); + + let mut cs = PlatformWalletChangeSet::default(); + cs.dashpay_profiles = Some(profiles); + persister.store(w, cs).expect("store dashpay profile"); + persister.flush(w).expect("flush"); + + // The overlay row is physically present in its dedicated table. + let conn = persister.lock_conn_for_test(); + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM dashpay_profiles WHERE identity_id = ?1", + rusqlite::params![&identity[..]], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(count, 1, "dashpay_profiles overlay row must be persisted"); +} + +#[test] +fn overlay_only_write_does_not_corrupt_load() { + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xB1); + let identity = [0xB2u8; 32]; + ensure_wallet_meta(&persister, &w); + ensure_identity(&persister, &identity, Some(&w)); + + // Give the wallet real, loadable core state plus an overlay-only + // DashPay write (no identity blob carries this profile).
+ let mut core_cs = PlatformWalletChangeSet::default(); + core_cs.wallet_metadata = Some(WalletMetadataEntry { + network: key_wallet::Network::Testnet, + wallet_group_id: [0u8; 32], + birth_height: 0, + }); + core_cs.core = Some(CoreChangeSet { + synced_height: Some(99), + last_processed_height: Some(99), + ..Default::default() + }); + persister.store(w, core_cs).expect("store core"); + persister.flush(w).expect("flush core"); + + let mut profiles: BTreeMap<Identifier, Option<DashPayProfile>> = BTreeMap::new(); + profiles.insert(Identifier::from(identity), Some(profile("bob"))); + let mut overlay_cs = PlatformWalletChangeSet::default(); + overlay_cs.dashpay_profiles = Some(profiles); + persister.store(w, overlay_cs).expect("store overlay"); + persister.flush(w).expect("flush overlay"); + + // The documented contract: load() reads DashPay from the identities + // blob (not the overlay table), so the overlay-only write neither + // appears in nor corrupts the loaded state. load() must still + // succeed and surface the wallet's core state.
+ let state = persister + .load() + .expect("load must succeed despite overlay-only write"); + let wallet = state + .wallets + .get(&w) + .expect("wallet present in loaded state"); + assert_eq!( + wallet.core_state.synced_height, + Some(99), + "core state must rehydrate intact alongside an unread overlay" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_delete_buffer_reconcile.rs b/packages/rs-platform-wallet-storage/tests/sqlite_delete_buffer_reconcile.rs index bfe7b455b05..eed743b7187 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_delete_buffer_reconcile.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_delete_buffer_reconcile.rs @@ -116,7 +116,7 @@ fn pre_delete_backup_includes_buffered_writes() { .unwrap(); let in_backup_meta: Option = backup .query_row( - "SELECT COUNT(*) FROM wallet_metadata WHERE wallet_id = ?1", + "SELECT COUNT(*) FROM wallets WHERE wallet_id = ?1", rusqlite::params![w.as_slice()], |row| row.get(0), ) @@ -130,7 +130,7 @@ fn pre_delete_backup_includes_buffered_writes() { assert_eq!( in_backup_meta, Some(1), - "pre-delete backup must contain the flushed buffered wallet_metadata row" + "pre-delete backup must contain the flushed buffered wallets row" ); } @@ -148,11 +148,11 @@ fn pre_flush_failure_preserves_buffer_and_skips_backup() { let persister = SqlitePersister::open(cfg).unwrap(); let w = wid(0xC1); - // Seed wallet_metadata so the wallet exists in the live DB. + // Seed wallets so the wallet exists in the live DB. 
{ let conn = persister.lock_conn_for_test(); conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) \ + "INSERT INTO wallets (wallet_id, network, birth_height) \ VALUES (?1, 'testnet', 0)", rusqlite::params![w.as_slice()], ) @@ -186,7 +186,7 @@ fn pre_flush_failure_preserves_buffer_and_skips_backup() { let meta_rows: i64 = { let conn = persister.lock_conn_for_test(); conn.query_row( - "SELECT COUNT(*) FROM wallet_metadata WHERE wallet_id = ?1", + "SELECT COUNT(*) FROM wallets WHERE wallet_id = ?1", rusqlite::params![w.as_slice()], |row| row.get(0), ) diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_delete_cross_process_exclusion.rs b/packages/rs-platform-wallet-storage/tests/sqlite_delete_cross_process_exclusion.rs index 4ac4dbbcab0..927542f252a 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_delete_cross_process_exclusion.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_delete_cross_process_exclusion.rs @@ -20,9 +20,8 @@ fn delete_wallet_blocks_when_peer_holds_exclusive() { ensure_wallet_meta(&persister, &w); let backup_dir = tempfile::tempdir().expect("backup dir"); - // Wire the persister with auto-backup so delete_wallet exercises - // the backup + cascade path (the canonical path under test). - // Re-open persister using a config that knows about the dir. + // Re-open with auto-backup wired so delete_wallet exercises the + // backup + cascade path (the canonical path under test). drop(persister); let cfg = platform_wallet_storage::SqlitePersisterConfig::new(&db_path) .with_auto_backup_dir(Some(backup_dir.path().to_path_buf())); @@ -89,14 +88,14 @@ fn delete_wallet_single_process_still_works() { let report = persister.delete_wallet(w).expect("delete succeeds"); assert!(report.backup_path.is_some(), "auto-backup should fire"); - // wallet_metadata row should be gone. + // wallets row should be gone. 
let conn = persister.lock_conn_for_test(); let row: Option = conn .query_row( - "SELECT 1 FROM wallet_metadata WHERE wallet_id = ?1", + "SELECT 1 FROM wallets WHERE wallet_id = ?1", rusqlite::params![w.as_slice()], |r| r.get(0), ) .ok(); - assert!(row.is_none(), "wallet_metadata row must be gone"); + assert!(row.is_none(), "wallets row must be gone"); } diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_delete_partial_commit_window.rs b/packages/rs-platform-wallet-storage/tests/sqlite_delete_partial_commit_window.rs new file mode 100644 index 00000000000..1d34a2b12d8 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_delete_partial_commit_window.rs @@ -0,0 +1,162 @@ +#![allow(clippy::field_reassign_with_default)] + +//! Coverage for `delete_wallet_inner`'s two-transaction shape: the +//! pre-flush tx (drains + commits the buffered changeset) and the cascade +//! tx (deletes the parent `wallets` row) are SEPARATE SQLite +//! transactions. These tests probe what is durable on disk and what is +//! left in the buffer when the delete aborts AFTER the pre-flush has +//! already committed its changeset. + +mod common; + +use common::wid; +use key_wallet::Network; +use platform_wallet::changeset::{ + CoreChangeSet, PlatformWalletChangeSet, PlatformWalletPersistence, WalletMetadataEntry, +}; +use platform_wallet_storage::{FlushMode, SqlitePersister, SqlitePersisterConfig}; +use rusqlite::TransactionBehavior; + +/// Self-consistent changeset that materializes a brand-new wallet on +/// flush (FK-valid `wallets` row + a `core_sync_state` child row). 
+fn full_changeset(synced: u32) -> PlatformWalletChangeSet { + let mut cs = PlatformWalletChangeSet::default(); + cs.wallet_metadata = Some(WalletMetadataEntry { + network: Network::Testnet, + wallet_group_id: [0u8; 32], + birth_height: 0, + }); + cs.core = Some(CoreChangeSet { + synced_height: Some(synced), + last_processed_height: Some(synced), + ..Default::default() + }); + cs +} + +fn core_rows_for(persister: &SqlitePersister, w: &[u8; 32]) -> i64 { + let conn = persister.lock_conn_for_test(); + conn.query_row( + "SELECT COUNT(*) FROM core_sync_state WHERE wallet_id = ?1", + rusqlite::params![w.as_slice()], + |row| row.get(0), + ) + .unwrap() +} + +fn wallets_rows_for(persister: &SqlitePersister, w: &[u8; 32]) -> i64 { + let conn = persister.lock_conn_for_test(); + conn.query_row( + "SELECT COUNT(*) FROM wallets WHERE wallet_id = ?1", + rusqlite::params![w.as_slice()], + |row| row.get(0), + ) + .unwrap() +} + +/// A peer holds a SQLite-native EXCLUSIVE on the same DB file, so the +/// pre-flush's own `BEGIN EXCLUSIVE` fails with BUSY (real +/// `?`-propagation, not the injector) and the cascade is never reached. +/// On that failure the buffered changeset must survive — either still in +/// the buffer (restored) or already durable on disk. +#[test] +fn preflush_begin_exclusive_busy_preserves_buffer() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("w.db"); + // No auto_backup_dir + skip_backup keeps the test on the + // pre-flush -> cascade path without a backup dependency. + let cfg = SqlitePersisterConfig::new(&path).with_flush_mode(FlushMode::Manual); + let persister = SqlitePersister::open(cfg).unwrap(); + let w = wid(0xD1); + + // Buffer a brand-new wallet's full changeset (only state is buffered). 
+ persister.store(w, full_changeset(42)).unwrap(); + + // A peer process grabs EXCLUSIVE on the same file and holds it, + // forcing the persister's own `BEGIN EXCLUSIVE` (pre-flush AND + // cascade both use it) to fail with BUSY past the busy_timeout. + let mut peer = rusqlite::Connection::open(&path).unwrap(); + let peer_guard = peer + .transaction_with_behavior(TransactionBehavior::Exclusive) + .expect("peer EXCLUSIVE"); + + let err = persister.delete_wallet_skip_backup(w); + assert!( + err.is_err(), + "delete must fail while a peer holds EXCLUSIVE; got {err:?}" + ); + + drop(peer_guard); + drop(peer); + + // Contract: a failed delete must not have removed the wallet. Either + // the wallet's state is still buffered (pre-flush never committed) OR + // it is durable on disk (pre-flush committed before the cascade + // aborted). Both are acceptable per the two-tx design; what is NOT + // acceptable is the changeset vanishing from BOTH the buffer and disk. + let on_disk_core = core_rows_for(&persister, &w); + let on_disk_wallets = wallets_rows_for(&persister, &w); + let in_buffer = persister.buffer_has_changeset_for_test(&w); + + assert!( + in_buffer || (on_disk_wallets == 1 && on_disk_core == 1), + "after a failed delete the buffered changeset must survive somewhere: \ + in_buffer={in_buffer}, on_disk_wallets={on_disk_wallets}, on_disk_core={on_disk_core}" + ); + + // If it is on disk, the wallet must NOT have been deleted (delete + // returned Err) — i.e. the cascade did not run. + if on_disk_wallets == 1 { + assert_eq!( + on_disk_core, 1, + "pre-flush committed the wallet but its child row is missing — \ + partial pre-flush is a torn write" + ); + } +} + +/// A pre-flush-committed changeset is durable even though `delete_wallet` +/// aborts; a clean retry once the peer lock is gone converges to a fully +/// deleted wallet. 
+#[test] +fn delete_retry_after_transient_abort_converges_to_deleted() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("w.db"); + let cfg = SqlitePersisterConfig::new(&path).with_flush_mode(FlushMode::Manual); + let persister = SqlitePersister::open(cfg).unwrap(); + let w = wid(0xD2); + + persister.store(w, full_changeset(7)).unwrap(); + + { + let mut peer = rusqlite::Connection::open(&path).unwrap(); + let _peer_guard = peer + .transaction_with_behavior(TransactionBehavior::Exclusive) + .expect("peer EXCLUSIVE"); + let first = persister.delete_wallet_skip_backup(w); + assert!(first.is_err(), "first delete must fail under peer lock"); + // peer guard drops here, releasing the lock + } + + // Retry with the lock gone: the wallet must end fully deleted + // regardless of whether the first attempt left state on disk or in + // the buffer. + persister + .delete_wallet_skip_backup(w) + .expect("retry delete must succeed once the peer lock is gone"); + + persister + .commit_writes() + .expect("commit_writes drains buffer"); + + assert_eq!( + wallets_rows_for(&persister, &w), + 0, + "wallet parent row must be gone after a converged delete" + ); + assert_eq!( + core_rows_for(&persister, &w), + 0, + "wallet child rows must be gone after a converged delete" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_delete_real_apply_failure.rs b/packages/rs-platform-wallet-storage/tests/sqlite_delete_real_apply_failure.rs new file mode 100644 index 00000000000..a0d0cf8a25a --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_delete_real_apply_failure.rs @@ -0,0 +1,73 @@ +#![allow(clippy::field_reassign_with_default)] + +//! `delete_wallet` pre-flush apply failure driven by a REAL SQL error, +//! exercising the apply / commit restore branches a test injector cannot. +//! +//! Strategy: buffer a wallet's full changeset in `Manual` mode, then drop +//! a child table the pre-flush apply needs (`core_sync_state`) via a side +//! 
connection. The delete's pre-flush `apply_changeset_to_tx` then hits a +//! real "no such table" failure on the core-state INSERT, and the +//! buffered changeset MUST be restored (not lost). + +mod common; + +use common::wid; +use key_wallet::Network; +use platform_wallet::changeset::{ + CoreChangeSet, PlatformWalletChangeSet, PlatformWalletPersistence, WalletMetadataEntry, +}; +use platform_wallet_storage::{FlushMode, SqlitePersister, SqlitePersisterConfig}; + +fn full_changeset(synced: u32) -> PlatformWalletChangeSet { + let mut cs = PlatformWalletChangeSet::default(); + cs.wallet_metadata = Some(WalletMetadataEntry { + network: Network::Testnet, + wallet_group_id: [0u8; 32], + birth_height: 0, + }); + cs.core = Some(CoreChangeSet { + synced_height: Some(synced), + last_processed_height: Some(synced), + ..Default::default() + }); + cs +} + +#[test] +fn delete_wallet_pre_flush_apply_real_sql_failure_restores_buffer() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("w.db"); + let cfg = SqlitePersisterConfig::new(&path).with_flush_mode(FlushMode::Manual); + let persister = SqlitePersister::open(cfg).unwrap(); + let w = wid(0xC7); + + // Buffer a brand-new wallet (state lives only in the buffer). + persister.store(w, full_changeset(21)).unwrap(); + assert!( + persister.buffer_has_changeset_for_test(&w), + "precondition: changeset is buffered" + ); + + // Drop the table the pre-flush apply will INSERT into. Use a side + // connection so the persister's own conn is untouched until delete. + { + let conn = rusqlite::Connection::open(&path).unwrap(); + conn.execute("DROP TABLE core_sync_state", []).unwrap(); + } + + // delete_wallet drains the buffer, opens the pre-flush EXCLUSIVE tx, + // and applies the changeset — the core-state INSERT now fails with a + // real "no such table" SQL error. 
+ let err = persister.delete_wallet_skip_backup(w); + assert!( + err.is_err(), + "delete must fail when the pre-flush apply hits a real SQL error; got {err:?}" + ); + + // The buffered changeset MUST survive the failed delete — the + // apply-branch restore put it back. + assert!( + persister.buffer_has_changeset_for_test(&w), + "buffered changeset must be restored after a real pre-flush apply failure" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_delete_wallet.rs b/packages/rs-platform-wallet-storage/tests/sqlite_delete_wallet.rs index 7f9f18eb71b..64eb8c0a627 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_delete_wallet.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_delete_wallet.rs @@ -76,7 +76,7 @@ fn concurrent_store_does_not_resurrect_deleted_wallet() { // test; here we only guard against a racing store resurrecting the // wallet after the delete commit. let conn = persister.lock_conn_for_test(); - for table in ["wallet_metadata", "core_sync_state"] { + for table in ["wallets", "core_sync_state"] { let n: i64 = conn .query_row( &format!("SELECT COUNT(*) FROM {table} WHERE wallet_id = ?1"), diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_error_classification.rs b/packages/rs-platform-wallet-storage/tests/sqlite_error_classification.rs index 12415933f2d..9c1e5fdb0d8 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_error_classification.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_error_classification.rs @@ -1,17 +1,13 @@ #![allow(clippy::field_reassign_with_default)] -//! `WalletStorageError::is_transient` + `error_kind_str` exhaustiveness -//! check via a wildcard-free `match`, plus the boundary mapping of -//! `FlushRetryable` into `PersistenceError::Backend`. +//! `WalletStorageError::is_transient` + `error_kind_str` exhaustiveness, +//! plus the boundary mapping of `FlushRetryable` into +//! `PersistenceError::Backend`. //! -//! 
The check is structured as a `match` over `&WalletStorageError` -//! that covers every variant explicitly. There is NO `_` arm — when a -//! future variant lands on `WalletStorageError`, this file refuses to -//! compile until the author adds a classification + tag here too. -//! Combined with the wildcard-free matches in -//! `error::is_transient` / `error::error_kind_str` and the workspace -//! ban on `#[non_exhaustive]` for this enum, the policy is enforced -//! at the type system level end-to-end. +//! The check is a wildcard-free `match` with one arm per variant (no +//! `_`), so a new `WalletStorageError` variant fails to compile here +//! until it is classified — mirroring the matches in `error::is_transient` +//! / `error::error_kind_str`. use std::path::PathBuf; @@ -96,15 +92,9 @@ fn samples() -> Vec<WalletStorageError> { sqlite_disk_full(), sqlite_io_failure(), sqlite_oom(), - // Migration uses an internal refinery error — we cannot easily - // synthesise one without a full runner. The `Migration(_)` arm - // in the match below uses a lazily-generated value via - // `unimplemented_variant_marker` since the test body never - // reads the inner error. We construct a different concrete - // variant whose match arm is `Migration` — see comment in arm. - // Skipped from samples because refinery::Error has no public - // `From` we can lean on; the arm is still exhaustively - // covered by the match itself. + // Migration wraps a refinery error with no public constructor, so + // it can't be synthesised here. It's omitted from the samples but + // the `Migration(_)` arm below still keeps the match exhaustive.
WalletStorageError::IntegrityCheckFailed { report: "rows missing".into(), }, @@ -145,6 +135,20 @@ fn samples() -> Vec<WalletStorageError> { limit_bytes: 16 * 1024 * 1024, }, WalletStorageError::ForeignKeysNotEnforced, + WalletStorageError::JournalModeNotApplied { + requested: "WAL", + actual: "delete".into(), + }, + WalletStorageError::SchemaHistoryMalformed { + reason: "bad applied_on", + }, + WalletStorageError::NotAWalletDb { + expected: 0x504C_5754, + found: 0, + }, + WalletStorageError::AlreadyOpen { + path: PathBuf::from("/x/w.db"), + }, WalletStorageError::LockPoisoned, WalletStorageError::RestoreDestinationLocked, WalletStorageError::InvalidWalletIdHex { @@ -153,12 +157,8 @@ fn samples() -> Vec<WalletStorageError> { WalletStorageError::InvalidWalletIdLength { actual: 10 }, WalletStorageError::ConfigInvalid { reason: "bad knob" }, WalletStorageError::IdentityEntryIdMismatch, - WalletStorageError::UtxoAddressNotDerived { - address: "yMockAddress".into(), - }, // BincodeEncode / BincodeDecode / HashDecode / ConsensusCodec - // need real upstream errors — synthesise minimal ones via the - // public constructors / `From` impls. + // need real upstream errors; omitted but covered by their arms. WalletStorageError::BlobDecode { reason: "bad shape", }, @@ -183,13 +183,9 @@ fn samples() -> Vec<WalletStorageError> { ] } -/// wildcard-free exhaustiveness gate. -/// -/// The body is a `match` over `&WalletStorageError` with one arm per -/// variant — NO `_` arm, NO `..` rest patterns over enum variants. -/// Adding a new variant to `WalletStorageError` triggers a compile -/// error here AND in `error::is_transient`; the two failures together -/// keep the classification policy honest. +/// Wildcard-free exhaustiveness gate: each variant's expected +/// `(is_transient, error_kind_str)` pair is asserted via a `match` with +/// no `_` arm.
#[test] fn tc_p2_005_is_transient_table() { fn classify(err: &WalletStorageError) -> (bool, &'static str) { @@ -246,8 +242,13 @@ fn tc_p2_005_is_transient_table() { (false, "asset_lock_entry_mismatch") } WalletStorageError::BlobTooLarge { .. } => (false, "blob_too_large"), - WalletStorageError::UtxoAddressNotDerived { .. } => (false, "utxo_address_not_derived"), WalletStorageError::ForeignKeysNotEnforced => (false, "foreign_keys_not_enforced"), + WalletStorageError::JournalModeNotApplied { .. } => (false, "journal_mode_not_applied"), + WalletStorageError::SchemaHistoryMalformed { .. } => { + (false, "schema_history_malformed") + } + WalletStorageError::NotAWalletDb { .. } => (false, "not_a_wallet_db"), + WalletStorageError::AlreadyOpen { .. } => (false, "already_open"), WalletStorageError::IntegerOverflow { .. } => (false, "integer_overflow"), } } @@ -305,9 +306,9 @@ fn tc_p2_010_boundary_error_mapping() { "missing wallet_id hex prefix: {outer}" ); - // Walk the typed source chain to the inner rusqlite payload — - // post- the source is `Box` so - // the chain is preserved structurally, not just stringified. + // Walk the typed source chain to the inner rusqlite payload: the + // source is `Box`, so the chain is preserved + // structurally, not just stringified. let mut chain = String::new(); let mut cur: Option<&(dyn std::error::Error + 'static)> = source.source(); while let Some(e) = cur { diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_fk_changeset_ordering.rs b/packages/rs-platform-wallet-storage/tests/sqlite_fk_changeset_ordering.rs new file mode 100644 index 00000000000..867a6f9ce36 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_fk_changeset_ordering.rs @@ -0,0 +1,269 @@ +#![allow(clippy::field_reassign_with_default)] + +//! FK parent-before-child ordering inside a single immediate-FK +//! transaction, exercised through the production `store()` -> flush path. +//! Two contracts hold: +//! +//! 1. 
A child whose FK parent is neither in the same payload nor on disk +//! aborts the flush with a `Constraint`-kind `PersistenceError` and +//! wipes the buffer (non-transient => no retry): the caller must +//! include the parent in the same `store()` or write it first. +//! 2. A changeset carrying parent and child together commits — the fixed +//! dispatch order writes the parent first for every FK edge. + +mod common; + +use common::{ensure_wallet_meta, fresh_persister, fresh_persister_with_mode, wid}; + +use dpp::identity::identity_public_key::v0::IdentityPublicKeyV0; +use dpp::identity::{IdentityPublicKey, KeyType, Purpose, SecurityLevel}; +use dpp::platform_value::BinaryData; +use dpp::prelude::Identifier; +use platform_wallet::changeset::{ + IdentityChangeSet, IdentityEntry, IdentityKeyEntry, IdentityKeysChangeSet, + PersistenceErrorKind, PlatformWalletChangeSet, PlatformWalletPersistence, +}; +use platform_wallet::wallet::identity::IdentityStatus; +use platform_wallet::wallet::platform_wallet::WalletId; +use platform_wallet_storage::sqlite::schema::identity_keys; +use platform_wallet_storage::FlushMode; + +fn key_entry(identity: Identifier, key_id: u32, byte: u8) -> IdentityKeyEntry { + IdentityKeyEntry { + identity_id: identity, + key_id, + public_key: IdentityPublicKey::V0(IdentityPublicKeyV0 { + id: key_id, + purpose: Purpose::AUTHENTICATION, + security_level: SecurityLevel::HIGH, + contract_bounds: None, + key_type: KeyType::ECDSA_SECP256K1, + read_only: false, + data: BinaryData::new(vec![byte; 33]), + disabled_at: None, + }), + public_key_hash: [byte; 20], + wallet_id: None, + derivation_indices: None, + } +} + +fn identity_entry(id: Identifier, wallet_id: Option<WalletId>) -> IdentityEntry { + IdentityEntry { + id, + balance: 0, + revision: 0, + identity_index: Some(0), + last_updated_balance_block_time: None, + last_synced_keys_block_time: None, + dpns_names: Vec::new(), + contested_dpns_names: Vec::new(), + status: IdentityStatus::Unknown, + wallet_id, +
dashpay_profile: None, + dashpay_payments: Default::default(), + } +} + +fn keys_changeset(identity: Identifier, key_id: u32, byte: u8) -> IdentityKeysChangeSet { + let mut keys = IdentityKeysChangeSet::default(); + keys.upserts + .insert((identity, key_id), key_entry(identity, key_id, byte)); + keys +} + +fn identities_changeset(id: Identifier, wallet_id: Option<WalletId>) -> IdentityChangeSet { + let mut identities = std::collections::BTreeMap::new(); + identities.insert(id, identity_entry(id, wallet_id)); + IdentityChangeSet { + identities, + removed: Default::default(), + } +} + +/// A changeset carrying `identity_keys` for an identity whose +/// `identities` parent is absent from both the payload and the DB (the +/// `wallets` parent is present, isolating the failure) aborts the flush +/// with a `Constraint`-kind `PersistenceError` carrying the constraint +/// class — not a panic or a raw-string-only error. +#[test] +fn identity_keys_without_parent_identity_aborts_with_constraint_kind() { + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xA1); + ensure_wallet_meta(&persister, &w); // wallet parent present; identity parent absent + let orphan_identity = Identifier::from([0x33; 32]); + + let err = persister + .store( + w, + PlatformWalletChangeSet { + identity_keys: Some(keys_changeset(orphan_identity, 0, 0x11)), + ..Default::default() + }, + ) + .expect_err("child-without-parent flush must fail, not silently succeed"); + + assert_eq!( + err.kind(), + Some(PersistenceErrorKind::Constraint), + "an immediate-FK abort must surface as a Constraint-kind PersistenceError, got {err:?}" + ); + // The underlying rusqlite source must be walkable to the real FK + // violation — the typed wrapper preserves it rather than flattening + // to a lossy string.
+ let source_chain = { + use std::error::Error; + let mut s = String::new(); + let mut cur: Option<&dyn Error> = Some(&err); + while let Some(e) = cur { + s.push_str(&e.to_string()); + s.push('\n'); + cur = e.source(); + } + s + }; + assert!( + source_chain.contains("FOREIGN KEY"), + "the FK violation must be reachable via Error::source(), got chain:\n{source_chain}" + ); +} + +/// The constraint abort wipes the buffer: a follow-up `flush()` is a +/// clean no-op and nothing reached disk for the orphaned identity. +#[test] +fn constraint_abort_wipes_buffer_no_silent_retry() { + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xA2); + ensure_wallet_meta(&persister, &w); + let orphan_identity = Identifier::from([0x44; 32]); + + let _ = persister + .store( + w, + PlatformWalletChangeSet { + identity_keys: Some(keys_changeset(orphan_identity, 0, 0x55)), + ..Default::default() + }, + ) + .expect_err("must fail"); + + // Buffer wiped: the next flush finds nothing to write and is a no-op. + PlatformWalletPersistence::flush(&persister, w).expect("post-abort flush is a clean no-op"); + + // And nothing was committed for the orphan identity. + let on_disk = + identity_keys::load_state(&persister.lock_conn_for_test(), &w).expect("load identity_keys"); + assert!( + on_disk.upserts.is_empty(), + "no identity_keys row may have been committed for the orphaned identity" + ); +} + +/// The same contract in Manual flush mode: `store` only buffers, the +/// abort surfaces from the explicit `flush`, and the buffer is wiped so +/// the failed write is dropped (not silently re-attempted forever). +#[test] +fn manual_mode_child_without_parent_aborts_on_flush_and_drops_buffer() { + let (persister, _tmp, _path) = fresh_persister_with_mode(FlushMode::Manual); + let w = wid(0xA3); + ensure_wallet_meta(&persister, &w); + let orphan_identity = Identifier::from([0x66; 32]); + + // Manual mode: store buffers without touching SQL. 
+ persister + .store( + w, + PlatformWalletChangeSet { + identity_keys: Some(keys_changeset(orphan_identity, 0, 0x77)), + ..Default::default() + }, + ) + .expect("manual-mode store only buffers"); + + let err = PlatformWalletPersistence::flush(&persister, w) + .expect_err("explicit flush must surface the FK abort"); + assert_eq!( + err.kind(), + Some(PersistenceErrorKind::Constraint), + "manual-mode flush abort must also be Constraint-kind, got {err:?}" + ); + + // Buffer wiped on the fatal classification: a second flush is a no-op. + PlatformWalletPersistence::flush(&persister, w).expect("second flush is a clean no-op"); +} + +/// The recovery contract: a COMPLETE changeset carrying the `identities` +/// parent AND its `identity_keys` child in the SAME `store()` commits. +/// This proves the fixed dispatch order writes `identities` before +/// `identity_keys` so the immediate FK is satisfied at the child insert +/// — the parent-before-child invariant for the `identity_id` FK edge. +#[test] +fn parent_and_child_in_same_changeset_commits() { + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xB1); + ensure_wallet_meta(&persister, &w); + let identity = Identifier::from([0x88; 32]); + + persister + .store( + w, + PlatformWalletChangeSet { + identities: Some(identities_changeset(identity, Some(w))), + identity_keys: Some(keys_changeset(identity, 0, 0x99)), + ..Default::default() + }, + ) + .expect("parent+child in one changeset must commit under the fixed dispatch order"); + + let on_disk = + identity_keys::load_state(&persister.lock_conn_for_test(), &w).expect("load identity_keys"); + assert_eq!( + on_disk.upserts.len(), + 1, + "the child identity_keys row must be committed alongside its parent identity" + ); + assert!( + on_disk.upserts.contains_key(&(identity, 0)), + "the committed row must be the one we wrote" + ); +} + +/// The same edge from the wallets side: a complete changeset that carries +/// the `wallets` root anchor (via `wallet_metadata`) 
AND a `wallet_id`-FK +/// child (`identity_keys`, also needing its identity parent) commits in +/// one flush. `wallets` is dispatched first, so the child's +/// `wallet_id -> wallets` FK is satisfied even when the wallets row did +/// not pre-exist. +#[test] +fn wallets_anchor_and_children_in_same_changeset_commits() { + use key_wallet::Network; + use platform_wallet::changeset::WalletMetadataEntry; + + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xB2); // deliberately NOT pre-seeded — the changeset carries it + let identity = Identifier::from([0xAB; 32]); + + persister + .store( + w, + PlatformWalletChangeSet { + wallet_metadata: Some(WalletMetadataEntry { + network: Network::Testnet, + wallet_group_id: [0u8; 32], + birth_height: 0, + }), + identities: Some(identities_changeset(identity, Some(w))), + identity_keys: Some(keys_changeset(identity, 0, 0xCD)), + ..Default::default() + }, + ) + .expect("wallets anchor + children in one changeset must commit"); + + let on_disk = + identity_keys::load_state(&persister.lock_conn_for_test(), &w).expect("load identity_keys"); + assert_eq!( + on_disk.upserts.len(), + 1, + "the wallet_id-FK child must commit because wallets is dispatched first" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_foreign_keys.rs b/packages/rs-platform-wallet-storage/tests/sqlite_foreign_keys.rs index e97a87c3be7..bab34805575 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_foreign_keys.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_foreign_keys.rs @@ -1,10 +1,10 @@ #![allow(clippy::field_reassign_with_default)] -//! Native foreign-key enforcement and the delete cascade. +//! TC-045..TC-049 — native foreign-key enforcement and the delete cascade. mod common; -use common::{ensure_wallet_meta, fresh_persister, wid}; +use common::{ensure_identity, ensure_wallet_meta, fresh_persister, wid}; /// PRAGMA foreign_keys is ON on the connection. 
#[test] @@ -17,7 +17,7 @@ fn tc045_foreign_keys_on() { assert_eq!(fk, 1, "foreign_keys pragma not ON"); } -/// insert into a child table without a wallet_metadata parent fails. +/// insert into a child table without a wallets parent fails. #[test] fn tc046_orphan_child_insert_rejected() { let (persister, _tmp, _path) = fresh_persister(); @@ -35,7 +35,7 @@ fn tc046_orphan_child_insert_rejected() { ); } -/// deleting wallet_metadata cascades. +/// deleting wallets cascades. #[test] fn tc047_delete_wallet_cascade() { let (persister, _tmp, _path) = fresh_persister(); @@ -120,3 +120,51 @@ fn tc048_setnull_on_tx_delete() { "spent_in_txid should have been set to NULL" ); } + +/// TC-049: `identity_keys` rows carry TWO `ON DELETE CASCADE` parents +/// (`wallet_id -> wallets`, `identity_id -> identities`). +/// Deleting the wallet must purge the child via that dual-cascade — both +/// paths firing on one row is idempotent, not a double-free error. +#[test] +fn tc049_delete_wallet_cascades_identity_keys() { + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xC4); + let identity = [0xE4u8; 32]; + // Seed BOTH FK parents: the wallets row and a wallet-scoped + // identities row, so the child satisfies both cascade chains. 
+ ensure_wallet_meta(&persister, &w); + ensure_identity(&persister, &identity, Some(&w)); + { + let conn = persister.lock_conn_for_test(); + conn.execute( + "INSERT INTO identity_keys \ + (wallet_id, identity_id, key_id, public_key_blob, public_key_hash, derivation_blob) \ + VALUES (?1, ?2, 0, X'01', ?3, NULL)", + rusqlite::params![w.as_slice(), &identity[..], &[0u8; 20][..]], + ) + .unwrap(); + } + + let before: i64 = persister + .lock_conn_for_test() + .query_row( + "SELECT COUNT(*) FROM identity_keys WHERE wallet_id = ?1", + rusqlite::params![w.as_slice()], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(before, 1, "seed row must exist before delete"); + + let report = persister.delete_wallet(w).expect("delete_wallet"); + assert_eq!(report.wallet_id, w); + + let after: i64 = persister + .lock_conn_for_test() + .query_row( + "SELECT COUNT(*) FROM identity_keys WHERE wallet_id = ?1", + rusqlite::params![w.as_slice()], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(after, 0, "dual cascade must purge the identity_keys row"); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_identity_keys_reader.rs b/packages/rs-platform-wallet-storage/tests/sqlite_identity_keys_reader.rs new file mode 100644 index 00000000000..547d00ee241 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_identity_keys_reader.rs @@ -0,0 +1,150 @@ +#![allow(clippy::field_reassign_with_default)] + +//! `schema::identity_keys::load_state` reads `identity_keys` rows back +//! into a keyless `IdentityKeysChangeSet`, bit-exact, fail-hard on a +//! corrupt blob. 
+ +mod common; + +use common::{ensure_wallet_meta, fresh_persister, wid}; +use dpp::identity::identity_public_key::v0::IdentityPublicKeyV0; +use dpp::identity::{IdentityPublicKey, KeyType, Purpose, SecurityLevel}; +use dpp::platform_value::BinaryData; +use dpp::prelude::Identifier; +use platform_wallet::changeset::{ + IdentityKeyDerivationIndices, IdentityKeyEntry, IdentityKeysChangeSet, PlatformWalletChangeSet, + PlatformWalletPersistence, +}; +use platform_wallet_storage::sqlite::schema::identity_keys; +use platform_wallet_storage::WalletStorageError; + +fn reopen(path: &std::path::Path) -> platform_wallet_storage::SqlitePersister { + platform_wallet_storage::SqlitePersister::open( + platform_wallet_storage::SqlitePersisterConfig::new(path), + ) + .expect("reopen persister") +} + +fn key_entry(identity: Identifier, key_id: u32, byte: u8) -> IdentityKeyEntry { + IdentityKeyEntry { + identity_id: identity, + key_id, + public_key: IdentityPublicKey::V0(IdentityPublicKeyV0 { + id: key_id, + purpose: Purpose::AUTHENTICATION, + security_level: SecurityLevel::HIGH, + contract_bounds: None, + key_type: KeyType::ECDSA_SECP256K1, + read_only: false, + data: BinaryData::new(vec![byte; 33]), + disabled_at: None, + }), + public_key_hash: [byte; 20], + wallet_id: None, + derivation_indices: Some(IdentityKeyDerivationIndices { + identity_index: 0, + key_index: u32::from(byte), + }), + } +} + +/// Identity-key rows round-trip bit-exact into the keyless +/// `IdentityKeysChangeSet`. 
+#[test] +fn gk1_identity_keys_roundtrip() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xD1); + ensure_wallet_meta(&persister, &w); + let id_a = Identifier::from([0x0A; 32]); + let id_b = Identifier::from([0x0B; 32]); + platform_wallet_storage::sqlite::schema::identities::ensure_exists( + &persister.lock_conn_for_test(), + &w, + id_a.as_slice().try_into().unwrap(), + ) + .unwrap(); + platform_wallet_storage::sqlite::schema::identities::ensure_exists( + &persister.lock_conn_for_test(), + &w, + id_b.as_slice().try_into().unwrap(), + ) + .unwrap(); + + let e1 = key_entry(id_a, 0, 0x11); + let e2 = key_entry(id_a, 1, 0x22); + let e3 = key_entry(id_b, 0, 0x33); + let mut keys = IdentityKeysChangeSet::default(); + keys.upserts.insert((id_a, 0), e1.clone()); + keys.upserts.insert((id_a, 1), e2.clone()); + keys.upserts.insert((id_b, 0), e3.clone()); + persister + .store( + w, + PlatformWalletChangeSet { + identity_keys: Some(keys), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let cs = identity_keys::load_state(&conn, &w).expect("load_state"); + drop(conn); + + assert_eq!(cs.upserts.len(), 3); + assert_eq!(cs.upserts.get(&(id_a, 0)), Some(&e1)); + assert_eq!(cs.upserts.get(&(id_a, 1)), Some(&e2)); + assert_eq!(cs.upserts.get(&(id_b, 0)), Some(&e3)); + assert!(cs.removed.is_empty()); +} + +/// An empty wallet yields an empty changeset, not an error. +#[test] +fn gk2_empty_identity_keys_is_ok() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xD2); + ensure_wallet_meta(&persister, &w); + drop(persister); + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let cs = identity_keys::load_state(&conn, &w).expect("load_state"); + drop(conn); + assert!(cs.upserts.is_empty()); +} + +/// A corrupt `public_key_blob` is a typed hard error, never a silent +/// skip. 
+#[test] +fn gk3_corrupt_blob_is_hard_error() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xD3); + ensure_wallet_meta(&persister, &w); + let id = Identifier::from([0x0C; 32]); + platform_wallet_storage::sqlite::schema::identities::ensure_exists( + &persister.lock_conn_for_test(), + &w, + id.as_slice().try_into().unwrap(), + ) + .unwrap(); + { + let conn = persister.lock_conn_for_test(); + conn.execute( + "INSERT INTO identity_keys \ + (wallet_id, identity_id, key_id, public_key_blob, public_key_hash, derivation_blob) \ + VALUES (?1, ?2, 0, X'00', ?3, NULL)", + rusqlite::params![w.as_slice(), id.as_slice(), &[0u8; 20][..]], + ) + .unwrap(); + } + drop(persister); + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let result = identity_keys::load_state(&conn, &w); + drop(conn); + assert!( + matches!(result, Err(WalletStorageError::BincodeDecode { .. })), + "corrupt public_key_blob must be a typed BincodeDecode; got {result:?}" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_load_reconstruction.rs b/packages/rs-platform-wallet-storage/tests/sqlite_load_reconstruction.rs index e4c6ffbf746..e156a51345b 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_load_reconstruction.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_load_reconstruction.rs @@ -1,14 +1,9 @@ #![allow(clippy::field_reassign_with_default)] -//! `load()` reconstructs the wired-up subset of client start-state. -//! -//! The wallet-level fields (`wallets[*].utxos` / `.unused_asset_locks`) -//! are blocked on upstream `Wallet::from_persisted` — the persister -//! stores the data (verified via direct SQL probes) but cannot -//! reconstruct the `Wallet` + `ManagedWalletInfo` pair that -//! `ClientWalletStartState` requires. The unwired fields are listed in -//! `persister::LOAD_UNIMPLEMENTED` and surfaced via a `tracing::warn!` -//! on every `load`. +//! `load()` reconstruction tests: `load()` returns a keyless per-wallet +//! 
payload (network, birth height, account manifest, core-state +//! projection, identities, `Consumed`-filtered asset locks, contacts, +//! identity keys) from which the manager re-derives the signing `Wallet`. mod common; @@ -218,15 +213,105 @@ fn wallet_without_platform_state_is_omitted_from_load() { ); } -/// non-wired-up sub-areas are written to disk (verified by -/// direct SQL probes) but do not surface in the load result. -/// -/// Constructs non-empty `ContactChangeSet` and `TokenBalanceChangeSet` -/// payloads — `is_empty()` returns false on either, so the buffer -/// flushes them — then asserts both the `contacts` and `token_balances` -/// rows are present in SQLite after a reopen, while -/// `ClientStartState.platform_addresses` stays empty for the wallet -/// (no platform-address activity was stored). +/// `load_all`'s reported count excludes address rows for an account with +/// no registration. Such rows are skipped during `per_account` +/// reconstruction (no xpub, nothing to restore), so counting them would +/// claim platform state that `load()` never surfaces. +#[test] +fn load_all_count_excludes_unregistered_account_addresses() { + use platform_wallet::changeset::AccountRegistrationEntry; + + let (persister, _tmp, path) = fresh_persister(); + + // Wallet A: one registered account (2 addresses) plus an orphan + // account_index with no registration (1 address). Only the 2 + // registered-account rows reconstruct, so the count must be 2. 
+ let a = wid(0x70); + ensure_wallet_meta(&persister, &a); + let registered = 4u32; + let unregistered = 9u32; + let mut cs_a = PlatformWalletChangeSet::default(); + cs_a.account_registrations = vec![AccountRegistrationEntry { + account_type: key_wallet::account::AccountType::PlatformPayment { + account: registered, + key_class: 0, + }, + account_xpub: test_xpub(), + }]; + cs_a.platform_addresses = Some(PlatformAddressChangeSet { + addresses: vec![ + entry(a, registered, 0, 0xC0), + entry(a, registered, 1, 0xC1), + entry(a, unregistered, 0, 0xC2), + ], + ..Default::default() + }); + persister.store(a, cs_a).unwrap(); + + // Wallet B: only orphan-account rows, no registration and no + // watermark — nothing reconstructs, so the count must be 0 and the + // wallet must be omitted from `load()`. + let b = wid(0x71); + ensure_wallet_meta(&persister, &b); + let mut cs_b = PlatformWalletChangeSet::default(); + cs_b.platform_addresses = Some(PlatformAddressChangeSet { + addresses: vec![entry(b, unregistered, 0, 0xD0)], + ..Default::default() + }); + persister.store(b, cs_b).unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let all = + platform_wallet_storage::sqlite::schema::platform_addrs::load_all(&conn).expect("load_all"); + let total_rows_a = + platform_wallet_storage::sqlite::schema::platform_addrs::count_per_wallet(&conn, &a) + .expect("count_per_wallet"); + drop(conn); + + // Sanity: wallet A really does carry the orphan row on disk. 
+ assert_eq!(total_rows_a, 3, "wallet A has 3 platform_addresses rows"); + + let (sync_a, count_a) = all.get(&a).expect("wallet A present in load_all"); + assert_eq!( + *count_a, 2, + "only the 2 registered-account rows are reconstructed; the orphan row is excluded" + ); + assert_eq!( + sync_a.per_account.len(), + 1, + "exactly the registered account reconstructs" + ); + assert!( + sync_a.per_account.contains_key(&registered), + "the registered account is present in per_account" + ); + + let (_, count_b) = all.get(&b).expect("wallet B present in load_all"); + assert_eq!( + *count_b, 0, + "an orphan-only wallet reconstructs no addresses, so its count is 0" + ); + + // The count drives `load()`'s surfacing gate: wallet B carries no + // reconstructable state, so it must not appear in the load result. + let state = p2.load().unwrap(); + assert!( + state.platform_addresses.contains_key(&a), + "wallet A reconstructs the registered account and must surface" + ); + assert!( + !state.platform_addresses.contains_key(&b), + "wallet B has only an unregistered-account row and must be omitted" + ); +} + +/// `token_balances` is persisted-but-not-rehydrated (deferred) while +/// contacts rehydrate into `state.wallets[w].contacts`. Both tables are +/// durable on disk after reopen (direct SQL probes), the contact +/// round-trips into the keyless payload, and `state.platform_addresses` +/// stays empty (no platform-address activity was stored). #[test] fn tc043_non_wired_up_persisted_but_not_returned() { use dpp::prelude::Identifier; @@ -293,6 +378,16 @@ fn tc043_non_wired_up_persisted_but_not_returned() { !state.platform_addresses.contains_key(&w), "no platform-address activity was stored — wallet must be absent" ); + // Contacts rehydrate into the keyless payload.
+ let slice = state.wallets.get(&w).expect("wallet rehydrated"); + let key = SentContactRequestKey { + owner_id: owner, + recipient_id: recipient, + }; + assert!( + slice.contacts.sent_requests.contains_key(&key), + "the persisted sent contact request must rehydrate" + ); drop(p2); let conn = common::ro_conn(&path); @@ -368,13 +463,9 @@ fn contact_request_entry(sender: u8, recipient: u8) -> ContactRequestEntry { } } -/// identities reader round-trips per wallet, exact equality -/// on `id`s. -/// -/// `persister.load()` no longer surfaces the identities slot (the -/// `ClientStartState` revert dropped it), so this exercises the -/// hardened dormant reader `schema::identities::load_state` directly — -/// keeping its fail-hard behaviour genuinely covered. +/// identities reader round-trips per wallet, exact equality on `id`s. +/// Exercises the hardened reader `schema::identities::load_state` +/// directly (not surfaced by `load()`), covering its fail-hard behaviour. #[test] fn tc_p4_003_load_identities_two_wallets() { use platform_wallet_storage::sqlite::schema::identities; @@ -988,8 +1079,8 @@ fn tc_p4_005_load_asset_locks_bucketed() { assert_eq!(b_buckets[&0].len(), 1); } -/// empty wallets emit `wallets_pending_rehydration = N` -/// and `wallets` slot stays empty. +/// Every persisted wallet is rehydrated into the keyless `wallets` +/// payload — `wallets_rehydrated = N`, none pending. 
#[tracing_test::traced_test] #[test] fn tc_p4_006_pending_rehydration_count() { @@ -1000,12 +1091,12 @@ fn tc_p4_006_pending_rehydration_count() { drop(persister); let p2 = reopen(&path); let state = p2.load().unwrap(); - assert!(state.wallets.is_empty()); - assert!(logs_contain("wallets_pending_rehydration=3")); - assert!(logs_contain("wallets_rehydrated=0")); + assert_eq!(state.wallets.len(), 3, "all 3 wallets rehydrated"); + assert!(logs_contain("wallets_rehydrated=3")); + assert!(logs_contain("wallets_pending_rehydration=0")); } -/// load() summary carries every counter, including zeros. +/// load() summary carries the real rehydration counters. #[tracing_test::traced_test] #[test] fn tc_p4_007_summary_log_counters() { @@ -1018,8 +1109,8 @@ fn tc_p4_007_summary_log_counters() { for field in [ "wallets_seen=2", "addresses_loaded=0", - "wallets_rehydrated=0", - "wallets_pending_rehydration=2", + "wallets_rehydrated=2", + "wallets_pending_rehydration=0", ] { assert!(logs_contain(field), "missing structured field: {field}"); } @@ -1088,10 +1179,10 @@ fn tc_p4_008_corruption_is_hard_error() { assert_eq!(b_state.wallet_identities.get(&b).map(|m| m.len()), Some(1)); } -/// 008b: `contacts::load_state` is fail-hard. A garbage -/// `outgoing_request` blob yields a typed `BincodeDecode`; a non-32-byte -/// id column yields a typed `BlobDecode`. Neither is silently skipped, -/// and an intact wallet still decodes cleanly. +/// `contacts::load_state` is fail-hard. A garbage `outgoing_request` +/// blob yields a typed `BincodeDecode`; a non-32-byte id column yields a +/// typed `BlobDecode`. Neither is silently skipped, and an intact wallet +/// still decodes cleanly. #[test] fn tc_p4_008b_contacts_corruption_is_hard_error() { use platform_wallet_storage::sqlite::schema::contacts; @@ -1160,10 +1251,9 @@ fn tc_p4_008b_contacts_corruption_is_hard_error() { assert_eq!(good_state.sent_requests.len(), 1); } -/// 008c: `asset_locks::load_state` is fail-hard. 
A garbage -/// `lifecycle_blob` yields a typed `BincodeDecode`; a malformed -/// `outpoint` column yields a typed decode error. An intact wallet -/// still decodes cleanly. +/// `asset_locks::load_state` is fail-hard. A garbage `lifecycle_blob` +/// yields a typed `BincodeDecode`; a malformed `outpoint` column yields a +/// typed decode error. An intact wallet still decodes cleanly. #[test] fn tc_p4_008c_asset_locks_corruption_is_hard_error() { use dashcore::hashes::Hash; @@ -1256,19 +1346,18 @@ fn tc_p4_008c_asset_locks_corruption_is_hard_error() { assert_eq!(good_state[&0].len(), 1); } -/// 008d: `wallet_meta::list_ids` is fail-hard on a malformed -/// stored `wallet_id`. This is the code path where a non-32-byte id -/// actually surfaces (the per-area `load_state` readers take a typed -/// `&WalletId`, so the length check belongs here). A 10-byte -/// `wallet_metadata.wallet_id` yields a typed `InvalidWalletIdLength`. +/// `wallets::list_ids` is fail-hard on a malformed stored `wallet_id`. +/// This is the code path where a non-32-byte id actually surfaces (the +/// per-area `load_state` readers take a typed `&WalletId`). A 10-byte +/// `wallets.wallet_id` yields a typed `InvalidWalletIdLength`. 
#[test] fn tc_p4_008d_list_ids_rejects_non_32_byte_wallet_id() { - use platform_wallet_storage::sqlite::schema::wallet_meta; + use platform_wallet_storage::sqlite::schema::wallets; let (persister, _tmp, path) = fresh_persister(); { let conn = persister.lock_conn_for_test(); conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) \ + "INSERT INTO wallets (wallet_id, network, birth_height) \ VALUES (?1, 'testnet', 0)", rusqlite::params![&[0xAAu8; 10][..]], ) @@ -1278,7 +1367,7 @@ fn tc_p4_008d_list_ids_rejects_non_32_byte_wallet_id() { let p2 = reopen(&path); let conn = p2.lock_conn_for_test(); - let result = wallet_meta::list_ids(&conn); + let result = wallets::list_ids(&conn); drop(conn); assert!( matches!( @@ -1290,19 +1379,10 @@ fn tc_p4_008d_list_ids_rejects_non_32_byte_wallet_id() { ); } -/// `load()` query cost is bounded per wallet. -/// -/// `load()` now drives the platform-address reader off -/// `wallet_meta::list_ids` and issues a fixed, small number of -/// statements per listed wallet (the dedup collapse traded the old -/// constant-query bulk scans for the fail-hard per-wallet readers). -/// This pins the per-wallet statement count so a future regression -/// that fans out into an unbounded per-row round trip is caught. -/// -/// Verified by enabling `sqlite3_trace_v2` on the persister's -/// connection, counting `Stmt` events for the duration of one -/// `load()`. `serial_test::serial` because the trace counter is a -/// process-wide `AtomicUsize` (`Connection::trace_v2`'s callback must +/// `load()` query cost is constant per wallet (no unbounded per-row +/// fan-out), without pinning a brittle magic number. Counts `Stmt` +/// events via `sqlite3_trace_v2` over one `load()`; `serial` because the +/// counter is a process-wide `AtomicUsize` (the `trace_v2` callback must /// be a `fn`, not a `Fn`). 
#[test] #[serial_test::serial] @@ -1359,21 +1439,37 @@ fn tc_p4_012_load_query_count_bounded() { seed_wallets(&p10, 10); let count_ten = count_load_queries(&p10); - // `load()` issues a fixed number of grouped scans regardless of - // wallet count: `wallet_meta::list_ids` plus one scan each over - // `platform_address_sync`, `platform_addresses`, and the - // `platform_payment` `account_registrations`. The count must NOT - // grow with the number of wallets — that's the constant-query - // contract. + // Going from N=1 to N=10 adds 9 wallets, so the statement-count + // delta must divide evenly by 9 (shared scans such as + // `wallets::list_ids` cancel out), i.e. load() is O(1) statements + // per wallet — no unbounded per-row fan-out. The exact constant is + // not pinned (brittle as readers evolve) but it must be small and bounded. + let delta = count_ten - count_one; assert_eq!( - count_one, count_ten, - "load() query count must not grow with wallet count \ - (N=1 → {count_one}, N=10 → {count_ten})" + delta % 9, + 0, + "per-wallet statement count must be constant \ + (N=1 → {count_one}, N=10 → {count_ten}, delta → {delta})" + ); + let per_wallet = delta / 9; + assert!( + (1..=20).contains(&per_wallet), + "per-wallet statement count must be small + bounded, got {per_wallet}" + ); + // Shared (wallet-count-independent) overhead: the `list_ids` + + // `platform_addrs::load_all` scans. `count_one = shared + per_wallet` + // ⇒ shared must itself be a small constant, not growing with N. + let shared = count_one - per_wallet; + assert!( + (1..=8).contains(&shared), + "shared load() overhead must be a small constant, got {shared} \ + (N=1 → {count_one}, per-wallet → {per_wallet})" + ); + // And it really is N-independent: N=10 total == shared + 10×per_wallet.
assert_eq!( - count_one, 4, - "load() must issue exactly 4 grouped statements \ - (list_ids + sync + addresses + registrations), got {count_one}" + count_ten, + shared + 10 * per_wallet, + "load() statement count must be exactly shared + N×per_wallet" ); } diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_load_wiring.rs b/packages/rs-platform-wallet-storage/tests/sqlite_load_wiring.rs new file mode 100644 index 00000000000..ff4d91744f1 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_load_wiring.rs @@ -0,0 +1,153 @@ +#![allow(clippy::field_reassign_with_default)] + +//! `SqlitePersister::load()` returns the keyless per-wallet rehydration +//! payload in `ClientStartState.wallets` (network, birth height, account +//! manifest, core state, identities, filtered asset locks), carrying no +//! `Wallet`/seed. + +mod common; + +use common::{ensure_wallet_meta, fresh_persister, wid}; +use key_wallet::wallet::initialization::WalletAccountCreationOptions; +use key_wallet::wallet::managed_wallet_info::wallet_info_interface::WalletInfoInterface; +use key_wallet::wallet::managed_wallet_info::ManagedWalletInfo; +use key_wallet::wallet::Wallet; +use platform_wallet::changeset::{ + AccountRegistrationEntry, CoreChangeSet, PlatformWalletChangeSet, PlatformWalletPersistence, + WalletMetadataEntry, +}; +use platform_wallet_storage::{SqlitePersister, SqlitePersisterConfig}; + +fn reopen(path: &std::path::Path) -> SqlitePersister { + SqlitePersister::open(SqlitePersisterConfig::new(path)).expect("reopen") +} + +/// A registered wallet with UTXOs round-trips into the keyless `wallets` +/// payload — manifest, network, birth height, core state. 
+#[test] +fn c1_load_populates_keyless_wallet_payload() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xC1); + + let seed = [0x21; 64]; + let wallet = Wallet::from_seed_bytes( + seed, + key_wallet::Network::Testnet, + WalletAccountCreationOptions::Default, + ) + .unwrap(); + let info = ManagedWalletInfo::from_wallet(&wallet, 7); + let address = WalletInfoInterface::monitored_addresses(&info) + .into_iter() + .next() + .unwrap(); + + // Registration round: metadata + per-account manifest. + let manifest: Vec<AccountRegistrationEntry> = wallet + .accounts + .all_accounts() + .into_iter() + .map(|a| AccountRegistrationEntry { + account_type: a.account_type, + account_xpub: a.account_xpub, + }) + .collect(); + let reg = PlatformWalletChangeSet { + wallet_metadata: Some(WalletMetadataEntry { + network: key_wallet::Network::Testnet, + wallet_group_id: [0u8; 32], + birth_height: 7, + }), + account_registrations: manifest.clone(), + ..Default::default() + }; + persister.store(w, reg).unwrap(); + + // A UTXO so the balance is non-zero.
+ let utxo = key_wallet::Utxo { + outpoint: dashcore::OutPoint { + txid: { + use dashcore::hashes::Hash; + dashcore::Txid::from_byte_array([0x99; 32]) + }, + vout: 0, + }, + txout: dashcore::TxOut { + value: 777_000, + script_pubkey: address.script_pubkey(), + }, + address, + height: 5, + is_coinbase: false, + is_confirmed: true, + is_instantlocked: false, + is_locked: false, + is_trusted: false, + }; + persister + .store( + w, + PlatformWalletChangeSet { + core: Some(CoreChangeSet { + new_utxos: vec![utxo.clone()], + last_processed_height: Some(50), + synced_height: Some(50), + ..Default::default() + }), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let state = p2.load().expect("load"); + + assert_eq!(state.wallets.len(), 1, "the wallet must be in the payload"); + let slice = state.wallets.get(&w).expect("wallet slice"); + assert_eq!(slice.network, key_wallet::Network::Testnet); + assert_eq!(slice.birth_height, 7); + // Every persisted row round-trips. The writer's + // `(account_type_label, account_index)` upsert key collapses a few + // distinct special-purpose variants that share a label+index (a + // persist-side characteristic, not a load bug), so the manifest is + // a faithful read of what is on disk: non-empty, containing the + // primary BIP44 account. + assert!(!slice.account_manifest.is_empty()); + assert!( + slice.account_manifest.iter().any(|e| matches!( + e.account_type, + key_wallet::account::AccountType::Standard { .. } + )), + "BIP44 account must be in the manifest" + ); + assert_eq!(slice.core_state.new_utxos.len(), 1); + assert_eq!(slice.core_state.new_utxos[0].value(), 777_000); + assert_eq!(slice.core_state.last_processed_height, Some(50)); +} + +/// Empty DB → empty `wallets`, no error (the `load()` doctest contract). 
+#[test] +fn c2_empty_db_empty_wallets() { + let (persister, _tmp, path) = fresh_persister(); + drop(persister); + let p2 = reopen(&path); + let state = p2.load().unwrap(); + assert!(state.wallets.is_empty()); + assert!(state.is_empty()); +} + +/// A wallet with only metadata (no UTXOs) still appears, with an empty +/// core projection — not silently dropped. +#[test] +fn c3_metadata_only_wallet_present() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xC3); + ensure_wallet_meta(&persister, &w); + drop(persister); + let p2 = reopen(&path); + let state = p2.load().unwrap(); + let slice = state.wallets.get(&w).expect("metadata-only wallet present"); + assert!(slice.account_manifest.is_empty()); + assert!(slice.core_state.new_utxos.is_empty()); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_migrations.rs b/packages/rs-platform-wallet-storage/tests/sqlite_migrations.rs index 8b90ce8b957..fb402eeb2f6 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_migrations.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_migrations.rs @@ -67,13 +67,13 @@ fn tc027_smoke_insert_every_table() { let wallet_id = [42u8; 32]; conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, 'testnet', 0)", + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, 'testnet', 0)", params![wallet_id.as_slice()], ) .unwrap(); let identity_id = [7u8; 32]; conn.execute( - "INSERT INTO identities (wallet_id, wallet_index, identity_id, entry_blob, tombstoned) \ + "INSERT INTO identities (wallet_id, identity_index, identity_id, entry_blob, tombstoned) \ VALUES (?1, NULL, ?2, X'01', 0)", params![wallet_id.as_slice(), identity_id.as_slice()], ) @@ -86,12 +86,7 @@ fn tc027_smoke_insert_every_table() { // Labels must match the writer-side canonical strings — see the // CHECK constraint sourced from `ACCOUNT_TYPE_LABELS` in // `sqlite::schema::accounts`. 
- "INSERT INTO account_registrations (wallet_id, account_type, account_index, account_xpub_bytes) VALUES (?1, 'standard', 0, X'00')", - &[&wallet_id.as_slice()], - ), - ( - "account_address_pools", - "INSERT INTO account_address_pools (wallet_id, account_type, account_index, pool_type, snapshot_blob) VALUES (?1, 'standard', 0, 'external', X'00')", + "INSERT INTO account_registrations (wallet_id, account_type, account_index, account_xpub_bytes) VALUES (?1, 'standard_bip44', 0, X'00')", &[&wallet_id.as_slice()], ), ( @@ -109,11 +104,6 @@ fn tc027_smoke_insert_every_table() { "INSERT INTO core_instant_locks (wallet_id, txid, islock_blob) VALUES (?1, ?2, X'00')", &[&wallet_id.as_slice(), &txid], ), - ( - "core_derived_addresses", - "INSERT INTO core_derived_addresses (wallet_id, account_type, account_index, address, derivation_path, used) VALUES (?1, 'standard', 0, 'addr', '', 0)", - &[&wallet_id.as_slice()], - ), ( "core_sync_state", "INSERT INTO core_sync_state (wallet_id, last_processed_height, synced_height) VALUES (?1, NULL, NULL)", @@ -121,10 +111,11 @@ fn tc027_smoke_insert_every_table() { ), ( "identity_keys", - // identity_keys is keyed by (identity_id, key_id); the FK - // targets identities(identity_id). - "INSERT INTO identity_keys (identity_id, key_id, public_key_blob, public_key_hash) VALUES (?1, 0, X'00', X'00')", - &[&identity_id.as_slice()], + // identity_keys is keyed by (wallet_id, identity_id, key_id); + // the wallet_id FK targets wallets and the + // identity_id FK targets identities(identity_id). + "INSERT INTO identity_keys (wallet_id, identity_id, key_id, public_key_blob, public_key_hash, derivation_blob) VALUES (?1, ?2, 0, X'00', X'00', NULL)", + &[&wallet_id.as_slice(), &identity_id.as_slice()], ), ( "contacts", @@ -208,10 +199,9 @@ fn tc028_idempotent_reopen() { /// append-only migration hash. /// -/// The hash is computed at runtime from the embedded list. 
Because this -/// test belongs to the migration drift policy, we assert the list is -/// non-empty and the hash is stable across successive calls — not a -/// pinned value (which would force a churn on every committed migration). +/// Asserts intra-run stability and a non-empty list — not content +/// pinning. The fingerprint is content-blind (hashes `(version, name)` +/// only), so this guards the migration set's identity, not its DDL. #[test] fn tc029_migration_fingerprint_stable() { let a = mig::embedded_migrations_fingerprint(); diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_money_column_overflow_on_read.rs b/packages/rs-platform-wallet-storage/tests/sqlite_money_column_overflow_on_read.rs new file mode 100644 index 00000000000..b9426b225ba --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_money_column_overflow_on_read.rs @@ -0,0 +1,84 @@ +#![allow(clippy::field_reassign_with_default)] + +//! Money/balance columns: a negative `i64` stored on disk (a wrapped +//! value, a restored-corruption row, or a torn write that passes +//! `PRAGMA integrity_check`) MUST abort the read with +//! [`WalletStorageError::IntegerOverflow`] rather than sign-extending +//! into a multi-quintillion `u64` balance. `birth_height`/`sync_height` +//! get the same guard in `sqlite_structural_hardening.rs`; here we cover +//! the genuine value-bearing columns, with `platform_addresses.balance` +//! riding the production `load()` path end-to-end. + +mod common; + +use common::{ensure_wallet_meta, fresh_persister, wid}; +use platform_wallet::changeset::PlatformWalletPersistence; +use platform_wallet_storage::WalletStorageError; +use rusqlite::params; + +/// `platform_addresses.balance`: a negative on-disk value must abort +/// the production `load()` with `IntegerOverflow{field: +/// "platform_addresses.balance"}`, NOT load a sign-extended u64 +/// balance. This rides `load() -> platform_addrs::load_all -> +/// decode_address_row -> i64_to_u64`. 
+#[test] +fn platform_address_balance_negative_on_disk_errors_on_load() { + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xE1); + ensure_wallet_meta(&persister, &w); + { + let conn = persister.lock_conn_for_test(); + conn.execute( + "INSERT INTO platform_addresses \ + (wallet_id, account_index, address_index, address, balance, nonce) \ + VALUES (?1, 0, 0, X'0000000000000000000000000000000000000000', ?2, 0)", + params![w.as_slice(), -1i64], + ) + .unwrap(); + } + + let err = PlatformWalletPersistence::load(&persister) + .expect_err("a negative on-disk balance must abort load(), not sign-extend"); + let backend = format!("{err:?}"); + assert!( + backend.contains("IntegerOverflow") && backend.contains("platform_addresses.balance"), + "expected IntegerOverflow for platform_addresses.balance, got {backend}" + ); +} + +/// `core_utxos.value`: a negative on-disk value must abort the unspent +/// read with `IntegerOverflow{field: "core_utxos.value"}` rather than +/// reporting a sign-extended u64 amount for live funds. +#[test] +fn core_utxo_value_negative_on_disk_errors_on_read() { + use dashcore::hashes::Hash; + use platform_wallet_storage::sqlite::schema::{blob, core_state}; + let (persister, _tmp, _path) = fresh_persister(); + let w = wid(0xE2); + ensure_wallet_meta(&persister, &w); + let outpoint = blob::encode_outpoint(&dashcore::OutPoint { + txid: dashcore::Txid::from_byte_array([0x22; 32]), + vout: 0, + }) + .unwrap(); + { + let conn = persister.lock_conn_for_test(); + // Declare the address so the row is treated as a real, unspent + // UTXO and the value cast is reached (account_index 0). 
+ conn.execute( + "INSERT INTO core_utxos \ + (wallet_id, outpoint, value, script, height, account_index, spent, spent_in_txid) \ + VALUES (?1, ?2, ?3, X'00', NULL, 0, 0, NULL)", + params![w.as_slice(), &outpoint, -1i64], + ) + .unwrap(); + } + let conn = persister.lock_conn_for_test(); + let err = core_state::list_unspent_utxos(&conn, &w) + .expect_err("a negative on-disk utxo value must error, not sign-extend"); + let s = format!("{err:?}"); + assert!( + matches!(err, WalletStorageError::IntegerOverflow { field, .. } if field == "core_utxos.value"), + "expected IntegerOverflow for core_utxos.value, got {s}" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_object_metadata.rs b/packages/rs-platform-wallet-storage/tests/sqlite_object_metadata.rs index ad060bdadc5..1efd76e224e 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_object_metadata.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_object_metadata.rs @@ -24,11 +24,8 @@ fn id32(byte: u8) -> [u8; 32] { [byte; 32] } -// --------------------------------------------------------------------- -// 001..006 — per-scope roundtrip (get→None, put, get, overwrite, -// delete, get→None). Parent rows seeded first. -// --------------------------------------------------------------------- - +/// Full per-scope roundtrip: get→None, put, get, overwrite, delete, +/// get→None. Parent rows must be seeded first by the caller. fn roundtrip(p: &impl KvStore, scope: &ObjectId) { assert_eq!(p.get(scope, "k").unwrap(), None); p.put(scope, "k", b"v1").unwrap(); @@ -111,12 +108,6 @@ fn tc_md_006_roundtrip_platform_address() { ); } -// --------------------------------------------------------------------- -// 007..011 — parentless `put` SUCCEEDS for the five typed scopes -// (no parent row seeded) and the value reads back. — Global -// put on empty DB → Ok. 
-// --------------------------------------------------------------------- - /// Put `(scope, "k") = b"v"` with NO parent row present, then read it /// back. Asserts the soft-cascade model: writes don't require a parent. fn assert_parentless_put_roundtrips(p: &impl KvStore, scope: &ObjectId) { @@ -183,11 +174,8 @@ fn tc_md_012_put_global_on_empty_db_is_ok() { ); } -// --------------------------------------------------------------------- -// delete of a never-existing key is idempotent (returns Ok), -// for the Global scope and a typed scope. -// --------------------------------------------------------------------- - +/// delete of a never-existing key is idempotent for both the Global +/// scope and a typed scope. #[test] fn delete_missing_key_is_idempotent() { let (p, _tmp, _path) = fresh_persister(); @@ -197,11 +185,6 @@ fn delete_missing_key_is_idempotent() { p.delete(&ObjectId::Wallet(w), "never-existed").unwrap(); } -// --------------------------------------------------------------------- -// list_keys returns keys in ascending order regardless of -// insertion order. -// --------------------------------------------------------------------- - #[test] fn list_keys_is_ascending_regardless_of_insert_order() { let (p, _tmp, _path) = fresh_persister(); @@ -214,10 +197,8 @@ fn list_keys_is_ascending_regardless_of_insert_order() { ); } -// --------------------------------------------------------------------- -// 013..016 — soft cascade via AFTER DELETE trigger: seed+put, -// DELETE FROM the direct parent table, assert the meta row is gone. -// --------------------------------------------------------------------- +// Soft cascade via AFTER DELETE trigger: seed+put, DELETE FROM the +// direct parent table, assert the meta row is gone. 
#[test] fn tc_md_013_cascade_identity() { @@ -313,9 +294,7 @@ fn tc_md_016_cascade_platform_address() { assert_eq!(p.get(&scope, "k").unwrap(), None); } -// --------------------------------------------------------------------- -// 017 / 017b — wallet cascade (direct + transitive via identities). -// --------------------------------------------------------------------- +// Wallet cascade: direct, plus transitive via identities. #[test] fn tc_md_017_cascade_wallet() { @@ -328,10 +307,10 @@ fn tc_md_017_cascade_wallet() { { let conn = p.lock_conn_for_test(); conn.execute( - "DELETE FROM wallet_metadata WHERE wallet_id = ?1", + "DELETE FROM wallets WHERE wallet_id = ?1", params![w.as_slice()], ) - .expect("delete wallet_metadata"); + .expect("delete wallets"); } assert_eq!(p.get(&scope, "k").unwrap(), None); } @@ -349,20 +328,18 @@ fn tc_md_017b_cascade_identity_via_wallet() { { let conn = p.lock_conn_for_test(); conn.execute( - "DELETE FROM wallet_metadata WHERE wallet_id = ?1", + "DELETE FROM wallets WHERE wallet_id = ?1", params![w.as_slice()], ) - .expect("delete wallet_metadata"); + .expect("delete wallets"); } - // wallet_metadata delete → identities FK cascade → meta_identity + // wallets delete → identities FK cascade → meta_identity // trigger (SQLite fires it for FK-cascade-deleted rows natively). assert_eq!(p.get(&scope, "k").unwrap(), None); } -// --------------------------------------------------------------------- -// 018 / 019 — delete_wallet purges every meta_* for the wallet; -// Global + other wallet's meta_wallet survive; report wiring. -// --------------------------------------------------------------------- +// delete_wallet purges every meta_* for the wallet; Global and another +// wallet's meta_wallet survive. 
#[test] fn tc_md_018_delete_wallet_purges_all_meta_for_wallet() { @@ -513,12 +490,9 @@ fn tc_md_019_delete_wallet_report_counts_meta_tables() { assert_eq!(global, 1, "meta_global must survive the per-wallet delete"); } -// --------------------------------------------------------------------- -// DET scenario — write metadata before the parent exists, read it back, -// then create the parent (metadata still present), then delete the -// parent (the AFTER DELETE trigger removes the metadata). -// --------------------------------------------------------------------- - +/// Write metadata before the parent exists, read it back, create the +/// parent (metadata persists), then delete it (the AFTER DELETE trigger +/// removes the metadata). #[test] fn det_write_before_parent_then_create_then_delete() { use rusqlite::params; @@ -552,13 +526,9 @@ fn det_write_before_parent_then_create_then_delete() { assert_eq!(p.get(&scope, "alias").unwrap(), None); } -// --------------------------------------------------------------------- -// The meta_* triggers coexist with the pre-existing -// `setnull_core_utxos_on_tx_delete` trigger during delete_wallet: a -// wallet with core_transactions + core_utxos (a UTXO spent_in that tx) -// deletes cleanly and leaves nothing behind. -// --------------------------------------------------------------------- - +/// The meta_* triggers coexist with the `setnull_core_utxos_on_tx_delete` +/// trigger during delete_wallet: a wallet with core_transactions + +/// core_utxos (a UTXO spent_in that tx) deletes cleanly, leaving nothing. #[test] fn delete_wallet_with_core_tx_and_utxo_stays_consistent() { use rusqlite::params; @@ -612,14 +582,10 @@ fn delete_wallet_with_core_tx_and_utxo_stays_consistent() { assert_eq!(p.get(&ObjectId::Wallet(w), "k").unwrap(), None); } -// --------------------------------------------------------------------- -// Trigger-on-FK-cascade proof at SQLite defaults. 
SQLite fires an AFTER -// DELETE trigger for a row removed by an FK ON DELETE CASCADE natively — -// `recursive_triggers` (off by default) does not gate this. On a RAW -// connection at defaults, the one-hop chain wallet_metadata delete → -// identities FK cascade → meta_identity trigger cleans up. -// --------------------------------------------------------------------- - +/// SQLite fires an AFTER DELETE trigger for a row removed by FK ON DELETE +/// CASCADE natively — `recursive_triggers` (off by default) does not gate +/// this. On a raw connection at defaults, the one-hop chain wallets +/// delete → identities FK cascade → meta_identity trigger cleans up. #[test] fn meta_identity_cleanup_fires_on_wallet_cascade() { use rusqlite::{params, Connection}; @@ -642,13 +608,13 @@ fn meta_identity_cleanup_fires_on_wallet_cascade() { let w = [0x90u8; 32]; let idy = [0x91u8; 32]; conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) \ + "INSERT INTO wallets (wallet_id, network, birth_height) \ VALUES (?1, 'testnet', 0)", params![&w[..]], ) .unwrap(); conn.execute( - "INSERT INTO identities (identity_id, wallet_id, wallet_index, entry_blob, tombstoned) \ + "INSERT INTO identities (identity_id, wallet_id, identity_index, entry_blob, tombstoned) \ VALUES (?1, ?2, NULL, X'00', 0)", params![&idy[..], &w[..]], ) @@ -659,12 +625,9 @@ fn meta_identity_cleanup_fires_on_wallet_cascade() { ) .unwrap(); - // wallet_metadata delete → identities FK cascade → meta_identity trigger. - conn.execute( - "DELETE FROM wallet_metadata WHERE wallet_id = ?1", - params![&w[..]], - ) - .unwrap(); + // wallets delete → identities FK cascade → meta_identity trigger. 
+ conn.execute("DELETE FROM wallets WHERE wallet_id = ?1", params![&w[..]]) + .unwrap(); let identity_rows: i64 = conn .query_row( @@ -688,13 +651,9 @@ fn meta_identity_cleanup_fires_on_wallet_cascade() { ); } -// --------------------------------------------------------------------- -// Two-hop trigger-on-FK-cascade proof at SQLite defaults. The meta_token -// chain spans two FK cascades: wallet_metadata delete → identities (FK -// cascade) → token_balances (FK cascade) → meta_token trigger. This -// fires natively without recursive_triggers. -// --------------------------------------------------------------------- - +/// The meta_token chain spans two FK cascades: wallets delete → +/// identities → token_balances → meta_token trigger, firing natively +/// without recursive_triggers. #[test] fn meta_token_cleanup_fires_on_wallet_cascade_two_hops() { use rusqlite::{params, Connection}; @@ -718,13 +677,13 @@ fn meta_token_cleanup_fires_on_wallet_cascade_two_hops() { let idy = [0xA1u8; 32]; let token = [0xA2u8; 32]; conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) \ + "INSERT INTO wallets (wallet_id, network, birth_height) \ VALUES (?1, 'testnet', 0)", params![&w[..]], ) .unwrap(); conn.execute( - "INSERT INTO identities (identity_id, wallet_id, wallet_index, entry_blob, tombstoned) \ + "INSERT INTO identities (identity_id, wallet_id, identity_index, entry_blob, tombstoned) \ VALUES (?1, ?2, NULL, X'00', 0)", params![&idy[..], &w[..]], ) @@ -743,11 +702,8 @@ fn meta_token_cleanup_fires_on_wallet_cascade_two_hops() { .unwrap(); // Two-hop cascade: wallet → identities → token_balances → trigger. 
- conn.execute( - "DELETE FROM wallet_metadata WHERE wallet_id = ?1", - params![&w[..]], - ) - .unwrap(); + conn.execute("DELETE FROM wallets WHERE wallet_id = ?1", params![&w[..]]) + .unwrap(); let token_rows: i64 = conn .query_row( @@ -774,10 +730,6 @@ fn meta_token_cleanup_fires_on_wallet_cascade_two_hops() { ); } -// --------------------------------------------------------------------- -// 020..022 — key bounds. -// --------------------------------------------------------------------- - #[test] fn tc_md_020_empty_key_rejected() { let (p, _tmp, _path) = fresh_persister(); @@ -820,11 +772,8 @@ fn tc_md_022_max_length_key_accepted() { ); } -// --------------------------------------------------------------------- -// oversized value planted directly is rejected on `get` -// before materialisation, across every meta_* table. -// --------------------------------------------------------------------- - +/// An oversized value planted directly is rejected on `get` before +/// materialisation, across every meta_* table. #[test] fn tc_md_023_oversized_value_rejected_before_materialising() { use rusqlite::params; @@ -942,10 +891,8 @@ fn tc_md_023_oversized_value_rejected_before_materialising() { } } -// --------------------------------------------------------------------- -// list_keys prefix with literal `%`/`_`/`\` (not wildcards). -// --------------------------------------------------------------------- - +/// list_keys treats `%`/`_`/`\` in the prefix as literals, not LIKE +/// wildcards. #[test] fn tc_md_024_list_keys_escapes_like_metacharacters() { let (p, _tmp, _path) = fresh_persister(); @@ -977,11 +924,8 @@ fn tc_md_024_list_keys_escapes_like_metacharacters() { ); } -// --------------------------------------------------------------------- -// scope isolation: same key string across Wallet(A)/Wallet(B) -// and Global/Wallet(A) stays independent. 
-// --------------------------------------------------------------------- - +/// The same key string across Wallet(A)/Wallet(B) and Global/Wallet(A) +/// stays scope-independent. #[test] fn tc_md_025_scope_isolation() { let (p, _tmp, _path) = fresh_persister(); @@ -1072,14 +1016,12 @@ fn delete_wallet_leaves_no_surviving_rows() { let txid = vec![0x01u8; 32]; let outpoint = vec![0x02u8; 36]; let stmts: &[(&str, &[&dyn rusqlite::ToSql])] = &[ - ("INSERT INTO account_registrations (wallet_id, account_type, account_index, account_xpub_bytes) VALUES (?1, 'standard', 0, X'00')", &[&a.as_slice()]), - ("INSERT INTO account_address_pools (wallet_id, account_type, account_index, pool_type, snapshot_blob) VALUES (?1, 'standard', 0, 'external', X'00')", &[&a.as_slice()]), + ("INSERT INTO account_registrations (wallet_id, account_type, account_index, account_xpub_bytes) VALUES (?1, 'standard_bip44', 0, X'00')", &[&a.as_slice()]), ("INSERT INTO core_transactions (wallet_id, txid, finalized, record_blob) VALUES (?1, ?2, 0, X'00')", &[&a.as_slice(), &txid]), ("INSERT INTO core_utxos (wallet_id, outpoint, value, script, account_index, spent) VALUES (?1, ?2, 0, X'00', 0, 0)", &[&a.as_slice(), &outpoint]), ("INSERT INTO core_instant_locks (wallet_id, txid, islock_blob) VALUES (?1, ?2, X'00')", &[&a.as_slice(), &txid]), - ("INSERT INTO core_derived_addresses (wallet_id, account_type, account_index, address, derivation_path, used) VALUES (?1, 'standard', 0, 'addr', '', 0)", &[&a.as_slice()]), ("INSERT INTO core_sync_state (wallet_id, last_processed_height, synced_height) VALUES (?1, 1, 1)", &[&a.as_slice()]), - ("INSERT INTO identity_keys (identity_id, key_id, public_key_blob, public_key_hash) VALUES (?1, 0, X'00', X'00')", &[&idy.as_slice()]), + ("INSERT INTO identity_keys (wallet_id, identity_id, key_id, public_key_blob, public_key_hash, derivation_blob) VALUES (?1, ?2, 0, X'00', X'00', NULL)", &[&a.as_slice(), &idy.as_slice()]), ("INSERT INTO platform_address_sync (wallet_id, 
sync_height, sync_timestamp, last_known_recent_block) VALUES (?1, 0, 0, 0)", &[&a.as_slice()]), ("INSERT INTO asset_locks (wallet_id, outpoint, status, account_index, identity_index, amount_duffs, lifecycle_blob) VALUES (?1, ?2, 'built', 0, 0, 0, X'00')", &[&a.as_slice(), &outpoint]), ("INSERT INTO dashpay_profiles (identity_id, profile_blob) VALUES (?1, X'00')", &[&idy.as_slice()]), @@ -1242,13 +1184,11 @@ fn delete_wallet_leaves_no_surviving_rows() { // survive. Scoping each count by `wallet_id` catches an over-broad // cascade that an unscoped whole-table COUNT(*) would miss. let wallet_scoped = [ - "wallet_metadata", + "wallets", "account_registrations", - "account_address_pools", "core_transactions", "core_utxos", "core_instant_locks", - "core_derived_addresses", "core_sync_state", "identities", "contacts", @@ -1295,7 +1235,7 @@ fn delete_wallet_leaves_no_surviving_rows() { // rows. (b is seeded in a representative subset of the scoped tables, // not all of them, so we check exactly the tables it was given.) let b_wallet_scoped = [ - "wallet_metadata", + "wallets", "core_sync_state", "identities", "contacts", diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_open_integrity_check.rs b/packages/rs-platform-wallet-storage/tests/sqlite_open_integrity_check.rs index 5b16833a06c..a9a3c93e684 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_open_integrity_check.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_open_integrity_check.rs @@ -53,7 +53,7 @@ fn atom_013_open_rejects_corrupt_db() { // Push the DB past a few pages with a chunky meta row. 
for i in 0..20u32 { conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, 'testnet', ?2)", + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, 'testnet', ?2)", params![vec![i as u8; 32].as_slice(), i as i64], ) .unwrap(); @@ -120,7 +120,7 @@ fn tc_code_016_a_integrity_report_collects_all_rows() { let conn = persister.lock_conn_for_test(); for i in 0..40u32 { conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, 'testnet', ?2)", + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, 'testnet', ?2)", params![vec![i as u8; 32].as_slice(), i as i64], ) .unwrap(); diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_persist_roundtrip.rs b/packages/rs-platform-wallet-storage/tests/sqlite_persist_roundtrip.rs index 3004b16fc88..b1092736f7f 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_persist_roundtrip.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_persist_roundtrip.rs @@ -1,17 +1,10 @@ #![allow(clippy::field_reassign_with_default)] -//! Per-sub-changeset round-trip tests. -//! -//! Now that `platform-wallet`'s `serde` feature is active, every -//! changeset blob is a single bincode-serde payload — these tests -//! store a non-trivial entry, reopen the persister, decode the blob, -//! and assert structural equality (where the type allows) or -//! field-level equality (where it doesn't, e.g. `TransactionRecord` -//! which is `Debug + Clone` only upstream). -//! -//! TC-001 (CoreChangeSet records) is exercised through the trait -//! method in `sqlite_buffer_semantics.rs::tc001_get_core_tx_record_roundtrip`. -//! TC-015 (multi-wallet coexistence) lives there too. +//! Per-sub-changeset round-trip tests: store a non-trivial entry, reopen +//! the persister, decode the bincode-serde blob, and assert structural +//! equality (or field-level equality where the type isn't `PartialEq`). +//! 
CoreChangeSet records and multi-wallet coexistence are covered in +//! `sqlite_buffer_semantics.rs`. mod common; @@ -74,7 +67,7 @@ fn tc013_wallet_metadata_roundtrip() { let conn = persister.lock_conn_for_test(); let (network, birth_height): (String, i64) = conn .query_row( - "SELECT network, birth_height FROM wallet_metadata WHERE wallet_id = ?1", + "SELECT network, birth_height FROM wallets WHERE wallet_id = ?1", rusqlite::params![w.as_slice()], |row| Ok((row.get(0)?, row.get(1)?)), ) @@ -248,8 +241,8 @@ fn tc007_identity_key_entry_roundtrip() { let p2 = SqlitePersister::open(SqlitePersisterConfig::new(&path)).unwrap(); let conn = p2.lock_conn_for_test(); - // identity_keys is keyed by (identity_id, key_id); the wallet_id - // column is not part of the schema. + // Single wallet under test, so (identity_id, key_id) selects the + // one row; the full PK is (wallet_id, identity_id, key_id). let blob_bytes: Vec = conn .query_row( "SELECT public_key_blob FROM identity_keys WHERE identity_id = ?1 AND key_id = ?2", @@ -260,12 +253,9 @@ fn tc007_identity_key_entry_roundtrip() { let decoded = platform_wallet_storage::sqlite::schema::identity_keys::decode_entry(&blob_bytes).unwrap(); assert_eq!(decoded, entry); - // The load-bearing NFR-10 check is `tests/secrets_scan.rs`, - // which greps every file under `src/sqlite/schema/` and - // `migrations/` for forbidden secret-material substrings — - // bincode wire bytes carry no field names, so any runtime - // substring scan against the blob would be a false-confidence - // smoke test. + // No runtime substring scan on the blob: bincode wire bytes carry no + // field names, so it would be false confidence. The real secret-leak + // guard is the source grep in `tests/secrets_scan.rs`. 
drop(tmp); } diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_qa_identity_tombstone.rs b/packages/rs-platform-wallet-storage/tests/sqlite_qa_identity_tombstone.rs new file mode 100644 index 00000000000..ea0fadd9622 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_qa_identity_tombstone.rs @@ -0,0 +1,290 @@ +#![allow(clippy::field_reassign_with_default)] + +//! Write-path coverage for the `IdentityChangeSet.removed` tombstone +//! branch. The tombstone runs a wallet-scoped, NULL-safe +//! `UPDATE identities SET tombstoned = 1 WHERE identity_id = ?1 AND +//! wallet_id IS ?2`, mirroring the upsert's per-entry wallet cross-check. +//! These tests pin that a tombstoned identity is excluded from the +//! per-wallet `load_state` and that a foreign wallet's `removed` set +//! cannot tombstone this wallet's identity. + +mod common; + +use std::collections::{BTreeMap, BTreeSet}; + +use common::{ensure_wallet_meta, fresh_persister, wid}; +use dpp::identity::accessors::IdentityGettersV0; +use dpp::prelude::Identifier; +use platform_wallet::changeset::{ + IdentityChangeSet, IdentityEntry, PlatformWalletChangeSet, PlatformWalletPersistence, +}; +use platform_wallet::wallet::identity::IdentityStatus; +use platform_wallet_storage::sqlite::schema::identities; + +fn reopen(path: &std::path::Path) -> platform_wallet_storage::SqlitePersister { + platform_wallet_storage::SqlitePersister::open( + platform_wallet_storage::SqlitePersisterConfig::new(path), + ) + .expect("reopen persister") +} + +/// Build an `IdentityEntry` parented to a specific wallet (so the upsert +/// cross-check passes and the typed `wallet_id` column is populated). 
+fn entry_for(id: u8, wallet_id: [u8; 32]) -> IdentityEntry { + IdentityEntry { + id: Identifier::from([id; 32]), + balance: u64::from(id), + revision: 1, + identity_index: Some(0), + last_updated_balance_block_time: None, + last_synced_keys_block_time: None, + dpns_names: Vec::new(), + contested_dpns_names: Vec::new(), + status: IdentityStatus::Active, + wallet_id: Some(wallet_id), + dashpay_profile: None, + dashpay_payments: Default::default(), + } +} + +/// An identity routed through `IdentityChangeSet.removed` is tombstoned +/// and disappears from the per-wallet `load_state` while a sibling, +/// non-removed identity survives. +#[test] +fn qa_tomb1_removed_identity_excluded_from_load() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xD0); + ensure_wallet_meta(&persister, &w); + + let keep = entry_for(0x01, w); + let drop_me = entry_for(0x02, w); + let mut idents: BTreeMap = BTreeMap::new(); + idents.insert(keep.id, keep.clone()); + idents.insert(drop_me.id, drop_me.clone()); + + // First flush: insert both. + persister + .store( + w, + PlatformWalletChangeSet { + identities: Some(IdentityChangeSet { + identities: idents, + removed: Default::default(), + }), + ..Default::default() + }, + ) + .unwrap(); + + // Second flush: tombstone drop_me. + let mut removed: BTreeSet = BTreeSet::new(); + removed.insert(drop_me.id); + persister + .store( + w, + PlatformWalletChangeSet { + identities: Some(IdentityChangeSet { + identities: Default::default(), + removed, + }), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + + // The tombstoned row is still physically present (logical delete). 
+ let total: i64 = conn + .query_row( + "SELECT COUNT(*) FROM identities WHERE wallet_id = ?1", + rusqlite::params![w.as_slice()], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(total, 2, "tombstone is a logical delete; row stays on disk"); + + let tombstoned: i64 = conn + .query_row( + "SELECT COUNT(*) FROM identities WHERE wallet_id = ?1 AND tombstoned = 1", + rusqlite::params![w.as_slice()], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(tombstoned, 1, "exactly one identity tombstoned"); + + // load_state must skip the tombstoned identity and keep the other. + let state = identities::load_state(&conn, &w).unwrap(); + drop(conn); + let wallet_idents = state.wallet_identities.get(&w).expect("wallet bucket"); + assert_eq!( + wallet_idents.len(), + 1, + "load_state must surface only the non-tombstoned identity" + ); + let surviving_ids: Vec = wallet_idents.values().map(|m| m.identity.id()).collect(); + assert!( + surviving_ids.contains(&keep.id), + "kept identity must survive load" + ); + assert!( + !surviving_ids.contains(&drop_me.id), + "tombstoned identity must NOT appear in load" + ); +} + +/// Re-upserting a tombstoned identity clears the tombstone (the upsert +/// sets `tombstoned = 0`) — the resurrection path the writer relies on. 
+#[test] +fn qa_tomb2_reupsert_clears_tombstone() { + let (persister, _tmp, path) = fresh_persister(); + let w = wid(0xD1); + ensure_wallet_meta(&persister, &w); + + let e = entry_for(0x05, w); + let mut idents: BTreeMap = BTreeMap::new(); + idents.insert(e.id, e.clone()); + persister + .store( + w, + PlatformWalletChangeSet { + identities: Some(IdentityChangeSet { + identities: idents.clone(), + removed: Default::default(), + }), + ..Default::default() + }, + ) + .unwrap(); + + let mut removed: BTreeSet = BTreeSet::new(); + removed.insert(e.id); + persister + .store( + w, + PlatformWalletChangeSet { + identities: Some(IdentityChangeSet { + identities: Default::default(), + removed, + }), + ..Default::default() + }, + ) + .unwrap(); + + // Re-upsert resurrects. + persister + .store( + w, + PlatformWalletChangeSet { + identities: Some(IdentityChangeSet { + identities: idents, + removed: Default::default(), + }), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let tombstoned: i64 = conn + .query_row( + "SELECT tombstoned FROM identities WHERE identity_id = ?1", + rusqlite::params![e.id.as_slice()], + |r| r.get(0), + ) + .unwrap(); + let state = identities::load_state(&conn, &w).unwrap(); + drop(conn); + assert_eq!(tombstoned, 0, "re-upsert must clear the tombstone flag"); + assert_eq!( + state + .wallet_identities + .get(&w) + .map(|m| m.len()) + .unwrap_or(0), + 1, + "resurrected identity must reappear in load" + ); +} + +/// The tombstone UPDATE is scoped by `wallet_id`: a `removed` entry +/// naming an identity parented to a different wallet is a no-op against +/// that wallet's row (NULL-safe `wallet_id IS ?2` predicate). An +/// identity_id is globally unique to one wallet, so this is +/// defense-in-depth enforcing the isolation the data model assumes. 
+#[test] +fn qa_tomb3_tombstone_update_is_wallet_scoped() { + let (persister, _tmp, path) = fresh_persister(); + let wa = wid(0xE0); + let wb = wid(0xE1); + ensure_wallet_meta(&persister, &wa); + ensure_wallet_meta(&persister, &wb); + + // Identity 0x07 is parented to wallet B. + let b_ident = entry_for(0x07, wb); + let mut b_map: BTreeMap = BTreeMap::new(); + b_map.insert(b_ident.id, b_ident.clone()); + persister + .store( + wb, + PlatformWalletChangeSet { + identities: Some(IdentityChangeSet { + identities: b_map, + removed: Default::default(), + }), + ..Default::default() + }, + ) + .unwrap(); + + // Wallet A flushes a `removed` set naming wallet B's identity id. + let mut removed: BTreeSet = BTreeSet::new(); + removed.insert(b_ident.id); + persister + .store( + wa, + PlatformWalletChangeSet { + identities: Some(IdentityChangeSet { + identities: Default::default(), + removed, + }), + ..Default::default() + }, + ) + .unwrap(); + drop(persister); + + let p2 = reopen(&path); + let conn = p2.lock_conn_for_test(); + let tombstoned: i64 = conn + .query_row( + "SELECT tombstoned FROM identities WHERE identity_id = ?1", + rusqlite::params![b_ident.id.as_slice()], + |r| r.get(0), + ) + .unwrap(); + let b_state = identities::load_state(&conn, &wb).unwrap(); + drop(conn); + + // Cross-wallet isolation: wallet A's `removed` set names wallet B's + // identity, but the wallet-scoped tombstone UPDATE leaves B's row + // untouched, so B's load still surfaces the identity. 
+ assert_eq!( + tombstoned, 0, + "wallet-scoped tombstone: A's removed set must NOT affect B's identity" + ); + assert_eq!( + b_state + .wallet_identities + .get(&wb) + .map(|m| m.len()) + .unwrap_or(0), + 1, + "B's identity must survive A's unrelated tombstone" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_second_open_guard.rs b/packages/rs-platform-wallet-storage/tests/sqlite_second_open_guard.rs new file mode 100644 index 00000000000..f69129df335 --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_second_open_guard.rs @@ -0,0 +1,64 @@ +//! Second-open guard: a process-wide registry refuses a second +//! `SqlitePersister::open()` on the same canonical path while the first +//! is alive, so two in-process handles can't diverge (each owns an +//! independent `Mutex` + write buffer). Dropping the first +//! releases the claim so a later open succeeds. + +mod common; + +use platform_wallet_storage::{SqlitePersister, SqlitePersisterConfig, WalletStorageError}; + +/// `SqlitePersister` is not `Debug`, so `Result::expect_err` can't be +/// used on an `open()` result — extract the error by matching instead. +fn open_err(cfg: SqlitePersisterConfig) -> WalletStorageError { + match SqlitePersister::open(cfg) { + Ok(_) => panic!("expected open() to fail"), + Err(e) => e, + } +} + +#[test] +fn second_open_on_same_path_is_refused() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("w.db"); + + let first = SqlitePersister::open(SqlitePersisterConfig::new(&path)).expect("first open"); + + let err = open_err(SqlitePersisterConfig::new(&path)); + assert!( + matches!(err, WalletStorageError::AlreadyOpen { .. }), + "expected AlreadyOpen, got {err:?}" + ); + + // Releasing the first handle frees the claim. 
+ drop(first); + let _reopened = SqlitePersister::open(SqlitePersisterConfig::new(&path)) + .expect("open after the first handle drops must succeed"); +} + +#[test] +fn distinct_paths_open_concurrently() { + let tmp = tempfile::tempdir().unwrap(); + let a = tmp.path().join("a.db"); + let b = tmp.path().join("b.db"); + + let _pa = SqlitePersister::open(SqlitePersisterConfig::new(&a)).expect("open a"); + // A different path is unaffected by the registry. + let _pb = SqlitePersister::open(SqlitePersisterConfig::new(&b)).expect("open b"); +} + +#[test] +fn second_open_via_noncanonical_path_is_refused() { + // A `.`-segmented path canonicalizes to the same key as the plain + // path, so the registry still catches the second open. + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("w.db"); + let _first = SqlitePersister::open(SqlitePersisterConfig::new(&path)).expect("first open"); + + let dotted = tmp.path().join(".").join("w.db"); + let err = open_err(SqlitePersisterConfig::new(&dotted)); + assert!( + matches!(err, WalletStorageError::AlreadyOpen { .. }), + "expected AlreadyOpen for the equivalent path, got {err:?}" + ); +} diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_structural_hardening.rs b/packages/rs-platform-wallet-storage/tests/sqlite_structural_hardening.rs index 1018974bd56..2f1babb8f4a 100644 --- a/packages/rs-platform-wallet-storage/tests/sqlite_structural_hardening.rs +++ b/packages/rs-platform-wallet-storage/tests/sqlite_structural_hardening.rs @@ -20,14 +20,14 @@ use platform_wallet::wallet::platform_wallet::WalletId; use platform_wallet_storage::WalletStorageError; use rusqlite::params; -/// a child insert without a `wallet_metadata` parent is +/// a child insert without a `wallets` parent is /// rejected by the native FK (not a trigger). 
#[test] fn native_fk_rejects_orphan_child() { let (persister, _tmp, _path) = fresh_persister(); let conn = persister.lock_conn_for_test(); let res = conn.execute( - "INSERT INTO identities (wallet_id, wallet_index, identity_id, entry_blob, tombstoned) \ + "INSERT INTO identities (wallet_id, identity_index, identity_id, entry_blob, tombstoned) \ VALUES (?1, NULL, ?2, X'00', 0)", params![[7u8; 32].as_slice(), [9u8; 32].as_slice()], ); @@ -38,9 +38,11 @@ fn native_fk_rejects_orphan_child() { ); } -/// an `identity_keys` row whose `identities` parent does not -/// exist is rejected by the FK to `identities(identity_id)` (cascade -/// chain `wallet_metadata → identities → identity_keys`). +/// An `identity_keys` row whose `identities` parent does not exist is +/// rejected by the FK to `identities(identity_id)`. The `wallet_id` +/// parent exists (via `ensure_wallet_meta`), so the failure is +/// specifically the missing identity, not the wallet (cascade chain +/// `wallets → identities → identity_keys`). #[test] fn native_fk_rejects_identity_keys_without_identity() { let (persister, _tmp, _path) = fresh_persister(); @@ -49,9 +51,9 @@ fn native_fk_rejects_identity_keys_without_identity() { let conn = persister.lock_conn_for_test(); let res = conn.execute( "INSERT INTO identity_keys \ - (identity_id, key_id, public_key_blob, public_key_hash) \ - VALUES (?1, 0, X'00', X'00')", - params![[3u8; 32].as_slice()], + (wallet_id, identity_id, key_id, public_key_blob, public_key_hash, derivation_blob) \ + VALUES (?1, ?2, 0, X'00', X'00', NULL)", + params![w.as_slice(), [3u8; 32].as_slice()], ); let err = res.unwrap_err().to_string(); assert!( @@ -114,38 +116,26 @@ fn make_utxo(addr: &Address, vout: u32, value: u64) -> Utxo { Utxo::new(outpoint, txout, addr.clone(), 10, false) } -/// UTXOs resolve their real `account_index` from the derived-address -/// map written earlier in the same transaction, instead of a hardcoded -/// 0. 
+/// Every `core_utxos` row is written with the hardcoded default +/// `account_index = 0` (the product uses only the default account; a +/// non-default account is rejected upstream by the `core_bridge` guard), +/// so the per-account grouping reader buckets every unspent UTXO — at any +/// address — under account 0. #[test] -fn multi_account_utxos_bucket_to_real_account() { +fn utxos_bucket_under_default_account_index_zero() { use platform_wallet_storage::sqlite::schema::core_state; let (persister, _tmp, _path) = fresh_persister(); let w: WalletId = wid(0xC7); ensure_wallet_meta(&persister, &w); - let addr_acct5 = p2pkh(0x05); - let addr_acct9 = p2pkh(0x09); + let addr_a = p2pkh(0x05); + let addr_b = p2pkh(0x09); { let mut conn = persister.lock_conn_for_test(); - // Pre-seed the derived-address map with two distinct accounts. - for (acct, addr) in [(5u32, &addr_acct5), (9u32, &addr_acct9)] { - conn.execute( - "INSERT INTO core_derived_addresses \ - (wallet_id, account_type, account_index, address, derivation_path, used) \ - VALUES (?1, 'standard', ?2, ?3, '0/0', 0)", - params![w.as_slice(), acct as i64, addr.to_string()], - ) - .unwrap(); - } - let cs = CoreChangeSet { - new_utxos: vec![ - make_utxo(&addr_acct5, 0, 1000), - make_utxo(&addr_acct9, 1, 2000), - ], + new_utxos: vec![make_utxo(&addr_a, 0, 1000), make_utxo(&addr_b, 1, 2000)], ..Default::default() }; let tx = conn.transaction().unwrap(); @@ -156,47 +146,20 @@ fn multi_account_utxos_bucket_to_real_account() { let conn = persister.lock_conn_for_test(); let by_account = core_state::list_unspent_utxos(&conn, &w).unwrap(); assert_eq!( - by_account.get(&5).map(|v| v.len()), - Some(1), - "account 5 should hold exactly one UTXO" + by_account.len(), + 1, + "all UTXOs bucket under a single (default) account" ); assert_eq!( - by_account.get(&9).map(|v| v.len()), - Some(1), - "account 9 should hold exactly one UTXO" - ); -} - -/// A NEW unspent UTXO whose address is absent from -/// `core_derived_addresses` cannot 
resolve an owning account, so the -/// write is refused with the typed `UtxoAddressNotDerived` instead of -/// silently mis-filing live funds under account 0. -#[test] -fn unspent_utxo_on_undeclared_address_is_rejected() { - use platform_wallet_storage::sqlite::schema::core_state; - - let (persister, _tmp, _path) = fresh_persister(); - let w: WalletId = wid(0xC8); - ensure_wallet_meta(&persister, &w); - - let addr_unknown = p2pkh(0xEE); - let mut conn = persister.lock_conn_for_test(); - let cs = CoreChangeSet { - new_utxos: vec![make_utxo(&addr_unknown, 0, 3000)], - ..Default::default() - }; - let tx = conn.transaction().unwrap(); - let err = core_state::apply(&tx, &w, &cs) - .expect_err("unspent UTXO on an undeclared address must error"); - assert!( - matches!(err, WalletStorageError::UtxoAddressNotDerived { .. }), - "expected UtxoAddressNotDerived, got {err:?}" + by_account.get(&0).map(|v| v.len()), + Some(2), + "both UTXOs are attributed to the default account (index 0)" ); } -/// A spent-only placeholder UTXO whose address was never derived still -/// persists with the account-0 fallback — spent rows are excluded from -/// the unspent set, so the placeholder index is inert. +/// A spent-only placeholder UTXO (no prior unspent row to mark) persists +/// with the hardcoded account 0 — spent rows are excluded from the unspent +/// set, so the index is inert. #[test] fn spent_only_utxo_on_undeclared_address_uses_zero_fallback() { use platform_wallet_storage::sqlite::schema::core_state; @@ -230,20 +193,20 @@ fn spent_only_utxo_on_undeclared_address_uses_zero_fallback() { /// an out-of-range `birth_height` errors rather than truncating. #[test] fn birth_height_overflow_errors_not_truncates() { - use platform_wallet_storage::sqlite::schema::wallet_meta; + use platform_wallet_storage::sqlite::schema::wallets; let (persister, _tmp, _path) = fresh_persister(); let w = wid(0xD1); { let conn = persister.lock_conn_for_test(); // 1<<40 overflows u32 but fits the i64 column. 
conn.execute( - "INSERT INTO wallet_metadata (wallet_id, network, birth_height) VALUES (?1, 'testnet', ?2)", + "INSERT INTO wallets (wallet_id, network, birth_height) VALUES (?1, 'testnet', ?2)", params![w.as_slice(), 1_099_511_627_776i64], ) .unwrap(); } let conn = persister.lock_conn_for_test(); - let err = wallet_meta::fetch(&conn, &w).expect_err("overflow must error"); + let err = wallets::fetch(&conn, &w).expect_err("overflow must error"); assert!( matches!(err, WalletStorageError::IntegerOverflow { .. }), "expected IntegerOverflow, got {err:?}" diff --git a/packages/rs-platform-wallet-storage/tests/sqlite_wallet_db_identity.rs b/packages/rs-platform-wallet-storage/tests/sqlite_wallet_db_identity.rs new file mode 100644 index 00000000000..ae3ef2ce92c --- /dev/null +++ b/packages/rs-platform-wallet-storage/tests/sqlite_wallet_db_identity.rs @@ -0,0 +1,168 @@ +#![allow(clippy::field_reassign_with_default)] + +//! Wallet-DB identity gates: the `application_id` header magic and the +//! `refinery_schema_history` well-formedness probe. +//! +//! - A foreign refinery-versioned SQLite DB (has `schema_history`, passes +//! `integrity_check`, version within range) but the WRONG +//! `application_id` must be rejected as `NotAWalletDb` — both on +//! `restore_from` (destination untouched) and on `open()`. +//! - A wallet DB whose `refinery_schema_history` carries a malformed +//! `applied_on` / `checksum` must surface a typed +//! `SchemaHistoryMalformed` rather than panicking inside refinery. + +mod common; + +use common::{fresh_persister, wid}; +use platform_wallet::changeset::{ + CoreChangeSet, PlatformWalletChangeSet, PlatformWalletPersistence, WalletMetadataEntry, +}; +use platform_wallet_storage::{SqlitePersister, SqlitePersisterConfig, WalletStorageError}; +use rusqlite::Connection; + +/// `SqlitePersister` is not `Debug`, so `Result::expect_err` can't be +/// used on an `open()` result — extract the error by matching instead. 
+fn open_err(cfg: SqlitePersisterConfig) -> WalletStorageError { + match SqlitePersister::open(cfg) { + Ok(_) => panic!("expected open() to fail"), + Err(e) => e, + } +} + +/// Build a "foreign" refinery-versioned DB at `path`: it has a +/// `refinery_schema_history` table with a well-formed row and passes +/// `integrity_check`, but carries a DIFFERENT `application_id`, so it is +/// NOT a wallet-storage database. +fn write_foreign_refinery_db(path: &std::path::Path, application_id: i32) { + let conn = Connection::open(path).expect("open foreign db"); + conn.pragma_update(None, "application_id", application_id) + .expect("stamp foreign application_id"); + conn.execute_batch( + "CREATE TABLE refinery_schema_history ( + version INTEGER PRIMARY KEY, + name TEXT, + applied_on TEXT, + checksum TEXT + ); + INSERT INTO refinery_schema_history (version, name, applied_on, checksum) + VALUES (1, 'initial', '2026-01-01T00:00:00+00:00', '12345'); + CREATE TABLE some_foreign_table (x INTEGER);", + ) + .expect("seed foreign schema"); + drop(conn); +} + +/// Materialize a real wallet DB on disk, then return its path inside a +/// kept-alive tempdir. +fn fresh_wallet_db() -> (tempfile::TempDir, std::path::PathBuf) { + let (persister, tmp, path) = fresh_persister(); + let w = wid(0x11); + let mut cs = PlatformWalletChangeSet::default(); + cs.wallet_metadata = Some(WalletMetadataEntry { + network: key_wallet::Network::Testnet, + wallet_group_id: [0u8; 32], + birth_height: 0, + }); + cs.core = Some(CoreChangeSet { + synced_height: Some(5), + last_processed_height: Some(5), + ..Default::default() + }); + persister.store(w, cs).expect("store"); + persister.flush(w).expect("flush"); + drop(persister); + (tmp, path) +} + +#[test] +fn restore_from_rejects_foreign_application_id_destination_untouched() { + let (_tmp, dest) = fresh_wallet_db(); + + // Snapshot the live destination bytes so we can prove restore left it + // untouched on rejection. 
+ let before = std::fs::read(&dest).expect("read dest before"); + + let src_tmp = tempfile::tempdir().unwrap(); + let foreign = src_tmp.path().join("foreign.db"); + // Anything but the wallet-storage magic. + write_foreign_refinery_db(&foreign, 0x0BAD_F00D_u32 as i32); + + let err = SqlitePersister::restore_from_skip_backup(&dest, &foreign) + .expect_err("restore of a foreign refinery DB must fail"); + assert!( + matches!(err, WalletStorageError::NotAWalletDb { .. }), + "expected NotAWalletDb, got {err:?}" + ); + + let after = std::fs::read(&dest).expect("read dest after"); + assert_eq!( + before, after, + "destination wallet DB must be byte-identical after a rejected restore" + ); + + // The destination must still open as a wallet DB. + SqlitePersister::open(SqlitePersisterConfig::new(&dest)) + .expect("destination still opens after rejected restore"); +} + +#[test] +fn open_rejects_foreign_application_id() { + let tmp = tempfile::tempdir().unwrap(); + let foreign = tmp.path().join("foreign.db"); + write_foreign_refinery_db(&foreign, 0x0BAD_F00D_u32 as i32); + + let err = open_err(SqlitePersisterConfig::new(&foreign)); + assert!( + matches!(err, WalletStorageError::NotAWalletDb { .. }), + "expected NotAWalletDb, got {err:?}" + ); +} + +#[test] +fn open_accepts_a_real_wallet_db_with_stamped_application_id() { + let (_tmp, path) = fresh_wallet_db(); + // Reopening a genuine wallet DB must pass the application_id gate. + SqlitePersister::open(SqlitePersisterConfig::new(&path)) + .expect("reopen of a genuine wallet DB must succeed"); +} + +#[test] +fn open_rejects_malformed_schema_history_without_panicking() { + let (_tmp, path) = fresh_wallet_db(); + + // Corrupt the schema_history `applied_on` to a non-RFC3339 value via + // a side connection, then reopen. Refinery would unwrap()-panic on + // this; the pre-run probe must turn it into a typed error. 
+ { + let conn = Connection::open(&path).expect("side conn"); + conn.execute( + "UPDATE refinery_schema_history SET applied_on = 'not-a-timestamp'", + [], + ) + .expect("corrupt applied_on"); + } + + let err = open_err(SqlitePersisterConfig::new(&path)); + assert!( + matches!(err, WalletStorageError::SchemaHistoryMalformed { .. }), + "expected SchemaHistoryMalformed, got {err:?}" + ); +} + +#[test] +fn open_rejects_non_numeric_checksum_in_schema_history() { + let (_tmp, path) = fresh_wallet_db(); + { + let conn = Connection::open(&path).expect("side conn"); + conn.execute( + "UPDATE refinery_schema_history SET checksum = 'deadbeef'", + [], + ) + .expect("corrupt checksum"); + } + let err = open_err(SqlitePersisterConfig::new(&path)); + assert!( + matches!(err, WalletStorageError::SchemaHistoryMalformed { .. }), + "expected SchemaHistoryMalformed, got {err:?}" + ); +} diff --git a/packages/rs-platform-wallet/Cargo.toml b/packages/rs-platform-wallet/Cargo.toml index 1362523ecea..5398e9c009d 100644 --- a/packages/rs-platform-wallet/Cargo.toml +++ b/packages/rs-platform-wallet/Cargo.toml @@ -31,6 +31,7 @@ bimap = "0.6" # Async runtime tokio = { version = "1", features = ["sync", "rt", "time", "macros"] } tokio-util = { version = "0.7.12" } +dash-async = { path = "../rs-dash-async" } # Logging tracing = "0.1" @@ -80,6 +81,9 @@ name = "shielded_chunk_timing_bench" required-features = ["shielded"] [dev-dependencies] +# Enables `ThreadRegistry::park_orphan_for_test` for the manager's F2-gate +# regression tests; the seam is feature-gated so it never ships in release. +dash-async = { path = "../rs-dash-async", features = ["test-util"] } # Used by `examples/shielded_chunk_timing_bench.rs` and # `tests/shielded_decrypt_bench.rs` to assemble per-chunk wire # fixtures and decode the `ShieldedEncryptedNote` wire type. 
diff --git a/packages/rs-platform-wallet/src/changeset/changeset.rs b/packages/rs-platform-wallet/src/changeset/changeset.rs index b4c18917c44..7b0839cf1ec 100644 --- a/packages/rs-platform-wallet/src/changeset/changeset.rs +++ b/packages/rs-platform-wallet/src/changeset/changeset.rs @@ -963,9 +963,11 @@ pub struct PlatformWalletChangeSet { /// the merge policy (plain `Vec::extend`, dedup is the apply-side /// caller's job). pub account_registrations: Vec, - /// Address-pool snapshots emitted at wallet create (initial - /// gap-limit population) and on any pool extension / "used" flip. - /// See [`AccountAddressPoolEntry`] for the merge policy. + /// Full address-pool snapshots: emitted at wallet create and, in-band, + /// on every block that derives new pool addresses (the + /// `core.addresses_derived` delta). Each entry is the whole current + /// pool, not just the new index. See [`AccountAddressPoolEntry`] for + /// the merge policy and `core_bridge::build_platform_changeset`. pub account_address_pools: Vec, /// Shielded sub-wallet deltas: per-subwallet decrypted notes, /// spent marks, sync watermarks, nullifier checkpoints. The diff --git a/packages/rs-platform-wallet/src/changeset/client_wallet_start_state.rs b/packages/rs-platform-wallet/src/changeset/client_wallet_start_state.rs index 83b6d860742..128a38d29b3 100644 --- a/packages/rs-platform-wallet/src/changeset/client_wallet_start_state.rs +++ b/packages/rs-platform-wallet/src/changeset/client_wallet_start_state.rs @@ -1,36 +1,66 @@ //! Per-wallet portion of [`ClientStartState`](crate::changeset::ClientStartState). //! -//! Everything a single wallet contributes to the startup snapshot: the -//! key-wallet [`Wallet`] + [`ManagedWalletInfo`] pair, a lean -//! identity-manager snapshot, and still-unused asset locks bucketed by -//! account index. +//! **Keyless by type.** This carries everything needed to *reconstruct* +//! a watch-only wallet — network, birth height, the account manifest, +//! 
the rebuilt core-state projection, identities, filtered asset locks — +//! but **no** [`Wallet`](key_wallet::Wallet) and no seed. The persister +//! can never mint a `Wallet`; the manager rebuilds a watch-only one via +//! [`Wallet::new_watch_only`](key_wallet::wallet::Wallet::new_watch_only) +//! from the manifest, applies this state, and defers signing-key +//! derivation to the on-demand sign path +//! ([`sign_with_mnemonic_resolver`] and its siblings). +//! +//! [`sign_with_mnemonic_resolver`]: https://docs.rs/rs-platform-wallet-ffi/ use std::collections::BTreeMap; use crate::changeset::identity_manager_start_state::IdentityManagerStartState; +use crate::changeset::{ + AccountRegistrationEntry, ContactChangeSet, CoreChangeSet, IdentityKeysChangeSet, +}; use crate::wallet::asset_lock::tracked::TrackedAssetLock; use dashcore::OutPoint; -use key_wallet::wallet::ManagedWalletInfo; -use key_wallet::Wallet; +use key_wallet::Network; -/// Per-wallet slice of the startup snapshot. +/// Keyless per-wallet slice of the startup snapshot. /// -/// Used as the value type in [`ClientStartState::wallets`](crate::changeset::ClientStartState::wallets). +/// Used as the value type in +/// [`ClientStartState::wallets`](crate::changeset::ClientStartState::wallets). +/// The structural absence of a `Wallet`/seed field is the SECRETS.md +/// boundary, enforced by type rather than convention. #[derive(Debug)] pub struct ClientWalletStartState { - /// The key-wallet [`Wallet`] to rehydrate on startup. Carries the - /// HD key material and account configuration the rest of the - /// per-wallet state hangs off of. - pub wallet: Wallet, - /// Managed wallet info holding non-key-material state (balances, - /// account metadata, UTXO set, etc.) for this wallet. - pub wallet_info: ManagedWalletInfo, + /// Network the wallet is bound to (from `wallet_metadata`). + pub network: Network, + /// Best estimate of the chain tip at creation time (`0` = scan + /// from genesis / unknown). 
+ pub birth_height: u32, + /// Keyless account manifest — the account-set oracle for building the + /// watch-only wallet (one watch-only account per entry's xpub). + pub account_manifest: Vec, + /// Keyless projection of the persisted core rows (UTXOs, tx + /// records, IS-locks, sync watermarks, `last_applied_chain_lock`). + /// The manager applies this onto a fresh + /// `ManagedWalletInfo::from_wallet` skeleton built from the + /// watch-only wallet. Rebuilt by the `core_state::load_state` reader + /// (item B). + pub core_state: CoreChangeSet, /// Lean snapshot of this wallet's - /// [`IdentityManager`](crate::wallet::identity::IdentityManager): - /// owned + watched identities, primary selection, and the - /// gap-limit scan watermark. + /// [`IdentityManager`](crate::wallet::identity::IdentityManager). pub identity_manager: IdentityManagerStartState, - /// Asset locks that have not yet been consumed by an identity - /// registration / top-up, keyed by account index → outpoint. + /// Asset locks not yet consumed by an identity registration / + /// top-up, keyed by account index → outpoint. Terminal `Consumed` + /// rows are already filtered out by the asset-lock reader. pub unused_asset_locks: BTreeMap>, + /// Persisted DashPay contact state (sent/received requests + + /// established contacts) to layer onto the rehydrated managed + /// identities. PUBLIC material — `removed_*` are always empty + /// (deletes never reach storage as rows). Routed by the manager + /// after `IdentityManager::from`, mirroring the runtime apply path. + pub contacts: ContactChangeSet, + /// Persisted per-identity PUBLIC key entries (no private key + /// material) to layer onto the rehydrated managed identities so + /// `Identity.public_keys` is populated at load time instead of + /// only after the next sync. `removed` is always empty. 
+ pub identity_keys: IdentityKeysChangeSet, } diff --git a/packages/rs-platform-wallet/src/changeset/core_bridge.rs b/packages/rs-platform-wallet/src/changeset/core_bridge.rs index 46945667ef8..206b42af9c9 100644 --- a/packages/rs-platform-wallet/src/changeset/core_bridge.rs +++ b/packages/rs-platform-wallet/src/changeset/core_bridge.rs @@ -3,7 +3,7 @@ //! Upstream `key_wallet_manager::WalletManager` exposes a //! `broadcast::Sender` and a `subscribe_events()` accessor //! returning a `broadcast::Receiver`; consumers attach at -//! startup and drain the stream. [`spawn_wallet_event_adapter`] is the +//! startup and drain the stream. [`wallet_event_adapter_loop`] is the //! platform-wallet-side consumer: a tokio task that pulls events off //! that broadcast, projects each one into a //! [`CoreChangeSet`](crate::changeset::CoreChangeSet), wraps it in a @@ -19,10 +19,11 @@ //! //! # Lifetime //! -//! [`spawn_wallet_event_adapter`] returns a [`JoinHandle`]. The caller -//! (typically `PlatformWalletManager`) keeps the handle for the -//! manager's lifetime; on shutdown, fire the [`CancellationToken`] to -//! make the task exit cleanly. +//! [`wallet_event_adapter_loop`] is the task body. The caller (typically +//! `PlatformWalletManager`) registers it on the shared `ThreadRegistry` +//! via `start_task`, which owns its [`JoinHandle`](tokio::task::JoinHandle) +//! and cancellation; on shutdown the registry fires the +//! [`CancellationToken`] to make the task exit cleanly and joins it. 
use std::sync::Arc; @@ -32,89 +33,197 @@ use key_wallet::managed_account::transaction_record::{OutputRole, TransactionRec use key_wallet::transaction_checking::TransactionContext; use key_wallet::Utxo; use key_wallet_manager::{WalletEvent, WalletId, WalletManager}; -use tokio::sync::broadcast::error::RecvError; +use tokio::sync::broadcast::error::{RecvError, TryRecvError}; +use tokio::sync::broadcast::Receiver; use tokio::sync::RwLock; -use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use crate::changeset::changeset::{CoreChangeSet, PlatformWalletChangeSet}; use crate::changeset::traits::PlatformWalletPersistence; +use crate::changeset::Merge; use crate::wallet::platform_wallet::PlatformWalletInfo; -/// Spawn the wallet-event subscriber task. +/// Bound on the on-cancel drain — caps how many buffered events the +/// adapter persists after cancellation before exiting. Sized well above +/// the upstream broadcast capacity (currently 256) so a normal teardown +/// drains everything, while a pathological flood can't stall shutdown. +const CANCEL_DRAIN_BUDGET: usize = 4096; + +/// Single-account observation. The storage writer hardcodes +/// `core_utxos.account_index = 0` (the product uses only the default +/// account, and that column drives only cosmetic per-account grouping). A +/// UTXO-bearing record owned by a non-default funds account is STILL +/// persisted under index 0 — never skipped, because skipping it would +/// undercount the wallet balance and lose funds. We only `warn!` so the +/// approximate grouping is visible. Identity/provider account types carry +/// no funds index (`AccountType::index() == None`) and never emit +/// `Received`/`Change` UTXOs, so they never warn. 
+fn warn_if_non_default_account(record: &TransactionRecord) { + if let Some(index) = record.account_type.index() { + if index != 0 { + tracing::warn!( + account_index = index, + txid = %record.txid, + "non-default account UTXO persisted under account_index 0; \ + per-account grouping is approximate" + ); + } + } +} + +/// The wallet-event subscriber loop (the task body owned by the registry). +/// +/// Subscribes to `wallet_manager.subscribe_events()` from inside the task +/// (so the call-site doesn't need to be on a tokio runtime), then loops +/// dispatching events to the persister via +/// [`PlatformWalletPersistence::store`]. Exits when `cancel` fires or the +/// upstream broadcast channel closes. /// -/// Subscribes to `wallet_manager.subscribe_events()` from inside the -/// spawned task (so the call-site doesn't need to be on a tokio -/// runtime), then loops dispatching events to the persister via -/// [`PlatformWalletPersistence::store`]. Exits when `cancel` fires -/// or the upstream broadcast channel closes. +/// On cancellation the adapter drains any events already buffered on the +/// receiver before exiting (bounded by [`CANCEL_DRAIN_BUDGET`]). Without +/// that drain, a `TransactionInstantLocked` (which P2P does not replay) +/// emitted just before stop would be lost — the next sync subscribes +/// fresh and only sees future events. Other event kinds (block-driven) +/// are re-emitted on the next SPV resync from `last_processed_height`, +/// but the drain treats them uniformly. /// -/// Generic over `P` so the spawned task gets static-dispatch on -/// every `persister.store(...)` call. Pass the manager's own -/// `Arc

` (not the `Arc` -/// coercion) to actually realize the static-dispatch win. -pub fn spawn_wallet_event_adapter

( +/// Generic over `P` so the task gets static-dispatch on every +/// `persister.store(...)` call. Pass the manager's own `Arc

` (not the +/// `Arc` coercion) to realize that win. +pub async fn wallet_event_adapter_loop

( wallet_manager: Arc>>, persister: Arc

, cancel: CancellationToken, -) -> JoinHandle<()> -where +) where P: PlatformWalletPersistence + 'static, { - tokio::spawn(async move { - let mut receiver = { - let guard = wallet_manager.read().await; - guard.subscribe_events() - }; - tracing::debug!("wallet-event adapter task started"); + let mut receiver = { + let guard = wallet_manager.read().await; + guard.subscribe_events() + }; + tracing::debug!("wallet-event adapter task started"); - loop { - tokio::select! { - recv = receiver.recv() => { - match recv { - Ok(event) => { - let wallet_id = event.wallet_id(); - // For events that need to consult per-wallet - // state (today only `TransactionInstantLocked`, - // which checks finality before recording the IS - // lock), grab a brief read lock on the manager. - let core = build_core_changeset(&wallet_manager, &event).await; - if core.is_empty_no_records() { - // SyncHeightAdvanced for an unknown wallet, - // empty BlockProcessed, etc. — nothing to - // persist. Skip the round-trip. - continue; - } - let cs = PlatformWalletChangeSet { - core: Some(core), - ..PlatformWalletChangeSet::default() - }; - if let Err(e) = persister.store(wallet_id, cs) { - tracing::warn!( - wallet_id = %hex::encode(wallet_id), - error = %e, - "Persister rejected core changeset; state will be re-emitted on next sync round" - ); - } - } - Err(RecvError::Closed) if cancel.is_cancelled() => break, - Err(RecvError::Closed) => { - tracing::error!("WalletEvent broadcast closed unexpectedly"); - break; - } - Err(RecvError::Lagged(n)) => { - tracing::warn!( - missed = n, - "wallet-event adapter lagged on broadcast channel; some events were dropped" - ); - } + loop { + tokio::select! 
{ + recv = receiver.recv() => { + match recv { + Ok(event) => { + process_event(&wallet_manager, persister.as_ref(), event).await; + } + Err(RecvError::Closed) if cancel.is_cancelled() => break, + Err(RecvError::Closed) => { + tracing::error!("WalletEvent broadcast closed unexpectedly"); + break; } + Err(RecvError::Lagged(n)) => { + tracing::warn!( + missed = n, + "wallet-event adapter lagged on broadcast channel; some events were dropped" + ); + } + } + } + _ = cancel.cancelled() => { + // Drain buffered events before exiting: P2P does not + // replay IS-locks, so an event emitted just before + // cancel would be silently lost without this drain. + let drained = drain_buffered_events( + &mut receiver, + &wallet_manager, + persister.as_ref(), + CANCEL_DRAIN_BUDGET, + ) + .await; + if drained > 0 { + tracing::debug!( + drained, + "wallet-event adapter drained buffered events on cancel", + ); } - _ = cancel.cancelled() => break, + break; + } + } + } + tracing::debug!("wallet-event adapter task exiting"); +} + +/// Project a single event into the persister. Shared by the live loop +/// and the on-cancel drain so they cannot drift in behaviour. +async fn process_event

( + wallet_manager: &Arc>>, + persister: &P, + event: WalletEvent, +) where + P: PlatformWalletPersistence + 'static, +{ + let wallet_id = event.wallet_id(); + // For events that need to consult per-wallet state (today only + // `TransactionInstantLocked`, which checks finality before recording + // the IS lock), grab a brief read lock on the manager. + let core = build_core_changeset(wallet_manager, &event).await; + if core.is_empty() { + // SyncHeightAdvanced for an unknown wallet, empty BlockProcessed, + // etc. — nothing to persist. Skip the round-trip. + return; + } + let cs = PlatformWalletChangeSet { + core: Some(core), + ..PlatformWalletChangeSet::default() + }; + if let Err(e) = persister.store(wallet_id, cs) { + tracing::warn!( + wallet_id = %hex::encode(wallet_id), + error = %e, + "Persister rejected core changeset; state will be re-emitted on next sync round" + ); + } +} + +/// Drain at most `budget` buffered events from `receiver` through +/// [`process_event`]. Returns the count drained. Stops on empty, closed +/// channel, or budget exhaustion; logs a warning on `Lagged`. +// +// TODO: add a unit test covering this path — synthesise WalletEvents on +// the broadcast, fire `cancel`, assert the drained count. Blocked on +// the absence of an existing test scaffold for `wallet_event_adapter_loop`. +async fn drain_buffered_events

( + receiver: &mut Receiver, + wallet_manager: &Arc>>, + persister: &P, + budget: usize, +) -> usize +where + P: PlatformWalletPersistence + 'static, +{ + let mut drained = 0; + let mut attempts = 0; + // `attempts` (not `drained`) bounds the loop so a sustained `Lagged` + // stream — which logs but produces no persisted event — still hits + // the cap and exits, preserving the bounded-teardown guarantee. + while attempts < budget { + attempts += 1; + match receiver.try_recv() { + Ok(event) => { + process_event(wallet_manager, persister, event).await; + drained += 1; + } + Err(TryRecvError::Empty) | Err(TryRecvError::Closed) => break, + Err(TryRecvError::Lagged(n)) => { + tracing::warn!( + missed = n, + "wallet-event adapter lagged during cancel drain; some events were dropped" + ); } } - tracing::debug!("wallet-event adapter task exiting"); - }) + } + if attempts == budget { + tracing::warn!( + budget, + drained, + "wallet-event adapter cancel-drain hit budget; further buffered events dropped" + ); + } + drained } /// Project an upstream [`WalletEvent`] into a [`CoreChangeSet`] suitable @@ -129,18 +238,17 @@ async fn build_core_changeset( addresses_derived, .. } => { + // Persist regardless of account; warn on a non-default account. + warn_if_non_default_account(record); // Derive UTXO deltas before moving the record into `records` // so the per-record borrows are still live. CoreChangeSet { new_utxos: derive_new_utxos(record), spent_utxos: derive_spent_utxos(record), records: vec![(**record).clone()], - // Mirror the upstream-emitted derived addresses - // through to the persister so newly-extended pool - // rows are written transactionally with the tx that - // triggered the extension. See - // `CoreChangeSet.addresses_derived` for the cascade- - // link rationale. + // Forward the upstream-emitted derived addresses to the + // persister; the FFI layer feeds the iOS address registry + // from this delta. See `CoreChangeSet.addresses_derived`. 
addresses_derived: addresses_derived.clone(), ..CoreChangeSet::default() } @@ -171,8 +279,10 @@ async fn build_core_changeset( .. } => { let mut cs = CoreChangeSet::default(); - // Inserted records bring fresh UTXOs and may consume previous ones. + // Inserted records bring fresh UTXOs and may consume previous + // ones — warn on a non-default account, but always project. for r in inserted { + warn_if_non_default_account(r); cs.new_utxos.extend(derive_new_utxos(r)); cs.spent_utxos.extend(derive_spent_utxos(r)); } @@ -215,11 +325,8 @@ async fn build_core_changeset( // // `ChainLockProcessed` fires every time the wallet's // `last_applied_chain_lock` advances (dashpay/rust-dashcore#769), - // even when no record was promoted — so a quiescent wallet's - // boundary advance is no longer invisible to this bridge. - // The earlier `TransactionsChainlocked`-only signal had a - // gap on the "metadata advanced but per-account empty" - // path; the new event closes it deterministically. + // so a quiescent wallet's boundary advance still reaches + // this bridge even when no record was promoted. CoreChangeSet { last_applied_chain_lock: Some(chain_lock.clone()), ..CoreChangeSet::default() @@ -340,20 +447,121 @@ fn derive_spent_utxos(record: &TransactionRecord) -> Vec { .collect() } -impl CoreChangeSet { - /// Cheap "should we bother round-tripping the persister" check used - /// by the adapter to drop empty events without locking. Skips the - /// `is_empty()` walk over `instant_locks_for_non_final_records` - /// since that map is rarely populated and `Vec::is_empty` short- - /// circuits on the common case. 
- fn is_empty_no_records(&self) -> bool { - self.records.is_empty() - && self.spent_utxos.is_empty() - && self.new_utxos.is_empty() - && self.instant_locks_for_non_final_records.is_empty() - && self.last_processed_height.is_none() - && self.synced_height.is_none() - && self.last_applied_chain_lock.is_none() - && self.addresses_derived.is_empty() +// merge: keep #3953's projection tests; is_empty_no_records dropped by our +// refactor in favour of Merge::is_empty() (the adapter call site uses it). +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + + use dashcore::blockdata::transaction::Transaction; + use dashcore::hashes::Hash; + use key_wallet::account::{AccountType, StandardAccountType}; + use key_wallet::managed_account::transaction_record::{ + OutputDetail, TransactionDirection, TransactionRecord, + }; + use key_wallet::transaction_checking::{BlockInfo, TransactionContext, TransactionType}; + use key_wallet::WalletCoreBalance; + + use super::*; + + fn standard(index: u32) -> AccountType { + AccountType::Standard { + index, + standard_account_type: StandardAccountType::BIP44Account, + } + } + + /// A throwaway testnet P2PKH address keyed off `seed`. + fn p2pkh(seed: u8) -> dashcore::Address { + use dashcore::address::Payload; + use dashcore::PubkeyHash; + dashcore::Address::new( + dashcore::Network::Testnet, + Payload::PubkeyHash(PubkeyHash::from_byte_array([seed; 20])), + ) + } + + /// A confirmed `TransactionRecord` owned by `account_type` carrying a + /// single `Received` output worth `value` at `addr`, so + /// `derive_new_utxos` yields exactly one UTXO. 
+ fn record_with_received_output( + account_type: AccountType, + addr: &dashcore::Address, + value: u64, + ) -> TransactionRecord { + let tx = Transaction { + version: 3, + lock_time: 0, + input: vec![], + output: vec![dashcore::TxOut { + value, + script_pubkey: addr.script_pubkey(), + }], + special_transaction_payload: None, + }; + TransactionRecord::new( + tx, + account_type, + TransactionContext::InChainLockedBlock(BlockInfo::new( + 42, + dashcore::BlockHash::from_byte_array([3u8; 32]), + 1_735_689_600, + )), + TransactionType::Standard, + TransactionDirection::Incoming, + Vec::new(), + vec![OutputDetail { + index: 0, + role: OutputRole::Received, + address: Some(addr.clone()), + value, + }], + value as i64, + ) + } + + /// Project a `TransactionDetected` for `record` through the real bridge + /// path. `balance`/`account_balances` are unused by the projection. + async fn changeset_for(record: TransactionRecord) -> CoreChangeSet { + let wm = Arc::new(RwLock::new(WalletManager::::new( + key_wallet::Network::Testnet, + ))); + let event = WalletEvent::TransactionDetected { + wallet_id: [0u8; 32], + record: Box::new(record), + balance: WalletCoreBalance::default(), + account_balances: BTreeMap::new(), + addresses_derived: Vec::new(), + }; + build_core_changeset(&wm, &event).await + } + + /// A default-account (index 0) UTXO is projected into the changeset. + #[tokio::test] + async fn default_account_utxo_persists() { + let addr = p2pkh(0x11); + let cs = changeset_for(record_with_received_output(standard(0), &addr, 500_000)).await; + assert_eq!( + cs.new_utxos.len(), + 1, + "the default-account UTXO must be projected" + ); + assert_eq!(cs.new_utxos[0].value(), 500_000); + } + + /// REGRESSION (fund-loss): a non-default-account (index != 0) UTXO is + /// STILL projected — never dropped. Storage persists it under + /// `account_index 0`; the only cost is approximate per-account grouping + /// (a `warn!` is logged). Dropping it would undercount the balance. 
+ #[tokio::test] + async fn non_default_account_utxo_persists_under_zero() { + let addr = p2pkh(0x22); + let cs = changeset_for(record_with_received_output(standard(7), &addr, 900_000)).await; + assert_eq!( + cs.new_utxos.len(), + 1, + "a non-default-account UTXO must NOT be dropped" + ); + assert_eq!(cs.new_utxos[0].value(), 900_000, "funds preserved"); } } diff --git a/packages/rs-platform-wallet/src/changeset/mod.rs b/packages/rs-platform-wallet/src/changeset/mod.rs index dc76ddd39ac..208c132e874 100644 --- a/packages/rs-platform-wallet/src/changeset/mod.rs +++ b/packages/rs-platform-wallet/src/changeset/mod.rs @@ -33,7 +33,7 @@ pub use changeset::{ }; pub use client_start_state::ClientStartState; pub use client_wallet_start_state::ClientWalletStartState; -pub use core_bridge::spawn_wallet_event_adapter; +pub use core_bridge::wallet_event_adapter_loop; pub use identity_manager_start_state::IdentityManagerStartState; pub use merge::Merge; pub use platform_address_sync_start_state::PlatformAddressSyncStartState; diff --git a/packages/rs-platform-wallet/src/error.rs b/packages/rs-platform-wallet/src/error.rs index c94cb7093d1..37a6e506d2e 100644 --- a/packages/rs-platform-wallet/src/error.rs +++ b/packages/rs-platform-wallet/src/error.rs @@ -10,6 +10,22 @@ pub enum PlatformWalletError { #[error("Wallet creation failed: {0}")] WalletCreation(String), + /// The persisted wallet has UTXOs to restore but no funds-bearing + /// account in its reconstructed account collection to hold them. + /// Fail-closed rather than reconstructing a silent zero balance — + /// the no-silent-zero mandate. Carries only the (public) wallet id + /// and the dropped-UTXO count, never key material. + #[error( + "rehydration topology unsupported for wallet {}: {utxo_count} persisted UTXO(s) but no funds-bearing account", + hex::encode(wallet_id) + )] + RehydrationTopologyUnsupported { + /// The wallet whose topology could not hold the persisted UTXOs. 
+ wallet_id: [u8; 32], + /// How many persisted UTXOs would have been silently dropped. + utxo_count: usize, + }, + #[error("Wallet not found: {0}")] WalletNotFound(String), @@ -239,6 +255,24 @@ pub enum PlatformWalletError { #[error("Shielded sub-wallet not bound: call bind_shielded first")] ShieldedNotBound, + + /// A Clear/wipe could not safely complete because the shielded sync + /// coordinator's in-flight pass did not drain cleanly first — it either + /// timed out on the join backstop or its loop ended non-cleanly + /// (cancelled / panicked). The shared commitment-tree store is therefore + /// **left intact** (not wiped): a still-running pass could re-persist + /// notes into the store immediately after a `clear()`, desyncing the + /// host's wiped rows from a repopulated tree and gate-skipping every + /// re-downloaded position on the next cold resync. The host **must not** + /// commit its own persistence wipe; retry Clear once the pass settles. + /// Carries the terminal [`WorkerStatus`](dash_async::WorkerStatus) for + /// diagnostics. 
+ #[error( + "shielded clear aborted: sync coordinator did not drain cleanly \ + ({status:?}); commitment-tree store left intact so an in-flight pass \ + cannot re-persist into a wiped store — retry once the pass settles" + )] + ShieldedShutdownIncomplete { status: dash_async::WorkerStatus }, } /// Check whether an SDK error indicates that an InstantSend lock proof was diff --git a/packages/rs-platform-wallet/src/events.rs b/packages/rs-platform-wallet/src/events.rs index 9ac256e8730..7c7f689e1ec 100644 --- a/packages/rs-platform-wallet/src/events.rs +++ b/packages/rs-platform-wallet/src/events.rs @@ -16,9 +16,34 @@ use arc_swap::ArcSwap; pub use dash_spv::EventHandler; pub use key_wallet_manager::WalletEvent; +use crate::manager::load_outcome::SkipReason; use crate::manager::platform_address_sync::PlatformAddressSyncSummary; #[cfg(feature = "shielded")] use crate::manager::shielded_sync::ShieldedSyncPassSummary; +use crate::wallet::platform_wallet::WalletId; + +/// Platform-wallet lifecycle event surfaced to app handlers. +/// +/// Distinct from the SPV `EventHandler` stream — these are +/// platform-specific notifications the app may react to (toast, +/// telemetry) without threading return values through every call site. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PlatformEvent { + /// A persisted wallet was skipped during + /// [`load_from_persistor`](crate::PlatformWalletManager::load_from_persistor) + /// because its persisted row was corrupt (a structural decode / + /// projection failure). The load path is seedless, so the only + /// reason is [`SkipReason::CorruptPersistedRow`]. + /// + /// Carries the (public, non-secret) wallet id and the structural + /// [`SkipReason`]; never any secret byte. + WalletSkippedOnLoad { + /// The skipped wallet's id. + wallet_id: WalletId, + /// Why it was skipped — always a corrupt persisted row. + reason: SkipReason, + }, +} /// Extension of [`EventHandler`] for platform-wallet consumers. 
/// @@ -44,6 +69,15 @@ pub trait PlatformEventHandler: EventHandler { #[cfg(feature = "shielded")] fn on_shielded_sync_completed(&self, _summary: &ShieldedSyncPassSummary) {} + /// Fired once per wallet that + /// [`load_from_persistor`](crate::PlatformWalletManager::load_from_persistor) + /// skipped because its persisted row was corrupt. + /// + /// Default impl is a no-op so existing handlers don't have to care + /// (the internal `LockNotifyHandler` / `BalanceUpdateHandler` + /// ignore it; only the app handler typically reacts). + fn on_platform_event(&self, _event: &PlatformEvent) {} + /// Fired periodically during a shielded sync pass — once per /// completed chunk inside `sync_shielded_notes`. Carries the /// cumulative count of encrypted notes scanned so far in the @@ -142,6 +176,17 @@ impl PlatformEventManager { } } + /// Dispatch a [`PlatformEvent`] to every handler. + /// + /// Not on the SPV hot path — called at most once per wallet during + /// a single `load_from_persistor` pass. + pub fn on_platform_event(&self, event: &PlatformEvent) { + let handlers = self.handlers.load(); + for h in handlers.iter() { + h.on_platform_event(event); + } + } + /// Dispatch a shielded sync progress event to every handler. 
/// /// Called from inside `sync_shielded_notes`'s chunk loop, once diff --git a/packages/rs-platform-wallet/src/lib.rs b/packages/rs-platform-wallet/src/lib.rs index 289a71378fd..cb0b43a77bf 100644 --- a/packages/rs-platform-wallet/src/lib.rs +++ b/packages/rs-platform-wallet/src/lib.rs @@ -22,7 +22,7 @@ pub mod spv; pub mod wallet; pub use error::PlatformWalletError; -pub use events::{PlatformEventHandler, PlatformEventManager}; +pub use events::{PlatformEvent, PlatformEventHandler, PlatformEventManager}; pub use key_wallet::wallet::managed_wallet_info::asset_lock_builder::AssetLockFundingType; // Surface the upstream `DerivedAddress` event payload through this // crate so downstream FFI consumers (rs-platform-wallet-ffi) can @@ -40,11 +40,16 @@ pub use manager::identity_sync::{ DEFAULT_SYNC_INTERVAL_SECS as IDENTITY_SYNC_DEFAULT_INTERVAL_SECS, MAX_TOKENS_PER_BALANCE_BATCH as IDENTITY_SYNC_MAX_TOKENS_PER_BATCH, }; +pub use manager::load_outcome::{LoadOutcome, SkipReason}; pub use manager::platform_address_sync::{ PlatformAddressSyncManager, PlatformAddressSyncSummary, WalletSyncOutcome, DEFAULT_SYNC_INTERVAL_SECS, }; -pub use manager::PlatformWalletManager; +pub use manager::{PlatformWalletManager, SHUTDOWN_JOIN_TIMEOUT_SECS}; +// `shutdown()` returns `ShutdownReport` and +// `ShieldedShutdownIncomplete` carries a `WorkerStatus`; re-export both so +// FFI consumers can read them without a direct `dash_async` dependency. +pub use dash_async::{ShutdownReport, WorkerStatus}; pub use spv::SpvRuntime; pub use wallet::asset_lock::manager::AssetLockManager; pub use wallet::asset_lock::tracked::{AssetLockStatus, TrackedAssetLock}; diff --git a/packages/rs-platform-wallet/src/manager/accessors.rs b/packages/rs-platform-wallet/src/manager/accessors.rs index 7bf901bccf4..51db2d732e3 100644 --- a/packages/rs-platform-wallet/src/manager/accessors.rs +++ b/packages/rs-platform-wallet/src/manager/accessors.rs @@ -299,6 +299,25 @@ impl PlatformWalletManager

{ Arc::clone(&self.shielded_sync_manager) } + /// Whether a shielded-sync worker is still alive — either its live + /// registry slot or a prior-generation thread parked as an orphan after + /// a tight `stop()`->`start()` reap had to detach it past the wedge + /// backstop. Such an orphan still holds an `Arc` to the persister / + /// event-handler context and may fire one final callback, so a clean + /// [`quiesce`](ShieldedSyncManager::quiesce) status alone does not prove + /// the shielded worker is gone. + /// + /// This is the same shielded-scoped liveness gate + /// [`clear_shielded`](Self::clear_shielded) consults; it is exposed so + /// FFI join points (`destroy`, `clear_shielded`) can refuse a + /// misleading clean return while a parked orphan lingers. + /// (`shielded_sync_stop` itself is cancel-only and never joins.) + #[cfg(feature = "shielded")] + pub fn shielded_worker_alive(&self) -> bool { + self.registry + .any_alive_for(super::WalletWorker::ShieldedSync) + } + /// Get a clone of a wallet by its ID. pub async fn get_wallet(&self, wallet_id: &WalletId) -> Option> { let wallets = self.wallets.read().await; diff --git a/packages/rs-platform-wallet/src/manager/coordinator_lifecycle.rs b/packages/rs-platform-wallet/src/manager/coordinator_lifecycle.rs new file mode 100644 index 00000000000..4ca76c3d54b --- /dev/null +++ b/packages/rs-platform-wallet/src/manager/coordinator_lifecycle.rs @@ -0,0 +1,712 @@ +//! Shared lifecycle state + pass protocol for the periodic sync +//! coordinators. +//! +//! The three coordinators ([`IdentitySyncManager`], [`PlatformAddressSyncManager`], +//! [`ShieldedSyncManager`]) each drive a background loop on the shared +//! [`ThreadRegistry`] and gate passes through an `is_syncing` / `quiescing` +//! handshake. That handshake, plus the interval and last-sync bookkeeping, +//! is identical across all three; it lives here so the (subtle, teardown- +//! 
critical) protocol has a single home and each coordinator keeps only its +//! domain-specific pass body. +//! +//! [`IdentitySyncManager`]: super::identity_sync::IdentitySyncManager +//! [`PlatformAddressSyncManager`]: super::platform_address_sync::PlatformAddressSyncManager +//! [`ShieldedSyncManager`]: super::shielded_sync::ShieldedSyncManager + +use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +use dash_async::{ + AtomicFlagGuard, RefcountedFlagGuard, ThreadRegistry, WorkerConfig, WorkerStatus, +}; + +use super::{WalletWorker, COORDINATOR_WEIGHT, SHUTDOWN_JOIN_TIMEOUT_SECS}; + +/// Shared lifecycle state and pass-gating protocol for one periodic sync +/// coordinator. Each coordinator embeds one of these and delegates its +/// `start` / `stop` / `quiesce` / `is_running` / interval / pass-gate +/// surface to it. +pub(crate) struct CoordinatorLifecycle { + registry: Arc>, + worker: WalletWorker, + interval_secs: AtomicU64, + is_syncing: AtomicBool, + /// Refcounted "no new pass" gate: raised iff count > 0. Refcount + /// semantics let the two teardown paths — the public `quiesce()`'s + /// local guard and the Clear flow's `hold_quiescing_gate` — compose + /// without one path's Drop lowering the other path's barrier. + quiescing: Arc, + last_sync_unix: AtomicU64, +} + +impl CoordinatorLifecycle { + pub(crate) fn new( + registry: Arc>, + worker: WalletWorker, + default_interval_secs: u64, + ) -> Self { + Self { + registry, + worker, + interval_secs: AtomicU64::new(default_interval_secs), + is_syncing: AtomicBool::new(false), + quiescing: Arc::new(AtomicUsize::new(0)), + last_sync_unix: AtomicU64::new(0), + } + } + + /// Set the polling interval. Clamped to a minimum of 1s. + pub(crate) fn set_interval(&self, interval: Duration) { + let secs = interval.as_secs().max(1); + self.interval_secs.store(secs, Ordering::Release); + } + + /// Current polling interval. 
+ pub(crate) fn interval(&self) -> Duration { + Duration::from_secs(self.interval_secs.load(Ordering::Acquire)) + } + + /// Current polling interval in whole seconds (for `Debug`). + pub(crate) fn interval_secs(&self) -> u64 { + self.interval_secs.load(Ordering::Acquire) + } + + /// Whether the background loop is currently running. + pub(crate) fn is_running(&self) -> bool { + self.registry.is_running(self.worker) + } + + /// Whether a sync pass is in flight right now. + pub(crate) fn is_syncing(&self) -> bool { + self.is_syncing.load(Ordering::Acquire) + } + + /// Unix seconds of the last completed pass, or `None` if none has ever + /// completed. + pub(crate) fn last_sync_unix_seconds(&self) -> Option { + match self.last_sync_unix.load(Ordering::Acquire) { + 0 => None, + n => Some(n), + } + } + + /// Record the unix-seconds stamp of a just-completed pass. + pub(crate) fn store_last_sync_unix(&self, unix_secs: u64) { + self.last_sync_unix.store(unix_secs, Ordering::Release); + } + + /// The registry config a coordinator starts its loop with: coordinator + /// teardown weight and the shared join budget. No registry-side drain + /// hook is installed: the lifecycle's own [`quiesce`](Self::quiesce) + /// holds the gate via a [`RefcountedFlagGuard`] across the whole drain + /// (cancellation-safe — its `Drop` runs on every exit path including a + /// future dropped at an outer `await`), and + /// [`quiesce_under_held_gate`](Self::quiesce_under_held_gate) requires + /// the caller to already hold one. A registry-side hook would do its + /// `fetch_add` inside `registry.quiesce` while the matching `fetch_sub` + /// lived past the same `await` — under a `tokio::time::timeout` wrap + /// (e.g. `clear_shielded_inner`) a timeout firing mid-drain leaked the + /// hook's contribution forever, silently disabling the coordinator. 
+ pub(crate) fn worker_config(&self) -> WorkerConfig { + WorkerConfig { + weight: COORDINATOR_WEIGHT, + join_budget: Duration::from_secs(SHUTDOWN_JOIN_TIMEOUT_SECS), + drain: None, + } + } + + /// Spawn the standard coordinator loop on the registry: take the + /// worker config and drive `biased; cancel-first` select! of `pass()` + /// followed by an `interval()` sleep — both arms break on cancellation. + /// The loop runs inside `Handle::block_on` on a fresh OS thread (so + /// `pass` can yield `!Send` SDK futures); cancellation drops an in- + /// flight `pass()` at its next `.await`. + /// + /// With refcount semantics on `quiescing`, there is nothing to "reopen" + /// here: properly-dropped guards (`quiesce`'s local, the Clear flow's + /// `hold_quiescing_gate`) leave the count at 0, and the gate is only + /// observed "raised" while a teardown holder is in scope. + /// + /// Each coordinator's `start` calls this with a thunk that captures its + /// `Arc` and invokes its own `sync_now`. + pub(crate) fn spawn_periodic_loop(&self, pass: F, interval: I) + where + F: Fn() -> Fut + Send + 'static, + Fut: std::future::Future, + I: Fn() -> Duration + Send + 'static, + { + // Defense-in-depth: bail when our key is in the registry's clearing + // set. `start_thread` below would refuse the start regardless, and + // refcount semantics make the `quiescing` gate race-free in either + // case — the bail just avoids the wasted setup work. + if self.registry.is_clearing(self.worker) { + return; + } + let cfg = self.worker_config(); + let handle = tokio::runtime::Handle::current(); + let registry = Arc::clone(&self.registry); + let worker = self.worker; + registry.start_thread(worker, cfg, move |cancel| { + handle.block_on(async move { + loop { + if cancel.is_cancelled() { + break; + } + tokio::select! { + biased; + _ = cancel.cancelled() => break, + _ = pass() => {} + } + let sleep_for = interval(); + tokio::select! 
{ + _ = tokio::time::sleep(sleep_for) => {} + _ = cancel.cancelled() => break, + } + } + }); + }); + } + + /// Cancel-only stop: signal the loop and return immediately. + pub(crate) fn stop(&self) { + self.registry.cancel(self.worker); + } + + /// Cancel the loop, drain any in-flight pass, and join the worker, + /// returning its terminal status. Raises the `quiescing` gate via a + /// [`RefcountedFlagGuard`] that decrements on every exit path, leaving + /// the gate's overall state determined by whatever other holders + /// (e.g. the Clear flow's `hold_quiescing_gate`) are still in scope. + /// + /// The gate is raised **here**, by the lifecycle itself — there is no + /// registry-side drain hook. The guard's `Drop` is cancellation-safe + /// (it runs on every exit path including a future dropped at an outer + /// `await`), so the gate is reliably released exactly once per call + /// and the "no new pass" barrier holds regardless of whether a + /// background-loop slot is registered. Gate-before-cancel is + /// preserved: the raise is synchronous and lives above the + /// `registry.quiesce` await that issues any cancel. + /// + /// Refcount semantics close the prior TOCTOU between the `is_clearing` + /// short-circuit and the gate-guard install: even if a Clear lands in + /// the ns-window between the check and the guard, its + /// `hold_quiescing_gate` raise composes safely with ours — both refs + /// keep the count > 0 until each holder's own guard drops, so the + /// local guard's decrement on our return can never lower the gate + /// past the Clear's contribution. The short-circuit remains as + /// defense-in-depth (skipping a redundant drain while a Clear is + /// already doing one), not as a correctness anchor. + /// + /// "Direct pass" here means the gated entry points that take the + /// `is_syncing` slot via [`begin_pass`](Self::begin_pass): every + /// coordinator's `sync_now`, plus the shielded coordinator's + /// `sync_wallet`. 
The platform-address coordinator's `sync_wallet` is + /// intentionally **ungated** (it never touches `is_syncing`; callers + /// that need exclusion gate themselves), so the gate/drain barrier does + /// not apply to it. + pub(crate) async fn quiesce(&self) -> WorkerStatus { + // Defense-in-depth: if a concurrent `clear_shielded` is mid-flight + // for our key, it is already draining + joining the worker under + // its own gate, so a second drain here would be redundant. The + // refcount semantics below make the gate handling race-free + // regardless of whether this check fires, but skipping the work + // saves a wasted drain cycle. + if self.registry.is_clearing(self.worker) { + return WorkerStatus::NotRunning; + } + // Gate up first (instant) and held until the guard drops on return. + // SeqCst on the refcount: store-half of the `quiescing`<-> + // `is_syncing` handshake (see `begin_pass`). + let _quiescing_gate = RefcountedFlagGuard::raise(&self.quiescing); + self.cancel_join_and_drain().await + } + + /// Like [`quiesce`](Self::quiesce) but for a caller that has **already** + /// raised the `quiescing` gate (via [`hold_quiescing_gate`](Self::hold_quiescing_gate)) + /// and is holding the registry's per-key clearing latch around the same + /// teardown: this skips both the [`quiesce`](Self::quiesce) call's + /// `is_clearing` short-circuit (which would otherwise bail without + /// running the drain) and its own gate raise (the caller's ref already + /// keeps the gate up). The shielded Clear flow uses this to keep the + /// "no new pass" barrier raised *continuously* across the drain, the + /// orphan-liveness check, and the store wipe — with no lapse for a + /// direct `sync_now`/`sync_wallet` to slip through and re-persist into + /// the store being cleared. Gate-before-cancel still holds: the caller + /// raised the gate before this runs. 
+ #[cfg(any(test, feature = "shielded"))] + pub(crate) async fn quiesce_under_held_gate(&self) -> WorkerStatus { + debug_assert!( + self.quiescing.load(Ordering::Acquire) > 0, + "quiesce_under_held_gate requires the caller to already hold the quiescing gate" + ); + self.cancel_join_and_drain().await + } + + /// Cancel + bounded-join the background loop (if any), then drain a + /// direct in-flight pass on a clean status. Assumes the `quiescing` + /// gate is **already raised** by the caller: [`quiesce`](Self::quiesce) + /// installs its own local [`RefcountedFlagGuard`] in scope across the + /// whole `await`, and [`quiesce_under_held_gate`](Self::quiesce_under_held_gate) + /// requires the caller to hold one. Either guard's `Drop` runs on + /// every exit path — including a future dropped at an outer `await` + /// (e.g. a `tokio::time::timeout` firing mid-drain) — so the gate is + /// released exactly once per teardown, no matter how the call unwinds. + /// + /// A wedged loop pass surfaces from `registry.quiesce` as a non-clean + /// `Timeout` rather than hanging — its orphaned thread is tracked by + /// the registry for teardown, so the drain below must not wait on it. + /// On a clean status the only possible `is_syncing` holder is a direct + /// `sync_now`/`sync_wallet` that entered before the gate rose (with no + /// loop slot `registry.quiesce` joined nothing; with an idle loop it + /// joined a thread that was not the one holding the flag). The raised + /// gate keeps a new pass from starting, so the drain converges, and a + /// panicked pass clears the flag via its own RAII guard. + async fn cancel_join_and_drain(&self) -> WorkerStatus { + let status = self.registry.quiesce(self.worker).await; + if status.is_clean() { + self.drain_in_flight_pass().await; + } + status + } + + /// Poll until no sync pass holds `is_syncing`. 
Only sound to call with + /// the `quiescing` gate already raised (refcount > 0, so no new pass + /// can start) and after the background loop has been cancel-joined (so + /// the only possible holder is a direct, non-cancellable pass running + /// to completion). Mirrors the registry's 5ms poll cadence. Unbounded + /// by design — the caller bounds the whole teardown (the FFI `stop` / + /// `clear` bridges wrap it in a `SHUTDOWN_JOIN_TIMEOUT_SECS` timeout). + async fn drain_in_flight_pass(&self) { + // SeqCst: load-half of the `quiescing`<->`is_syncing` handshake (see + // `begin_pass`). Pairs with `begin_pass`'s SeqCst CAS so a pass that + // claimed the slot just as the gate rose is observed here and waited + // out, rather than slipping past an unsynchronized read. + while self.is_syncing.load(Ordering::SeqCst) { + tokio::time::sleep(Duration::from_millis(5)).await; + } + } + + /// Raise the `quiescing` gate and hold it raised until the returned + /// guard drops. Under refcount semantics, multiple holders compose + /// safely: the gate stays raised while ANY guard is held, and each + /// guard's Drop only releases its own contribution. This lets a + /// multi-step teardown (e.g. the shielded Clear flow) keep new direct + /// passes off across a check-then-wipe even while a concurrent public + /// `quiesce()` lands inside the window — neither party's Drop can lower + /// the other's barrier. In production only the shielded Clear flow needs + /// this today; the coordinator pass-gate tests also exercise it. + #[cfg(any(test, feature = "shielded"))] + pub(crate) fn hold_quiescing_gate(&self) -> RefcountedFlagGuard<'_> { + // SeqCst on the refcount: store-half of the `quiescing`<-> + // `is_syncing` handshake (see `begin_pass`). The Clear flow raises + // the gate through here, so this raise must be self-fencing just + // like `quiesce`'s. 
+ RefcountedFlagGuard::raise(&self.quiescing) + } + + /// Test-only read of the `quiescing` gate as a boolean ("is the gate + /// raised?"). Returns `true` iff the refcount is > 0. Used by + /// regression tests that assert the gate stays raised across a refused + /// (re)start or a racing public `quiesce()`. + #[cfg(test)] + pub(crate) fn quiescing_load_for_test(&self, ordering: Ordering) -> bool { + self.quiescing.load(ordering) > 0 + } + + /// Enter a sync pass. Atomically claims the `is_syncing` slot, then + /// checks the `quiescing` gate. Returns the RAII guard that clears + /// `is_syncing` on drop, or `None` when the caller must bail without + /// doing work — because a pass is already in flight, or a teardown has + /// raised the gate. In the gated case the briefly-claimed slot is + /// released before returning (the guard drops), so a later post-quiesce + /// pass can still run. + pub(crate) fn begin_pass(&self) -> Option> { + // LOAD-BEARING MEMORY ORDERING: the `is_syncing` claim (this CAS) and + // the `quiescing` gate read below form a Dekker-style mutual-exclusion + // handshake with `quiesce`'s `store(quiescing) … load(is_syncing)`. + // The guarantee we need is that a teardown and a pass-entry can never + // BOTH miss each other — either this pass observes the raised gate and + // bails, or the drain observes our `is_syncing` claim and waits it + // out. That is a StoreLoad relationship across two distinct atomics, + // which Release/Acquire do NOT order; only SeqCst (a single total + // order over all four ops) does. So the CAS *store* here, the gate + // load here, and the matching `store(quiescing)` / `load(is_syncing)` + // on the teardown side are all `SeqCst`. (Today the lock `registry` + // takes would also fence this, but that is incidental — relying on it + // would make the handshake silently fragile to a lock-free refactor.) 
+ if self + .is_syncing + .compare_exchange(false, true, Ordering::SeqCst, Ordering::Acquire) + .is_err() + { + return None; + } + + // RAII guard: clears `is_syncing` on every exit path, including + // panics. Without it a panic inside the pass would leave + // `is_syncing = true` forever and wedge `quiesce`'s drain loop. + let guard = AtomicFlagGuard::new(&self.is_syncing); + + // A `quiesce` may have raised the gate between our CAS and here; if + // so, bail (dropping `guard`, which clears the slot) so the drain + // can complete and teardown gets a true "no further pass" barrier. + // SeqCst — load-half of the handshake described above. The gate is + // observed "raised" iff the refcount is > 0 (any teardown holder in + // scope). + if self.quiescing.load(Ordering::SeqCst) > 0 { + return None; + } + Some(guard) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio::sync::oneshot; + + fn make_lifecycle() -> Arc { + let registry = ThreadRegistry::::new(); + Arc::new(CoordinatorLifecycle::new( + registry, + WalletWorker::IdentitySync, + 60, + )) + } + + /// With NO background loop registered, `quiesce` must still raise the + /// `quiescing` gate — so a concurrent direct `sync_now`/`sync_wallet` + /// that lands after it bails — and drain an already-in-flight direct + /// pass before returning. The registry's drain hook cannot cover this: + /// `registry.quiesce` early-returns `NotRunning` WITHOUT running the + /// hook when no loop slot exists, so the gate would otherwise never go + /// up and the in-flight pass would not be drained. Guards the + /// `clear_shielded`/`stop` contract ("a concurrent direct + /// sync_now/sync_wallet is held off"). Non-vacuous: a `quiesce` that + /// merely delegated to the registry would raise no gate and let the + /// concurrent pass slip through. 
+ #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn quiesce_raises_gate_and_drains_direct_pass_without_background_loop() { + let lifecycle = make_lifecycle(); + assert!( + !lifecycle.is_running(), + "precondition: no background loop registered" + ); + + // A direct sync_now/sync_wallet pass already past `begin_pass`, held + // in flight on a task until we release it. + let (ready_tx, ready_rx) = oneshot::channel::<()>(); + let (release_tx, release_rx) = oneshot::channel::<()>(); + let lc_pass = Arc::clone(&lifecycle); + let pass_task = tokio::spawn(async move { + let _pass = lc_pass.begin_pass().expect("first pass enters the slot"); + ready_tx.send(()).expect("signal in-flight"); + release_rx.await.expect("await release"); + // `_pass` drops here → is_syncing = false + }); + + ready_rx.await.expect("pass reached in-flight"); + assert!(lifecycle.is_syncing(), "direct pass holds is_syncing"); + + // Drive `quiesce` concurrently: it must raise the gate, then block + // draining the in-flight pass. + let lc_q = Arc::clone(&lifecycle); + let quiesce_task = tokio::spawn(async move { lc_q.quiesce().await }); + + // Give `quiesce` time to raise the gate and enter the drain. + tokio::time::sleep(Duration::from_millis(50)).await; + assert!( + lifecycle.quiescing.load(Ordering::Acquire) > 0, + "quiesce must raise the gate even with no background loop registered" + ); + assert!( + lifecycle.is_syncing(), + "in-flight direct pass still held; quiesce has not skipped the drain" + ); + assert!( + !quiesce_task.is_finished(), + "quiesce must block until the in-flight pass drains" + ); + + // Release the pass; `quiesce` drains `is_syncing`, then returns. 
+ release_tx.send(()).expect("release the pass"); + let status = tokio::time::timeout(Duration::from_secs(2), quiesce_task) + .await + .expect("quiesce completes once the pass drains") + .expect("quiesce task joined"); + assert_eq!(status, WorkerStatus::NotRunning); + assert!( + !lifecycle.is_syncing(), + "is_syncing was drained before quiesce returned" + ); + + pass_task.await.expect("pass task joined"); + } + + /// `quiesce_under_held_gate` must NOT lower the `quiescing` gate the + /// caller is holding — the mechanism that lets the shielded Clear flow + /// keep the "no new pass" barrier raised *continuously* across the + /// drain, the liveness check, and the store wipe. The plain + /// [`quiesce`](CoordinatorLifecycle::quiesce)'s own RAII guard would + /// lower it on return, leaving a window a direct pass could slip into + /// before Clear re-raised it. Must fail against a variant that delegates + /// to `quiesce` (whose guard clears the shared flag on drop). + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn quiesce_under_held_gate_keeps_caller_gate_raised() { + let lifecycle = make_lifecycle(); + + // Caller (the Clear flow) raises and holds the gate before draining. + let hold = lifecycle.hold_quiescing_gate(); + assert!( + lifecycle.quiescing.load(Ordering::Acquire) > 0, + "caller's hold raised the gate" + ); + + // Drain under the held gate (no loop registered → NotRunning); the + // gate must remain raised across the call. + let status = lifecycle.quiesce_under_held_gate().await; + assert_eq!(status, WorkerStatus::NotRunning); + assert!( + lifecycle.quiescing.load(Ordering::Acquire) > 0, + "gate stays raised across the drain — no lapse for a direct pass" + ); + + // A direct pass attempting to begin during Clear (gate held) is + // refused: it bails after the CAS on the raised gate. 
+ assert!( + lifecycle.begin_pass().is_none(), + "the continuously-held gate holds off a new direct pass" + ); + + // Once Clear's own guard drops, the gate reopens for later work. + drop(hold); + assert_eq!( + lifecycle.quiescing.load(Ordering::Acquire), + 0, + "gate reopens once the caller's hold guard drops" + ); + } + + /// Racing public `quiesce()` during Clear: the Clear flow holds + /// `hold_quiescing_gate` continuously across its drain + wipe; a + /// concurrent public `quiesce()` that arrives in the window AFTER the + /// Clear has raised the gate but BEFORE the Clear has acquired the + /// registry's per-key `hold_clearing` latch must NOT lower the gate + /// on its return — the Clear's continuously-held barrier is what keeps + /// a direct `sync_now`/`sync_wallet` from re-persisting into the store + /// being wiped. We stage exactly this ordering by holding the + /// `hold_quiescing_gate` BUT NOT the clearing latch — so `quiesce`'s + /// `is_clearing` short-circuit does NOT fire and the local guard IS + /// installed. + /// + /// Non-vacuous: against the prior `AtomicBool` `quiescing` flag where + /// `quiesce()` installed an `AtomicFlagGuard` whose Drop unconditionally + /// stored `false`, this test would observe `quiescing == false` after + /// the racing `quiesce()` returned — the exact split-second window the + /// refcount design closes. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn quiesce_during_held_gate_does_not_lower_clears_barrier() { + let lifecycle = make_lifecycle(); + + // Clear has raised the gate (but has NOT yet taken the clearing + // latch — the racy ordering this test exercises). + let clearing_gate = lifecycle.hold_quiescing_gate(); + assert!( + lifecycle.quiescing_load_for_test(Ordering::Acquire), + "precondition: Clear's hold_quiescing_gate raised the gate" + ); + + // Racing public quiesce arrives. 
With no clearing latch held, + // `is_clearing` returns false and the local refcount guard IS + // installed (count goes 1 → 2 → back to 1 on its drop). + let status = lifecycle.quiesce().await; + assert_eq!(status, WorkerStatus::NotRunning); + + // The gate is STILL up: Clear's continuously-held ref keeps it so. + // Under the prior bool implementation the racing `quiesce`'s + // `AtomicFlagGuard` would have stored `false` on drop — lowering + // the Clear's own barrier and opening the window for a direct + // `sync_now`/`sync_wallet` to slip past `begin_pass`. + assert!( + lifecycle.quiescing_load_for_test(Ordering::Acquire), + "gate must stay raised across the racing public quiesce — \ + Clear's continuously-held hold_quiescing_gate must not be \ + lowered by another path's Drop" + ); + // And `begin_pass` still bails, so no pass can slip into the + // window the bug used to open. + assert!( + lifecycle.begin_pass().is_none(), + "the still-raised gate keeps a direct pass from claiming the slot" + ); + + drop(clearing_gate); + assert!( + !lifecycle.quiescing_load_for_test(Ordering::Acquire), + "gate reopens once Clear's own guard drops" + ); + } + + /// Refcount composition: two `hold_quiescing_gate` guards raise the + /// gate to count 2; dropping one leaves count 1 (still "raised"); + /// dropping the second returns to 0. The wallet-side invariant that + /// makes the public `quiesce()` ↔ `hold_quiescing_gate` race-free — + /// neither party's Drop can lower the other's barrier — rides on + /// exactly this property. + /// + /// Non-vacuous: against the prior `AtomicBool` `quiescing` flag (where + /// `hold_quiescing_gate` returned an `AtomicFlagGuard` that + /// unconditionally `store(false)` on Drop), dropping the first guard + /// would lower the shared flag while the second guard was still in + /// scope — the gate-continuity gap this fix closes. 
+ #[test] + fn refcount_guards_compose() { + let registry = ThreadRegistry::::new(); + let lifecycle = CoordinatorLifecycle::new(registry, WalletWorker::IdentitySync, 60); + + let g1 = lifecycle.hold_quiescing_gate(); + assert_eq!(lifecycle.quiescing.load(Ordering::Acquire), 1); + let g2 = lifecycle.hold_quiescing_gate(); + assert_eq!(lifecycle.quiescing.load(Ordering::Acquire), 2); + + drop(g1); + assert_eq!( + lifecycle.quiescing.load(Ordering::Acquire), + 1, + "dropping one holder must not lower the gate past the surviving holder's contribution" + ); + assert!( + lifecycle.quiescing_load_for_test(Ordering::Acquire), + "gate is still observed raised while any holder remains in scope" + ); + assert!( + lifecycle.begin_pass().is_none(), + "the still-raised gate keeps `begin_pass` from claiming the slot" + ); + + drop(g2); + assert_eq!(lifecycle.quiescing.load(Ordering::Acquire), 0); + assert!( + !lifecycle.quiescing_load_for_test(Ordering::Acquire), + "gate reopens once the last holder drops" + ); + } + + /// Regression: a timeout / forced cancellation that drops the + /// `lifecycle.quiesce()` future mid-drain must NOT leak the `quiescing` + /// refcount. With a registry-side `drain_hook` whose `fetch_add` ran + /// inside the registry's quiesce, but whose matching `fetch_sub` lived + /// past the inner `.await` in `cancel_join_and_drain`, the outer drop + /// would unwind the local guard's contribution while leaving the hook's + /// contribution stranded — count permanently `+1`, gate stuck raised, + /// every future `begin_pass` would bail and the coordinator would be + /// silently dead for the rest of the process. The wallet's + /// `clear_shielded_inner` wraps `quiesce_under_held_gate()` in a + /// `tokio::time::timeout`, so this is the live cancellation path. 
+ /// + /// Non-vacuous: against the prior commit (drain hook present) the + /// post-cancel count stays at `1` and the assertion below times out; + /// with the hook dropped (this fix), only the lifecycle's own + /// `RefcountedFlagGuard` is in play and its `Drop` runs on the + /// cancellation unwind, returning the count to `0`. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn quiesce_cancellation_during_drain_does_not_leak_quiescing_refcount() { + let lifecycle = make_lifecycle(); + + // Register a background loop whose `pass` blocks the OS thread via + // `std::thread::sleep` once it is in flight — flagging + // `pass_entered` so the test can wait for the loop body to be + // committed before driving teardown. With the OS thread parked + // inside `pass`, the loop's biased `select!` cannot observe the + // cancel signal and the registry's join poll loop sits waiting — + // owning the mid-drain window we need to drop the future on top of. + // + // Without the `pass_entered` handshake, the OS thread can race + // teardown: `spawn_periodic_loop` returns before the thread has + // even reached the loop body, so `registry.quiesce` can fire its + // cancel signal before the thread enters `select!`. The `biased; + // cancel.cancelled() => break,` arm then wins on first poll — pass + // never runs, the thread exits cleanly, status comes back as `Ok`, + // and the matching `fetch_sub` in `cancel_join_and_drain` runs. + // Result: pre-fix doesn't leak in that race and the assertion is + // vacuously satisfied. Waiting for `pass_entered` closes the race. 
+ let pass_entered = Arc::new(AtomicBool::new(false)); + let pass_release = Arc::new(AtomicBool::new(false)); + let pass_entered_c = Arc::clone(&pass_entered); + let pass_release_c = Arc::clone(&pass_release); + lifecycle.spawn_periodic_loop( + move || { + let entered = Arc::clone(&pass_entered_c); + let release = Arc::clone(&pass_release_c); + async move { + entered.store(true, Ordering::Release); + while !release.load(Ordering::Acquire) { + std::thread::sleep(Duration::from_millis(5)); + } + } + }, + || Duration::from_secs(60), + ); + + // Wait for the OS thread to be IN `pass` (past `select!`'s biased + // cancel arm), so a cancel signal from the upcoming `quiesce` + // cannot win the select before the loop body runs. + let start = std::time::Instant::now(); + while !pass_entered.load(Ordering::Acquire) { + if start.elapsed() > Duration::from_secs(2) { + pass_release.store(true, Ordering::Release); + panic!("background loop never entered `pass`"); + } + tokio::time::sleep(Duration::from_millis(1)).await; + } + + // Drive `quiesce` on a separate task so we can abort its future + // mid-drain — simulating the `clear_shielded_inner` + // `tokio::time::timeout` firing inside `registry.quiesce`. + let lc_q = Arc::clone(&lifecycle); + let quiesce_task = tokio::spawn(async move { lc_q.quiesce().await }); + + // Wait for the lifecycle's local guard to raise the gate, then give + // the registry's drain hook room to run (it would also raise the + // gate, pre-fix). 50ms is generous: the hook is `fetch_add` + a + // boxed empty future — microseconds — and the join poll's 5ms + // cadence means the registry is sitting in the join wait by now. 
+ let start = std::time::Instant::now(); + while !lifecycle.quiescing_load_for_test(Ordering::Acquire) { + if start.elapsed() > Duration::from_secs(2) { + pass_release.store(true, Ordering::Release); + panic!("quiesce never raised the gate"); + } + tokio::time::sleep(Duration::from_millis(1)).await; + } + tokio::time::sleep(Duration::from_millis(50)).await; + + // Cancel mid-drain — the OS thread is still parked in + // `std::thread::sleep`, the registry is still polling the join + // wait. Pre-fix, the drain hook has already run its `fetch_add`; + // the matching `fetch_sub` lives past the registry await and never + // gets a chance to run. + quiesce_task.abort(); + let _ = quiesce_task.await; + + // Assert the refcount returns to 0 within a generous window. With + // the leak, it stays at `1` forever and this loop times out. + let start = std::time::Instant::now(); + while lifecycle.quiescing_load_for_test(Ordering::Acquire) { + if start.elapsed() > Duration::from_millis(500) { + let leaked = lifecycle.quiescing.load(Ordering::Acquire); + pass_release.store(true, Ordering::Release); + panic!( + "quiescing refcount leaked past cancellation: count = {leaked} \ + (expected 0 once the dropped future's guards unwound)" + ); + } + tokio::time::sleep(Duration::from_millis(1)).await; + } + + // Cleanup: release the pass so the OS thread observes cancel and + // exits before the test runtime is dropped. + pass_release.store(true, Ordering::Release); + } +} diff --git a/packages/rs-platform-wallet/src/manager/identity_sync.rs b/packages/rs-platform-wallet/src/manager/identity_sync.rs index 8730398f978..38029c9f4e3 100644 --- a/packages/rs-platform-wallet/src/manager/identity_sync.rs +++ b/packages/rs-platform-wallet/src/manager/identity_sync.rs @@ -47,16 +47,17 @@ //! identities are registered and the SDK is connected. 
use std::collections::BTreeMap; -use std::sync::{ - atomic::{AtomicBool, AtomicU64, Ordering}, - Arc, Mutex as StdMutex, -}; +use std::sync::Arc; + +use dash_async::ThreadRegistry; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use super::coordinator_lifecycle::CoordinatorLifecycle; +use super::WalletWorker; + use dpp::balances::credits::TokenAmount; use dpp::prelude::Identifier; use tokio::sync::RwLock; -use tokio_util::sync::CancellationToken; use dash_sdk::platform::tokens::identity_token_balances::{ IdentityTokenBalances, IdentityTokenBalancesQuery, @@ -158,25 +159,14 @@ where /// over `P` so every `persister.store(...)` call on the hot sync /// loop dispatches statically. persister: Arc

, - /// Cancel token for the background loop, if running. - background_cancel: StdMutex>, - /// Monotonically increasing generation counter. Incremented each - /// time `start()` installs a new cancel token so the exiting - /// thread can tell whether its token is still current. - background_generation: AtomicU64, - interval_secs: AtomicU64, - is_syncing: AtomicBool, - /// Set by [`quiesce`](Self::quiesce) to gate new passes while it - /// drains an in-flight one. `sync_now` bails (after taking the - /// `is_syncing` slot) when this is set, so once `quiesce` observes - /// `is_syncing == false` no further pass can start — giving shutdown - /// a real "no more host-visible persister stores" barrier that + /// Shared lifecycle state + pass-gating protocol under the + /// [`WalletWorker::IdentitySync`] key: the registry handle, polling + /// interval, the `is_syncing` / `quiescing` handshake, and the + /// last-sync stamp. `start` / `stop` / `is_running` / `quiesce` and the + /// `sync_now` pass gate delegate to it. The `quiescing` half gives + /// shutdown a real "no more host-visible persister stores" barrier that /// cancel-only [`stop`](Self::stop) does not provide. - quiescing: AtomicBool, - /// Unix seconds of the last completed pass across all identities. - /// `0` = never. Identity-level timestamps live on the per-identity - /// rows in [`IdentitySyncManager::state`]. - last_sync_unix: AtomicU64, + lifecycle: CoordinatorLifecycle, /// Per-identity registry / cache. Keyed by identity id; each row /// carries the per-(identity, token) token rows plus the /// per-identity last-sync timestamp. @@ -199,16 +189,19 @@ where /// writes). The registry starts empty — call /// [`register_identity`](Self::register_identity) before /// [`start`](Self::start). - pub fn new(sdk: Arc, persister: Arc

) -> Self { + pub fn new( + sdk: Arc, + persister: Arc

, + registry: Arc>, + ) -> Self { Self { sdk, persister, - background_cancel: StdMutex::new(None), - background_generation: AtomicU64::new(0), - interval_secs: AtomicU64::new(DEFAULT_SYNC_INTERVAL_SECS), - is_syncing: AtomicBool::new(false), - quiescing: AtomicBool::new(false), - last_sync_unix: AtomicU64::new(0), + lifecycle: CoordinatorLifecycle::new( + registry, + WalletWorker::IdentitySync, + DEFAULT_SYNC_INTERVAL_SECS, + ), state: RwLock::new(BTreeMap::new()), } } @@ -309,35 +302,28 @@ where /// /// The running loop picks this up on its next sleep. pub fn set_interval(&self, interval: Duration) { - let secs = interval.as_secs().max(1); - self.interval_secs.store(secs, Ordering::Release); + self.lifecycle.set_interval(interval); } /// Current polling interval. pub fn interval(&self) -> Duration { - Duration::from_secs(self.interval_secs.load(Ordering::Acquire)) + self.lifecycle.interval() } /// Whether the background loop is currently running. pub fn is_running(&self) -> bool { - self.background_cancel - .lock() - .map(|g| g.is_some()) - .unwrap_or(false) + self.lifecycle.is_running() } /// Whether a sync pass is in flight right now. pub fn is_syncing(&self) -> bool { - self.is_syncing.load(Ordering::Acquire) + self.lifecycle.is_syncing() } /// Unix seconds of the last completed pass (across all identities), /// or `None` if no pass has ever completed. pub fn last_sync_unix_seconds(&self) -> Option { - match self.last_sync_unix.load(Ordering::Acquire) { - 0 => None, - n => Some(n), - } + self.lifecycle.last_sync_unix_seconds() } /// Per-identity last-sync timestamp. @@ -395,64 +381,30 @@ where /// The first pass runs immediately; subsequent passes fire every /// [`interval`](Self::interval). 
pub fn start(self: Arc) { - let mut guard = self.background_cancel.lock().expect("bg_cancel poisoned"); - if guard.is_some() { - return; - } - let cancel = CancellationToken::new(); - *guard = Some(cancel.clone()); - let my_gen = self.background_generation.fetch_add(1, Ordering::AcqRel) + 1; - drop(guard); - - let handle = tokio::runtime::Handle::current(); - let this = self; - std::thread::Builder::new() - .name("identity-sync".into()) - .spawn(move || { - handle.block_on(async move { - loop { - if cancel.is_cancelled() { - break; - } - - this.sync_now().await; - - let interval = this.interval(); - tokio::select! { - _ = tokio::time::sleep(interval) => {} - _ = cancel.cancelled() => break, - } - } - - // Only clear the slot if no newer start() has - // installed a replacement token since we launched. - if let Ok(mut guard) = this.background_cancel.lock() { - if this.background_generation.load(Ordering::Acquire) == my_gen { - *guard = None; - } - } - }); - }) - .expect("failed to spawn identity-sync thread"); + let pass_self = Arc::clone(&self); + let interval_self = Arc::clone(&self); + self.lifecycle.spawn_periodic_loop( + move || { + let this = Arc::clone(&pass_self); + async move { this.sync_now().await } + }, + move || interval_self.interval(), + ); } /// Stop the background sync loop. No-op if not running. /// /// **Cancel-only**: requests cancellation and returns immediately. A - /// pass already inside `sync_now` keeps running to completion, - /// including its `persister.store(...)` fan-out. For a real "nothing - /// is running and nothing more will be persisted" barrier — required - /// by manager shutdown so the host can free the persister context — - /// use [`quiesce`](Self::quiesce). + /// pass already inside `sync_now` is **cancelled mid-flight** at its + /// next `.await` (the loop's `biased; cancel-first` select drops the + /// `sync_now` future, see `start`). 
`persister.store(...)` calls that + /// completed before the cancel landed are committed; any in-flight + /// store is abandoned. For a real "nothing is running and nothing + /// more will be persisted" barrier — required by manager shutdown so + /// the host can free the persister context — use + /// [`quiesce`](Self::quiesce). pub fn stop(&self) { - if let Some(token) = self - .background_cancel - .lock() - .expect("bg_cancel poisoned") - .take() - { - token.cancel(); - } + self.lifecycle.stop(); } /// Cancel the background loop **and wait for any in-flight sync pass @@ -473,13 +425,14 @@ where /// so its falling edge (with the gate up) is a sound "fully drained" /// signal. The gate is reopened before returning so a later /// start/sync works normally. - pub async fn quiesce(&self) { - self.quiescing.store(true, Ordering::Release); - self.stop(); - while self.is_syncing.load(Ordering::Acquire) { - tokio::time::sleep(Duration::from_millis(20)).await; - } - self.quiescing.store(false, Ordering::Release); + /// + /// Finally **joins** the loop's OS thread (after the drain, so the + /// thread is on its way out) and returns its terminal status. Joining + /// while the runtime is still alive is what lets the manager promise + /// the `!Send` loop has stopped touching `tokio::time` before a + /// one-shot host drops the runtime. + pub async fn quiesce(&self) -> dash_async::WorkerStatus { + self.lifecycle.quiesce().await } /// Run one sync pass across every registered identity. @@ -493,22 +446,13 @@ where /// `!Send` (no `tokio::spawn`) and because the design brief /// explicitly forbids it. pub async fn sync_now(&self) { - if self - .is_syncing - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) - .is_err() - { + // Claim the pass slot and honour the quiescing gate; bail without + // work (and without a `persister.store(...)` after quiesce returns) + // if a pass is already in flight or a teardown raised the gate. 
The + // returned guard clears `is_syncing` on every exit path. + let Some(_pass) = self.lifecycle.begin_pass() else { return; - } - - // A `quiesce()` may have raised the gate between our CAS and - // here; if so, release the slot and bail without running a pass - // so the drain can complete and shutdown gets a true barrier - // (no further `persister.store(...)` after quiesce returns). - if self.quiescing.load(Ordering::Acquire) { - self.is_syncing.store(false, Ordering::Release); - return; - } + }; // Snapshot the per-identity watch list under a short read // lock and release it before any network call. We keep @@ -531,8 +475,8 @@ where .duration_since(UNIX_EPOCH) .map(|d| d.as_secs()) .unwrap_or(0); - self.last_sync_unix.store(now, Ordering::Release); - self.is_syncing.store(false, Ordering::Release); + self.lifecycle.store_last_sync_unix(now); + // `_pass` drops here → `is_syncing = false` } /// Sync a single identity's watched tokens against Platform. @@ -673,7 +617,7 @@ where f.debug_struct("IdentitySyncManager") .field("is_running", &self.is_running()) .field("is_syncing", &self.is_syncing()) - .field("interval_secs", &self.interval_secs.load(Ordering::Acquire)) + .field("interval_secs", &self.lifecycle.interval_secs()) .field("last_sync_unix", &self.last_sync_unix_seconds()) .finish() } @@ -749,7 +693,8 @@ mod tests { fn make_manager() -> Arc> { let sdk = Arc::new(dash_sdk::SdkBuilder::new_mock().build().expect("mock sdk")); let persister = Arc::new(NoopPersister); - Arc::new(IdentitySyncManager::new(sdk, persister)) + let registry = ThreadRegistry::new(); + Arc::new(IdentitySyncManager::new(sdk, persister, registry)) } fn make_recording_manager() -> ( @@ -758,8 +703,13 @@ mod tests { ) { let sdk = Arc::new(dash_sdk::SdkBuilder::new_mock().build().expect("mock sdk")); let persister = Arc::new(RecordingPersister::new()); + let registry = ThreadRegistry::new(); ( - Arc::new(IdentitySyncManager::new(sdk, Arc::clone(&persister))), + 
Arc::new(IdentitySyncManager::new( + sdk, + Arc::clone(&persister), + registry, + )), persister, ) } @@ -880,63 +830,6 @@ mod tests { assert_eq!(mgr.interval(), Duration::from_secs(120)); } - /// `quiesce()` must not return while a pass is in flight, and must - /// return promptly once the pass drains. - /// - /// Drives the real `is_syncing` lifecycle: a background task takes - /// the slot via the same `compare_exchange` the real `sync_now` - /// uses, holds it across a sleep (standing in for the pass body + - /// persister fan-out, which `sync_now` keeps the flag set across), - /// then clears it. We assert `quiesce()` is still pending while the - /// flag is held and completes after it falls — i.e. the falling edge - /// of `is_syncing` is what unblocks the barrier. - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn quiesce_blocks_until_in_flight_pass_drains() { - let mgr = make_manager(); - - // Stand in for an in-flight `sync_now`: take the `is_syncing` - // slot exactly as the real pass does, hold it, then release. - let holder = Arc::clone(&mgr); - let pass = tokio::spawn(async move { - assert!( - holder - .is_syncing - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) - .is_ok(), - "test should own the is_syncing slot" - ); - tokio::time::sleep(Duration::from_millis(200)).await; - holder.is_syncing.store(false, Ordering::Release); - }); - - // Give the holder task a chance to take the slot before we - // start draining. - while !mgr.is_syncing() { - tokio::time::sleep(Duration::from_millis(5)).await; - } - - let quiesce_fut = mgr.quiesce(); - tokio::pin!(quiesce_fut); - - // While the pass holds the flag, quiesce must stay pending. - tokio::select! 
{ - _ = &mut quiesce_fut => panic!("quiesce returned while a pass was in flight"), - _ = tokio::time::sleep(Duration::from_millis(50)) => {} - } - assert!(mgr.is_syncing(), "pass should still be in flight"); - - // Once the pass drains, quiesce must return (well within a - // generous bound — it polls every 20ms). - tokio::time::timeout(Duration::from_secs(2), &mut quiesce_fut) - .await - .expect("quiesce did not return after the pass drained"); - - // The gate is reopened before quiesce returns. - assert!(!mgr.quiescing.load(Ordering::Acquire)); - assert!(!mgr.is_syncing()); - pass.await.unwrap(); - } - /// A `sync_now()` invoked while `quiescing` is set must bail without /// running the pass — in particular, without calling /// `persister.store(...)`. This is the gate that prevents a pass @@ -948,8 +841,8 @@ mod tests { let token_x = Identifier::from([10u8; 32]); mgr.register_identity(id_a, [token_x]).await; - // Raise the gate as `quiesce()` would. - mgr.quiescing.store(true, Ordering::Release); + // Raise the gate as `quiesce()` would, held across the pass. 
+ let _gate = mgr.lifecycle.hold_quiescing_gate(); mgr.sync_now().await; diff --git a/packages/rs-platform-wallet/src/manager/load.rs b/packages/rs-platform-wallet/src/manager/load.rs index 8e7af9be1c7..1d8baf163bc 100644 --- a/packages/rs-platform-wallet/src/manager/load.rs +++ b/packages/rs-platform-wallet/src/manager/load.rs @@ -3,8 +3,12 @@ use std::collections::BTreeMap; use std::sync::Arc; +use key_wallet::wallet::managed_wallet_info::ManagedWalletInfo; + use crate::changeset::{ClientStartState, ClientWalletStartState, PlatformWalletPersistence}; use crate::error::PlatformWalletError; +use crate::events::PlatformEvent; +use crate::manager::load_outcome::{LoadOutcome, SkipReason}; use crate::wallet::core::WalletBalance; use crate::wallet::identity::IdentityManager; use crate::wallet::platform_wallet::{PlatformWalletInfo, WalletId}; @@ -13,23 +17,42 @@ use crate::wallet::PlatformWallet; use super::PlatformWalletManager; impl PlatformWalletManager

{ - /// Load the full [`ClientStartState`] from the configured persister - /// and rehydrate the manager's `wallet_manager` and `wallets` maps. + /// Restore every persisted wallet as a **watch-only** entry — no + /// signing key material is derived here. The persister hands back a + /// keyless reconstruction snapshot; each wallet is rebuilt via + /// [`Wallet::new_watch_only`](key_wallet::wallet::Wallet::new_watch_only) + /// from its [`AccountRegistrationEntry`](crate::changeset::AccountRegistrationEntry) + /// manifest, the keyless core-state projection is applied, and the + /// result is registered into the manager. + /// + /// The load path never touches the seed, so it performs no wrong-seed + /// check. Signing happens later, on demand, via the configured + /// [`MnemonicResolverHandle`]. /// - /// For each persisted wallet this builds a `PlatformWalletInfo` from - /// the snapshot (core wallet info, identity manager, tracked asset - /// locks) and inserts the `(Wallet, PlatformWalletInfo)` pair into - /// the inner [`WalletManager`]. A matching [`PlatformWallet`] handle - /// is then constructed and registered in `self.wallets`. + /// # Skip vs hard-fail /// - /// If the snapshot includes platform-address provider state, each - /// per-wallet slice is handed to - /// [`PlatformAddressWallet::initialize_from_persisted`](crate::wallet::platform_addresses::PlatformAddressWallet::initialize_from_persisted); - /// wallets missing from that slice get a fresh - /// [`PlatformAddressWallet::initialize`](crate::wallet::platform_addresses::PlatformAddressWallet::initialize). + /// - **Per-row decode/projection failure** (empty manifest, malformed + /// xpub, duplicate `account_type`, …): the wallet is **skipped** — + /// never inserted into `wallet_manager` / `self.wallets`, recorded + /// in [`LoadOutcome::skipped`] with a structural + /// [`SkipReason::CorruptPersistedRow`], and a + /// [`PlatformEvent::WalletSkippedOnLoad`] is emitted. 
One bad row + /// never aborts the others; the call still returns `Ok`. + /// - **Whole-load failure** (persister I/O, programmer error, the + /// no-silent-zero topology check in + /// [`apply_persisted_core_state`](super::rehydrate::apply_persisted_core_state)): + /// `Err(_)` — every wallet inserted earlier in this pass is + /// rolled back. Skipped wallets never entered the maps so the + /// rollback path never sees them. /// - /// [`WalletManager`]: key_wallet_manager::WalletManager - pub async fn load_from_persistor(&self) -> Result<(), PlatformWalletError> { + /// Platform-address provider state is restored per wallet via + /// [`initialize_from_persisted`](crate::wallet::platform_addresses::PlatformAddressWallet::initialize_from_persisted), + /// or a fresh + /// [`initialize`](crate::wallet::platform_addresses::PlatformAddressWallet::initialize) + /// when the snapshot carries no slice for it. + /// + /// [`MnemonicResolverHandle`]: rs_sdk_ffi::MnemonicResolverHandle + pub async fn load_from_persistor(&self) -> Result { let ClientStartState { mut platform_addresses, wallets, @@ -46,47 +69,71 @@ impl PlatformWalletManager

{ let persister_dyn: Arc = Arc::clone(&self.persister) as _; - // Track every wallet successfully inserted into - // `wallet_manager` and `self.wallets` during this call so the - // batch is transactional: if any later iteration fails (id - // mismatch, `initialize_from_persisted` error), we walk back - // every prior insert before bailing. Without this, a clean - // retry would collide on `WalletManager::insert_wallet` - // returning `WalletAlreadyExists` for every previously-loaded - // wallet — half-poisoning the manager until the process - // restarts. The orphan state is observable across the FFI - // boundary with no Swift-side reset path, so transactional - // semantics matter for this hydration API. + // Transactional batch: every wallet inserted into + // `wallet_manager` / `self.wallets` is tracked so a later hard + // error walks back every prior insert. Skipped wallets never + // enter either map, so the rollback path never sees them. let mut inserted_in_manager: Vec = Vec::new(); let mut inserted_in_wallets: Vec = Vec::new(); let mut load_error: Option = None; + let mut outcome = LoadOutcome::default(); 'load: for (expected_wallet_id, wallet_state) in wallets { let ClientWalletStartState { - wallet, - wallet_info, + network, + birth_height, + account_manifest, + core_state, identity_manager, unused_asset_locks, + contacts, + identity_keys, } = wallet_state; - // Flatten the (account → outpoint → lock) map into the flat - // OutPoint → TrackedAssetLock map that `PlatformWalletInfo` - // holds today. + // Build the watch-only wallet from the keyless manifest. A + // structural decode failure skips this row (per-row + // resilience) — it never aborts the batch and never inserts + // a degraded placeholder. 
+ let wallet = match super::rehydrate::build_watch_only_wallet( + network, + expected_wallet_id, + &account_manifest, + ) { + Ok(w) => w, + Err(row_err) => { + let reason = SkipReason::CorruptPersistedRow { + kind: row_err.into(), + }; + outcome.skipped.push((expected_wallet_id, reason.clone())); + self.event_manager + .on_platform_event(&PlatformEvent::WalletSkippedOnLoad { + wallet_id: expected_wallet_id, + reason, + }); + continue 'load; + } + }; + + // Mint the managed-info skeleton from the watch-only wallet, + // then apply the keyless persisted core state (UTXOs, sync + // watermarks, per-account balances). A wallet with persisted + // UTXOs but no funds account hard-fails here rather than + // reconstructing a silent zero balance. + let mut wallet_info = ManagedWalletInfo::from_wallet(&wallet, birth_height); + if let Err(e) = + super::rehydrate::apply_persisted_core_state(&mut wallet_info, &core_state) + { + load_error = Some(e); + break 'load; + } + + // Flatten the (account → outpoint → lock) map. let mut tracked_asset_locks = BTreeMap::new(); for (_account_index, account_locks) in unused_asset_locks { tracked_asset_locks.extend(account_locks); } let balance = Arc::new(WalletBalance::new()); - // Mirror the inner `ManagedWalletInfo.balance` (already - // recomputed from the freshly-loaded UTXO set on the FFI - // side via `update_balance`) into the lock-free `Arc` the - // UI reads. Without this, `wallet.balance()` reports zero - // for restored wallets even though the per-account totals - // and the inner `core_wallet.balance` are correct. - // `WalletBalance::set` is `pub(crate)`, which is why this - // step has to live inside `platform_wallet` rather than - // the FFI loader. let core_balance = &wallet_info.balance; balance.set( core_balance.confirmed(), @@ -94,17 +141,19 @@ impl PlatformWalletManager

{ core_balance.immature(), core_balance.locked(), ); + // Build the identity manager from the (id, balance, + // revision) skeleton, then layer the persisted PUBLIC + // contacts + identity keys onto it — the same routing the + // runtime changeset-replay path uses. + let mut identity_manager = IdentityManager::from(identity_manager); + identity_manager.apply_contacts_and_keys(contacts, identity_keys, network); let platform_info = PlatformWalletInfo { core_wallet: wallet_info, balance: Arc::clone(&balance), - identity_manager: IdentityManager::from(identity_manager), + identity_manager, tracked_asset_locks, }; - // Insert into `wallet_manager` first so we have a wallet - // handle to validate against. Track success in - // `inserted_in_manager` so the batch-rollback at the - // bottom can unwind on any later-iteration failure. let wallet_id = { let mut wm = self.wallet_manager.write().await; match wm.insert_wallet(wallet, platform_info) { @@ -120,15 +169,6 @@ impl PlatformWalletManager

{ }; inserted_in_manager.push(wallet_id); - if wallet_id != expected_wallet_id { - load_error = Some(PlatformWalletError::WalletCreation(format!( - "Persisted wallet id {} does not match recomputed id {}", - hex::encode(expected_wallet_id), - hex::encode(wallet_id) - ))); - break 'load; - } - let broadcaster = Arc::new(crate::broadcaster::SpvBroadcaster::new(Arc::clone( &self.spv_manager, ))); @@ -142,10 +182,6 @@ impl PlatformWalletManager

{ broadcaster, ); - // Initialize the platform-address provider. If the snapshot - // carried a slice for this wallet, restore it directly; - // otherwise do a fresh scan from the live wallet manager. - // Failures break to the rollback path below. if let Some(persisted) = platform_addresses.remove(&wallet_id) { if let Err(e) = platform_wallet .platform() @@ -167,13 +203,10 @@ impl PlatformWalletManager

{ wallets_guard.insert(wallet_id, platform_wallet); drop(wallets_guard); inserted_in_wallets.push(wallet_id); + outcome.loaded.push(wallet_id); } if let Some(err) = load_error { - // Walk back every wallet committed in this call so the - // manager state matches what it was before. Order: - // remove from `self.wallets` first (UI surface), then - // from the inner `wallet_manager`. if !inserted_in_wallets.is_empty() { let mut wallets_guard = self.wallets.write().await; for id in &inserted_in_wallets { @@ -189,6 +222,6 @@ impl PlatformWalletManager

{ return Err(err); } - Ok(()) + Ok(outcome) } } diff --git a/packages/rs-platform-wallet/src/manager/load_outcome.rs b/packages/rs-platform-wallet/src/manager/load_outcome.rs new file mode 100644 index 00000000000..b71b28fe406 --- /dev/null +++ b/packages/rs-platform-wallet/src/manager/load_outcome.rs @@ -0,0 +1,77 @@ +//! Aggregate result of [`load_from_persistor`]. +//! +//! [`load_from_persistor`]: super::PlatformWalletManager::load_from_persistor + +use crate::wallet::platform_wallet::WalletId; + +/// Why a persisted wallet row was skipped during a load pass. +/// +/// Load is **watch-only** (no seed material involved): signing keys are +/// derived later, on demand, via the [`MnemonicResolverHandle`] sign +/// path. A skip therefore means the persisted row itself was unusable — +/// a per-row decode/structural failure that fails one wallet without +/// aborting the batch. The only reason is +/// [`CorruptPersistedRow`](Self::CorruptPersistedRow): the load path +/// never touches the seed, so it cannot skip for a wrong or unavailable +/// seed. Variants carry no key material (SECRETS.md SEC-REQ-2.0.1). +/// +/// [`MnemonicResolverHandle`]: rs_sdk_ffi::MnemonicResolverHandle +#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)] +pub enum SkipReason { + /// The persisted row could not be reconstructed: a structural decode + /// failure on the keyless account manifest or core-state projection. + /// `kind` distinguishes the failure mode without leaking row bytes. + #[error("persisted wallet row corrupt: {kind}")] + CorruptPersistedRow { + /// Structural family of the decode/projection failure. + kind: CorruptKind, + }, +} + +/// Structural family of [`SkipReason::CorruptPersistedRow`]. +/// +/// The variants are deliberately coarse — a finer split would require +/// the persister to round-trip backend error context that may carry +/// row-derived bytes. Apps drive their UI from the *family*, not from +/// the inner message. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CorruptKind { + /// The wallet row exists but has no usable `AccountRegistrationEntry` + /// manifest to rebuild the account collection from. + MissingManifest, + /// One or more manifest `account_xpub` bytes failed to parse as a + /// well-formed extended public key. + MalformedXpub, + /// Any other structural decode / projection failure surfaced by the + /// persister. The string is a structural projection — never a raw + /// row byte slice or a hex-encoded key. + DecodeError(String), +} + +impl std::fmt::Display for CorruptKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::MissingManifest => f.write_str("missing account manifest"), + Self::MalformedXpub => f.write_str("malformed account xpub"), + Self::DecodeError(s) => write!(f, "decode error: {s}"), + } + } +} + +/// Aggregate, synchronous view of one +/// [`load_from_persistor`](super::PlatformWalletManager::load_from_persistor) +/// pass. +/// +/// `Ok(LoadOutcome)` with a non-empty `skipped` is **success** — a +/// per-row decode failure on one wallet is recorded and the batch +/// continues. The `Err` arm is reserved for whole-load failures +/// (persister I/O, programmer error). The load path is watch-only and +/// never touches the seed, so no wrong-seed outcome appears here. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct LoadOutcome { + /// Wallets fully reconstructed and registered, in load order. + pub loaded: Vec, + /// Wallets skipped because their persisted row was corrupt, in load + /// order. + pub skipped: Vec<(WalletId, SkipReason)>, +} diff --git a/packages/rs-platform-wallet/src/manager/mod.rs b/packages/rs-platform-wallet/src/manager/mod.rs index 3d04ca086d0..033481b0ca0 100644 --- a/packages/rs-platform-wallet/src/manager/mod.rs +++ b/packages/rs-platform-wallet/src/manager/mod.rs @@ -1,22 +1,26 @@ //! Multi-wallet manager with SPV coordination. 
pub mod accessors; +mod coordinator_lifecycle; pub mod identity_sync; mod load; +pub mod load_outcome; pub mod platform_address_sync; +pub mod rehydrate; #[cfg(feature = "shielded")] pub mod shielded_sync; mod wallet_lifecycle; use std::sync::Arc; +#[cfg(any(test, feature = "shielded"))] +use dash_async::WorkerStatus; +use dash_async::{ShutdownReport, ShutdownWeight, ThreadRegistry, WorkerConfig}; use tokio::sync::{Notify, RwLock}; -use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; use key_wallet_manager::WalletManager; -use crate::changeset::{spawn_wallet_event_adapter, PlatformWalletPersistence}; +use crate::changeset::{wallet_event_adapter_loop, PlatformWalletPersistence}; use crate::events::{PlatformEventHandler, PlatformEventManager}; use crate::manager::identity_sync::IdentitySyncManager; use crate::manager::platform_address_sync::PlatformAddressSyncManager; @@ -28,6 +32,30 @@ use crate::wallet::core::BalanceUpdateHandler; use crate::wallet::platform_wallet::{PlatformWalletInfo, WalletId}; use crate::wallet::PlatformWallet; +/// Identity of a background worker on the manager's shared +/// [`ThreadRegistry`]. The three periodic sync coordinators run as +/// OS-thread workers (their SDK futures are `!Send`); the wallet-event +/// adapter runs as a tokio task. Drained in ascending weight order on +/// [`shutdown`](PlatformWalletManager::shutdown): the coordinators +/// (weight 0) first, then the event adapter (weight 10) they store into. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum WalletWorker { + /// Platform-address (BLAST) balance sync loop. + PlatformAddressSync, + /// Per-identity token-state sync loop. + IdentitySync, + /// Shielded (Orchard) note sync loop. + ShieldedSync, + /// Wallet-event adapter task (sinks coordinator stores). + EventAdapter, +} + +/// Teardown weight of the periodic sync coordinators — drained first. 
+pub(crate) const COORDINATOR_WEIGHT: ShutdownWeight = ShutdownWeight(0); +/// Teardown weight of the wallet-event adapter — drained after the +/// coordinators that feed it. +pub(crate) const EVENT_ADAPTER_WEIGHT: ShutdownWeight = ShutdownWeight(10); + /// Multi-wallet coordinator with SPV sync and event handling. /// /// Events are dispatched through [`PlatformEventManager`] to all registered @@ -72,23 +100,59 @@ pub struct PlatformWalletManager { #[cfg(feature = "shielded")] pub(super) shielded_coordinator: Arc>>>, - /// Shared `PlatformEventManager` — held on the manager so - /// `configure_shielded` can install a per-chunk progress handler - /// onto the freshly-created `NetworkShieldedCoordinator` that - /// forwards into `on_shielded_sync_progress`. Sub-managers + /// Shared `PlatformEventManager`, retained on the manager for the + /// two callers that fan out platform-wallet events directly: + /// `load_from_persistor` surfaces per-wallet + /// [`PlatformEvent`](crate::events::PlatformEvent) skip + /// notifications to the app handler, and (under the `shielded` + /// feature) `configure_shielded` installs a per-chunk progress + /// handler onto the freshly-created `NetworkShieldedCoordinator` + /// that forwards into `on_shielded_sync_progress`. Sub-managers /// (`SpvRuntime`, `PlatformAddressSyncManager`, etc.) hold their - /// own clones already, so `configure_shielded` is the only reader of - /// this retained handle — hence it is `shielded`-gated. - #[cfg(feature = "shielded")] + /// own clones already. Retained unconditionally because + /// `load_from_persistor` reads it regardless of the `shielded` + /// feature. pub(super) event_manager: Arc, pub(super) persister: Arc

, - /// Cancellation token + join handle for the wallet-event adapter - /// task. Held so [`shutdown`] can stop it cleanly when the manager - /// is torn down. - pub(super) event_adapter_cancel: CancellationToken, - pub(super) event_adapter_join: tokio::sync::Mutex>>, + /// Shared worker-lifecycle engine. Owns every background worker's + /// cancellation token + join handle, the restart reap-or-park, and the + /// orphan list. The coordinators hold a clone and register their loops + /// on it; the event adapter runs here as a tokio task. [`shutdown`] + /// drains it in weight order and joins every worker before returning. + pub(super) registry: Arc>, } +/// Maximum time (seconds) the teardown paths — `shutdown()` and +/// `clear_shielded` — wait for one coordinator's quiesce+join to +/// complete. (The FFI `shielded_sync_stop` bridge is cancel-only and +/// does not consume this budget.) +/// +/// This is a backstop, not the primary stop mechanism. `quiesce()` +/// cancels the loop, which aborts any in-flight pass at its `.await` +/// point (see each coordinator's `start()` select), so the `is_syncing` +/// drain clears promptly and the join normally lands far inside this +/// window. The deadline fires only if a pass's *drop* itself wedges +/// (e.g. a blocking destructor); on timeout the coordinator slot reports +/// [`WorkerStatus::Timeout`] rather than hanging forever. +pub const SHUTDOWN_JOIN_TIMEOUT_SECS: u64 = 30; + +/// Grace period (seconds) [`PlatformWalletManager::shutdown`] spends +/// polling any orphans parked in the shared [`ThreadRegistry`] before +/// declaring a survivor [`Detached`](WorkerStatus::Detached). +/// +/// Unlike a live coordinator — whose `quiesce()` may legitimately spend +/// seconds draining an in-flight pass, hence the 30 s +/// [`SHUTDOWN_JOIN_TIMEOUT_SECS`] — an orphan is a thread an earlier reap +/// already had to detach *because it was wedged past its 1 s backstop*. 
+/// A healthy detached thread finishes within milliseconds of the +/// cancellation it long ago received (so `is_finished()` is usually true +/// on the first poll and the join is instant); one still alive after this +/// grace is wedged in a non-yielding `Drop` and will not finish however +/// long we wait. A short grace therefore separates "finishing" from +/// "wedged" without stretching teardown, and reporting `Detached` is the +/// conservative, UAF-safe outcome (the host delays freeing its context). +pub(crate) const SHUTDOWN_ORPHAN_GRACE_SECS: u64 = 1; + impl PlatformWalletManager

{ /// Create a new PlatformWalletManager. /// @@ -104,14 +168,28 @@ impl PlatformWalletManager

{ let wallets = Arc::new(RwLock::new(std::collections::BTreeMap::new())); let lock_notify = Arc::new(Notify::new()); - // Spawn the wallet-event adapter that translates upstream - // `WalletEvent`s into `CoreChangeSet`s and forwards them to - // the persister. - let event_adapter_cancel = CancellationToken::new(); - let event_adapter_join = spawn_wallet_event_adapter( - Arc::clone(&wallet_manager), - Arc::clone(&persister), - event_adapter_cancel.clone(), + // Shared worker-lifecycle engine. The 1 s reap backstop (separate + // from the 30 s managed-join budget) is the grace a wedged prior + // thread gets before it is reported `Detached`. + let registry = ThreadRegistry::with_reap_backstop(std::time::Duration::from_secs( + SHUTDOWN_ORPHAN_GRACE_SECS, + )); + + // Register the wallet-event adapter as a tokio task on the + // registry. It sinks the coordinators' stores, so it drains AFTER + // them (weight 10 vs the coordinators' 0). + let adapter_wallet_manager = Arc::clone(&wallet_manager); + let adapter_persister = Arc::clone(&persister); + registry.start_task( + WalletWorker::EventAdapter, + WorkerConfig { + weight: EVENT_ADAPTER_WEIGHT, + join_budget: std::time::Duration::from_secs(SHUTDOWN_JOIN_TIMEOUT_SECS), + drain: None, + }, + move |cancel| { + wallet_event_adapter_loop(adapter_wallet_manager, adapter_persister, cancel) + }, ); // Build handler list: app handler + internal handlers. @@ -135,10 +213,12 @@ impl PlatformWalletManager

{ let platform_address_sync = Arc::new(PlatformAddressSyncManager::new( Arc::clone(&wallets), Arc::clone(&event_manager), + Arc::clone(&registry), )); let identity_sync = Arc::new(IdentitySyncManager::new( Arc::clone(&sdk), Arc::clone(&persister), + Arc::clone(&registry), )); #[cfg(feature = "shielded")] let shielded_coordinator: Arc< @@ -148,6 +228,7 @@ impl PlatformWalletManager

{ let shielded_sync = Arc::new(ShieldedSyncManager::new( Arc::clone(&event_manager), Arc::clone(&shielded_coordinator), + Arc::clone(&registry), )); Self { sdk, @@ -161,11 +242,9 @@ impl PlatformWalletManager

{ shielded_sync_manager: shielded_sync, #[cfg(feature = "shielded")] shielded_coordinator, - #[cfg(feature = "shielded")] event_manager, persister, - event_adapter_cancel, - event_adapter_join: tokio::sync::Mutex::new(Some(event_adapter_join)), + registry, } } @@ -278,47 +357,916 @@ impl PlatformWalletManager

{ /// disk but its contents are reset to empty so the next bind cold- /// resyncs from index 0. /// - /// Returns an error if the coordinator's store reset fails; the host - /// must not commit its own persistence wipe in that case. + /// Returns an error — and leaves the store untouched — in two cases, so + /// the host knows **not** to commit its own persistence wipe: + /// - the in-flight sync pass did not drain cleanly (timed out on the join + /// backstop, or its loop ended non-cleanly) → + /// [`crate::error::PlatformWalletError::ShieldedShutdownIncomplete`]; or + /// - the coordinator's store reset itself fails. + /// + /// **Concurrency**: a concurrent direct `sync_now`/`sync_wallet` and a + /// full `shielded_sync_start` are both held off for the whole clear. The + /// quiescing gate is raised *continuously* (from before the drain, across + /// the liveness check, through the wipe), so a direct pass observes it and + /// bails with no lapse; the registry's per-key clearing latch no-ops a + /// racing `shielded_sync_start` over the same span, so a fresh loop cannot + /// land after the liveness check and re-persist into the store being wiped. #[cfg(feature = "shielded")] pub async fn clear_shielded(&self) -> Result<(), crate::error::PlatformWalletError> { - self.shielded_sync_manager.quiesce().await; + self.clear_shielded_inner(std::time::Duration::from_secs(SHUTDOWN_JOIN_TIMEOUT_SECS)) + .await + } + + /// [`clear_shielded`](Self::clear_shielded) with an explicit drain + /// deadline. Split out so tests can exercise the timeout path without + /// waiting the full production budget. 
+ #[cfg(feature = "shielded")] + async fn clear_shielded_inner( + &self, + drain_timeout: std::time::Duration, + ) -> Result<(), crate::error::PlatformWalletError> { + // Latch the shielded key on the registry for the WHOLE clear: a + // concurrent `shielded_sync_start()` is no-op'd by the registry + // until this clear completes, so a fresh worker cannot land + // after our liveness check and re-persist into the store we are + // about to wipe. Held in addition to the quiescing gate below + // (direct passes go through `begin_pass`, which the gate covers; + // a full start lands on the registry, which the latch covers). + let _clearing_latch = self.registry.hold_clearing(WalletWorker::ShieldedSync); + + // Raise and HOLD the shielded quiescing gate for the WHOLE clear, + // BEFORE quiescing — so the "no new pass" barrier never lapses + // between the drain, the liveness check, and the store wipe: a direct + // `sync_now`/`sync_wallet` landing anywhere in here observes the gate + // and bails instead of re-persisting into the store we are about to + // clear. The refcount semantics on `quiescing` mean this guard's + // contribution stays live across any racing public `quiesce()` — + // neither party's Drop can lower the other's barrier. The guard + // releases its own ref on return (every path). + let _clearing_gate = self.shielded_sync_manager.hold_quiescing_gate(); + + // Cancel the loop and drain any in-flight pass (incl. its persister + // fan-out). Bound the drain so a heavy direct pass cannot hang the + // host's Clear: on timeout the clear reports `Timeout` and aborts + // BEFORE the wipe, leaving the store intact. + let status = match tokio::time::timeout( + drain_timeout, + self.shielded_sync_manager.quiesce_under_held_gate(), + ) + .await + { + Ok(status) => status, + Err(_elapsed) => WorkerStatus::Timeout, + }; + + // Only commit the store wipe once the in-flight pass has fully + // drained. 
A partial/timed-out drain could let a surviving pass + // write into a store we just cleared, desyncing the host's own + // wipe from a repopulated tree. + if !status.is_clean() { + return Err(crate::error::PlatformWalletError::ShieldedShutdownIncomplete { status }); + } + + // Also refuse if a prior-generation shielded thread is still parked + // alive: it holds an `Arc` to the persister/store and could re-persist + // notes into the store we are about to wipe. The check is shielded- + // scoped (shares the `shielded_worker_alive` gate), so the other + // coordinators / the always-on event adapter running normally do not + // block Clear. + if self.shielded_worker_alive() { + return Err( + crate::error::PlatformWalletError::ShieldedShutdownIncomplete { + status: WorkerStatus::Detached, + }, + ); + } if let Some(coord) = self.shielded_coordinator().await { coord.clear().await?; } Ok(()) } - /// Stop all background tasks and wait for them to exit. + /// Stop all background workers and wait for them to exit. /// - /// **Quiesces** the periodic coordinators - /// (`PlatformAddressSyncManager`, `IdentitySyncManager`, - /// `ShieldedSyncManager`) — cancelling each loop *and draining any - /// in-flight pass to completion*, including its persister / - /// host-callback fan-out — then drains the wallet-event adapter task. - /// Idempotent. Call before dropping the manager when a clean - /// shutdown is required (e.g. on app termination); a dirty drop - /// simply leaks the tasks until the runtime exits. + /// Delegates to the shared [`ThreadRegistry::shutdown`], which drains + /// in ascending weight order: the periodic coordinators (weight 0) + /// first — concurrently, since they share no lock — then the + /// wallet-event adapter (weight 10) that sinks their stores, then any + /// parked orphans. Each worker's drain raises its `quiescing` gate, + /// cancels the loop, and **joins** its OS thread / task, so when this + /// returns every `!Send` loop has fully exited. 
Idempotent. /// /// Ordering matters: cancel-only `stop()` would let a pass already /// inside `sync_now` keep running and call `persister.store(...)` / - /// fire a host completion callback after the FFI's `destroy` - /// returned and the host freed the persister / event-handler - /// context — a use-after-free. So we `quiesce()` the sync managers - /// FIRST (so no further persister store or host callback can start), - /// and only THEN cancel + join the event adapter, which is the sink - /// those stores feed into. - pub async fn shutdown(&self) { - self.platform_address_sync_manager.quiesce().await; - self.identity_sync_manager.quiesce().await; + /// fire a host completion callback after the FFI's `destroy` returned + /// and the host freed the persister / event-handler context — a + /// use-after-free. Quiescing the coordinators (weight 0) before the + /// event adapter (weight 10) closes that window: no further store can + /// start before its sink is torn down. + /// + /// A host that drops the tokio runtime right after `shutdown()` + /// (one-shot / headless / stdio) is therefore safe — no coordinator + /// can still be polling `tokio::time` on a shutting-down runtime. The + /// returned [`ShutdownReport`] reports per-worker how each ended. + /// + /// **Precondition: must be called from a multi-thread Tokio runtime.** + /// Each coordinator's OS thread drives its loop via + /// [`Handle::block_on`](tokio::runtime::Handle::block_on) and needs the + /// runtime's timer/IO driver; a `current_thread` runtime can only + /// service one `block_on` at a time, so the join would deadlock. + /// [`ThreadRegistry::shutdown`] asserts this in both debug and release. + /// + /// Each worker's join is bounded by its own + /// [`SHUTDOWN_JOIN_TIMEOUT_SECS`] budget; on timeout its handle is + /// re-parked and the slot reports + /// [`WorkerStatus::Timeout`] rather than hanging forever + /// (F1: a dropped/timed-out join can never detach a live + /// thread). 
The clear-on-panic half rides on unwinding, so it holds + /// under `panic = "unwind"`; under the iOS `panic = "abort"` profiles a + /// pass panic aborts the process outright. + pub async fn shutdown(&self) -> ShutdownReport { + self.registry.shutdown().await + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering as AO}; + use std::time::Duration; + + use crate::changeset::{ClientStartState, PersistenceError, PlatformWalletChangeSet}; + use crate::manager::platform_address_sync::PlatformAddressSyncSummary; + + /// No-op persister — the lifecycle tests below never exercise the + /// real persistence pipeline, they just need a handle that satisfies + /// the manager's `P` bound. + struct NoopPersister; + + impl PlatformWalletPersistence for NoopPersister { + fn store( + &self, + _wallet_id: WalletId, + _changeset: PlatformWalletChangeSet, + ) -> Result<(), PersistenceError> { + Ok(()) + } + + fn flush(&self, _wallet_id: WalletId) -> Result<(), PersistenceError> { + Ok(()) + } + + fn load(&self) -> Result { + Ok(ClientStartState::default()) + } + } + + /// No-op event handler standing in for the host's FFI handler. + struct NoopHandler; + impl dash_spv::EventHandler for NoopHandler {} + impl PlatformEventHandler for NoopHandler {} + + /// Build a manager over a mock SDK + no-op persister/handler. Cheap: + /// `new` wires the sub-managers and spawns the event adapter but + /// starts no coordinator threads. + fn make_manager() -> PlatformWalletManager { + let sdk = Arc::new(dash_sdk::SdkBuilder::new_mock().build().expect("mock sdk")); + let persister = Arc::new(NoopPersister); + let handler: Arc = Arc::new(NoopHandler); + PlatformWalletManager::new(sdk, persister, handler) + } + + /// Build a manager that fires a slow (300 ms std::thread::sleep) callback + /// on `on_platform_address_sync_completed`. Used by the in-flight-pass + /// drain test. 
+ fn make_manager_with_slow_handler( + started: Arc, + completed: Arc, + ) -> PlatformWalletManager { + struct SlowHandler { + started: Arc, + completed: Arc, + } + impl dash_spv::EventHandler for SlowHandler {} + impl PlatformEventHandler for SlowHandler { + fn on_platform_address_sync_completed(&self, _: &PlatformAddressSyncSummary) { + self.started.store(true, AO::Release); + std::thread::sleep(Duration::from_millis(300)); + self.completed.store(true, AO::Release); + } + } + + let sdk = Arc::new(dash_sdk::SdkBuilder::new_mock().build().expect("mock sdk")); + let persister = Arc::new(NoopPersister); + let handler: Arc = Arc::new(SlowHandler { started, completed }); + PlatformWalletManager::new(sdk, persister, handler) + } + + /// Start every periodic coordinator's background OS-thread loop. + fn start_coordinators(m: &PlatformWalletManager

) { + Arc::clone(&m.platform_address_sync_manager).start(); + Arc::clone(&m.identity_sync_manager).start(); + #[cfg(feature = "shielded")] + Arc::clone(&m.shielded_sync_manager).start(); + } + + /// Happy path: `shutdown()` joins every started worker and reports + /// `Ok`; it completes within a bounded time (no `spawn_blocking` + /// starvation/deadlock); a second `shutdown()` finds nothing left to + /// join (`NotRunning`) — idempotent. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn shutdown_joins_all_workers_reports_ok_and_is_idempotent() { + let manager = make_manager(); + start_coordinators(&manager); + // Let the loops enter `block_on` so we exercise the live-loop + // join path (a thread cancelled before its first poll joins too). + tokio::time::sleep(Duration::from_millis(50)).await; + + let status = tokio::time::timeout(Duration::from_secs(10), manager.shutdown()) + .await + .expect("shutdown join must complete within bound"); + assert_eq!( + status.per_worker.get(&WalletWorker::PlatformAddressSync), + Some(&WorkerStatus::Ok) + ); + assert_eq!( + status.per_worker.get(&WalletWorker::IdentitySync), + Some(&WorkerStatus::Ok) + ); #[cfg(feature = "shielded")] - self.shielded_sync_manager.quiesce().await; + assert_eq!( + status.per_worker.get(&WalletWorker::ShieldedSync), + Some(&WorkerStatus::Ok) + ); + #[cfg(not(feature = "shielded"))] + assert!(status.per_worker.get(&WalletWorker::ShieldedSync).is_none()); + assert_eq!( + status.per_worker.get(&WalletWorker::EventAdapter), + Some(&WorkerStatus::Ok) + ); + assert!(status.all_clean()); + + // Handles consumed by the first join → nothing left to join. 
+ let again = manager.shutdown().await; + assert_eq!( + again.per_worker.get(&WalletWorker::PlatformAddressSync), + Some(&WorkerStatus::NotRunning) + ); + assert_eq!( + again.per_worker.get(&WalletWorker::IdentitySync), + Some(&WorkerStatus::NotRunning) + ); + assert_eq!( + again.per_worker.get(&WalletWorker::EventAdapter), + Some(&WorkerStatus::NotRunning) + ); + assert!(again.all_clean()); + } + + /// Never-started coordinators are absent from the report — the registry + /// only keys workers it actually registered, so a coordinator whose + /// `start()` never ran has no `per_worker` entry. The event adapter is + /// spawned in `new`, so it still joins `Ok`. Absent workers do not affect + /// `all_clean()`. + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn shutdown_without_starting_reports_not_running() { + let manager = make_manager(); + + let status = manager.shutdown().await; + assert!(!status + .per_worker + .contains_key(&WalletWorker::PlatformAddressSync)); + assert!(!status.per_worker.contains_key(&WalletWorker::IdentitySync)); + assert!(!status.per_worker.contains_key(&WalletWorker::ShieldedSync)); + assert_eq!( + status.per_worker.get(&WalletWorker::EventAdapter), + Some(&WorkerStatus::Ok) + ); + assert!(status.all_clean()); + } - self.event_adapter_cancel.cancel(); - if let Some(handle) = self.event_adapter_join.lock().await.take() { - if let Err(e) = handle.await { - tracing::warn!(error = ?e, "Wallet event adapter task join error"); + /// `Stopped` and `Timeout` are NOT clean; `Ok` and `NotRunning` ARE. + /// Unit-tests the `is_clean` predicate directly so we don't need to + /// trigger a real timeout (30s) in a deterministic test. 
+ #[test] + fn worker_status_clean_predicate() { + assert!(WorkerStatus::Ok.is_clean()); + assert!(WorkerStatus::NotRunning.is_clean()); + + assert!(!WorkerStatus::Stopped(None).is_clean()); + assert!(!WorkerStatus::Stopped(Some("cancelled".into())).is_clean()); + assert!(!WorkerStatus::Panicked("boom".into()).is_clean()); + assert!(!WorkerStatus::Timeout.is_clean()); + assert!(!WorkerStatus::Error("infra".into()).is_clean()); + // A detached-but-still-live coordinator thread is non-clean: the + // host must not free its callback context yet. + assert!(!WorkerStatus::Detached.is_clean()); + } + + /// `all_clean()` on the `ShutdownReport` that `shutdown()` returns is + /// false whenever any per-worker slot is non-clean, a reaped orphan was + /// non-clean, or an orphan survived the reap (`detached > 0`). + #[test] + fn shutdown_report_all_clean() { + use std::collections::BTreeMap; + + let clean = ShutdownReport:: { + per_worker: BTreeMap::from([ + (WalletWorker::PlatformAddressSync, WorkerStatus::Ok), + (WalletWorker::IdentitySync, WorkerStatus::NotRunning), + (WalletWorker::EventAdapter, WorkerStatus::Ok), + ]), + detached: 0, + orphan_status: WorkerStatus::Ok, + }; + assert!(clean.all_clean()); + + let with_timeout = ShutdownReport:: { + per_worker: BTreeMap::from([ + (WalletWorker::PlatformAddressSync, WorkerStatus::Timeout), + (WalletWorker::IdentitySync, WorkerStatus::Ok), + (WalletWorker::EventAdapter, WorkerStatus::Ok), + ]), + detached: 0, + orphan_status: WorkerStatus::Ok, + }; + assert!(!with_timeout.all_clean()); + + let with_stopped = ShutdownReport:: { + per_worker: BTreeMap::from([ + (WalletWorker::PlatformAddressSync, WorkerStatus::Ok), + (WalletWorker::IdentitySync, WorkerStatus::Ok), + ( + WalletWorker::ShieldedSync, + WorkerStatus::Stopped(Some("aborted".into())), + ), + (WalletWorker::EventAdapter, WorkerStatus::Ok), + ]), + detached: 0, + orphan_status: WorkerStatus::Ok, + }; + assert!(!with_stopped.all_clean()); + + // A still-live detached 
orphan alone makes the aggregate + // non-clean — the slot the rest of the teardown can't see. + let with_detached = ShutdownReport:: { + per_worker: BTreeMap::from([ + (WalletWorker::PlatformAddressSync, WorkerStatus::Ok), + (WalletWorker::IdentitySync, WorkerStatus::Ok), + (WalletWorker::EventAdapter, WorkerStatus::Ok), + ]), + detached: 1, + orphan_status: WorkerStatus::Detached, + }; + assert!(!with_detached.all_clean()); + } + + /// A panicked reaped orphan (orphan finished within the reap grace, so + /// `detached == 0` but `orphan_status` is `Panicked`) must still make the + /// report non-clean. Without `orphan_status` folding into `all_clean()`, + /// a survivor count of zero would silently pass the panic. + #[test] + fn panicked_orphan_status_makes_report_non_clean() { + use std::collections::BTreeMap; + + // No survivors, but a non-clean reaped-orphan status. An empty + // `per_worker` means no worker slot is itself non-clean, so the + // verdict rides entirely on `orphan_status`. + let report = ShutdownReport:: { + per_worker: BTreeMap::new(), + detached: 0, + orphan_status: WorkerStatus::Panicked("orphan panic during reap".into()), + }; + + assert!( + !report.all_clean(), + "all_clean() must reflect a panicked reaped-orphan status", + ); + } + + /// Complement: a clean reaped-orphan status (`Ok`) leaves the report + /// clean. Guards against over-triggering the orphan-status fold. 
+ #[test] + fn clean_orphan_status_keeps_report_clean() { + use std::collections::BTreeMap; + + let mut per_worker = BTreeMap::new(); + per_worker.insert(WalletWorker::PlatformAddressSync, WorkerStatus::Ok); + per_worker.insert(WalletWorker::IdentitySync, WorkerStatus::Ok); + #[cfg(feature = "shielded")] + per_worker.insert(WalletWorker::ShieldedSync, WorkerStatus::Ok); + per_worker.insert(WalletWorker::EventAdapter, WorkerStatus::Ok); + + let report = ShutdownReport:: { + per_worker, + detached: 0, + orphan_status: WorkerStatus::Ok, + }; + + assert!(report.all_clean()); + } + + /// `shutdown()` must wait for an in-flight sync pass to drain before + /// joining the coordinator thread. + /// + /// A slow `on_platform_address_sync_completed` callback (300 ms) + /// keeps `is_syncing=true` while it runs. We call `shutdown()` while + /// the callback is in-flight and assert that `shutdown()` blocks + /// until the callback completes, then returns `Ok`. + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn shutdown_waits_for_in_flight_pass_to_drain() { + let handler_started = Arc::new(AtomicBool::new(false)); + let handler_completed = Arc::new(AtomicBool::new(false)); + let manager = make_manager_with_slow_handler( + Arc::clone(&handler_started), + Arc::clone(&handler_completed), + ); + + // Start the address-sync coordinator; first pass fires immediately. + Arc::clone(&manager.platform_address_sync_manager).start(); + + // Wait until the slow completion callback is running + // (`is_syncing` stays true for its 300 ms duration). + tokio::time::timeout(Duration::from_secs(5), async { + while !handler_started.load(AO::Acquire) { + tokio::time::sleep(Duration::from_millis(5)).await; + } + }) + .await + .expect("handler did not start within 5s"); + + // Shutdown must drain the in-flight pass before joining. 
+ let status = tokio::time::timeout(Duration::from_secs(5), manager.shutdown()) + .await + .expect("shutdown must complete within 5 s"); + + assert_eq!( + status.per_worker.get(&WalletWorker::PlatformAddressSync), + Some(&WorkerStatus::Ok), + "coordinator must join cleanly after drain" + ); + assert!( + handler_completed.load(AO::Acquire), + "shutdown must not return before the in-flight pass completes" + ); + } + + /// Race regression — start coordinators with a long sleep interval so + /// they spend nearly all their time in a live `tokio::time::sleep`, + /// then `shutdown()` and drop the runtime. + /// + /// With the thread join in `shutdown()` every coordinator has fully + /// exited its `block_on` before `drop(runtime)` — no race possible. + /// Loop 10 times to give any latent race a reliable window: WITHOUT + /// the join, the coordinator's `select!` wakeup (via tokio) would + /// race the runtime teardown and reliably trigger the + /// "Tokio … being shutdown" panic across the 10 iterations. + /// + /// Uses `std::panic::catch_unwind` around `drop(runtime)` rather than + /// a process-global panic hook; the hook would be live for seconds and + /// could swallow diagnostics from other concurrently-running tests. + #[test] + fn shutdown_then_drop_runtime_does_not_panic() { + static SHUTDOWN_PANICS: AtomicUsize = AtomicUsize::new(0); + + for _ in 0..10 { + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .enable_all() + .build() + .expect("build runtime"); + + let status = runtime.block_on(async { + let manager = make_manager(); + // Long interval: coordinator spends ~10 s in a live + // tokio::time::sleep, maximising the race window for a + // join-less runtime drop. 
+ manager + .platform_address_sync_manager + .set_interval(Duration::from_secs(10)); + manager + .identity_sync_manager + .set_interval(Duration::from_secs(10)); + #[cfg(feature = "shielded")] + manager + .shielded_sync_manager + .set_interval(Duration::from_secs(10)); + start_coordinators(&manager); + // Wait for coordinators to finish their first (instant) + // pass and enter the long sleep. + tokio::time::sleep(Duration::from_millis(100)).await; + // shutdown() joins each thread before returning; without + // the join this drop would race the select!/block_on exit. + manager.shutdown().await + }); + + // Wrap the runtime drop in catch_unwind to intercept the specific + // "A Tokio 1.x context ... being shutdown" panic without installing + // a process-wide hook that would suppress diagnostics from other + // concurrently running tests. + let drop_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + drop(runtime); + })); + if let Err(payload) = drop_result { + let msg = payload + .downcast_ref::() + .map(String::as_str) + .or_else(|| payload.downcast_ref::<&str>().copied()) + .unwrap_or(""); + if msg.contains("being shutdown") { + SHUTDOWN_PANICS.fetch_add(1, AO::SeqCst); + } else { + // Unexpected panic — propagate so the test fails loudly. + std::panic::resume_unwind(payload); + } } + + // Brief settle — any stray thread activity surfaces here. 
+ std::thread::sleep(Duration::from_millis(50)); + + assert_eq!( + status.per_worker.get(&WalletWorker::PlatformAddressSync), + Some(&WorkerStatus::Ok) + ); + assert_eq!( + status.per_worker.get(&WalletWorker::IdentitySync), + Some(&WorkerStatus::Ok) + ); + assert!(status.all_clean(), "workers did not wind down: {status:?}"); } + + assert_eq!( + SHUTDOWN_PANICS.load(AO::SeqCst), + 0, + "dropping the runtime after shutdown raced a coordinator thread \ + ({} panics across 10 iterations)", + SHUTDOWN_PANICS.load(AO::SeqCst) + ); + } + + /// Spawn a thread that parks until `release` is signalled (or the + /// sender drops), standing in for a coordinator thread wedged in a + /// non-yielding `Drop` that ignores the cancellation it received. + fn spawn_wedged_thread() -> (std::sync::mpsc::Sender<()>, std::thread::JoinHandle<()>) { + let (release_tx, release_rx) = std::sync::mpsc::channel::<()>(); + let handle = std::thread::spawn(move || { + // Block here regardless of any cancellation, exactly like a + // Drop that never yields, until the test releases us. + let _ = release_rx.recv(); + }); + (release_tx, handle) + } + + /// Headline regression: a coordinator thread detached past the reap + /// backstop and parked in the orphans list makes a subsequent + /// `shutdown()` report the result as **non-clean** — so the FFI + /// `destroy` returns `ErrorShutdownIncomplete` and the host delays + /// freeing the callback context the still-live thread may touch. + /// + /// Non-vacuous: if the registry dropped the orphan at reap instead of + /// parking it, `detached` would be `0` and `all_clean()` would be + /// `true`, failing both assertions. 
+ #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn shutdown_reports_detached_orphan_as_non_clean() { + let manager = make_manager(); + + // Stand in for the genuine-wedge outcome: an earlier tight + // stop()->start() reap had to detach a still-live coordinator thread + // past its backstop, so the registry parked it as an orphan. + let (release_tx, wedged) = spawn_wedged_thread(); + manager + .registry + .park_orphan_for_test(WalletWorker::ShieldedSync, wedged); + + let status = tokio::time::timeout(Duration::from_secs(10), manager.shutdown()) + .await + .expect("shutdown must complete within bound"); + + assert_eq!( + status.detached, 1, + "a still-live detached orphan must surface in the survivor count" + ); + assert!( + !status.all_clean(), + "all_clean() must be false while a detached coordinator thread is \ + still alive: {status:?}" + ); + + // Cleanup: shutdown() re-parked the survivor; release it and reap so + // no live thread leaks past the test. + release_tx.send(()).unwrap(); + let _ = manager.registry.reap_orphans(Duration::from_secs(5)).await; + } + + /// TC-002 (F2): `clear_shielded` must refuse while a prior-generation + /// shielded thread is parked alive — even though the current shielded + /// quiesce is clean and the other coordinators / the always-on event + /// adapter are legitimately running. Releasing + reaping the orphan + /// lets a retry succeed. + /// + /// Non-vacuous: a gate checking only `!status.is_clean()` would let the + /// clean `NotRunning` quiesce pass the guard and wipe the store under the + /// live orphan — `clear_shielded` would return `Ok`. + #[cfg(feature = "shielded")] + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn clear_shielded_refuses_while_shielded_orphan_alive() { + let manager = make_manager(); + + // Park a wedged thread under the ShieldedSync key: a prior- + // generation shielded thread an earlier reap could not join. 
+ let (release_tx, wedged) = spawn_wedged_thread(); + manager + .registry + .park_orphan_for_test(WalletWorker::ShieldedSync, wedged); + + assert!(manager.registry.any_alive_for(WalletWorker::ShieldedSync)); + assert!(!manager.shielded_sync_manager.is_running()); + + // Refuses: the live shielded orphan could re-persist into the store + // the wipe is about to clear. + let err = manager + .clear_shielded() + .await + .expect_err("clear_shielded must refuse while a shielded orphan is alive"); + assert!( + matches!( + err, + crate::error::PlatformWalletError::ShieldedShutdownIncomplete { .. } + ), + "expected ShieldedShutdownIncomplete, got {err:?}" + ); + + // Release + reap the orphan; the shielded-scoped gate now clears and + // a retry succeeds (no shielded store configured → clear is a no-op). + release_tx.send(()).unwrap(); + let _ = manager.registry.reap_orphans(Duration::from_secs(5)).await; + assert!(!manager.registry.any_alive_for(WalletWorker::ShieldedSync)); + manager + .clear_shielded() + .await + .expect("clear_shielded must succeed once the orphan is reaped"); + } + + /// While `clear_shielded` is mid-flight, a concurrent + /// `shielded_sync().start()` must be no-op'd by the registry's per-key + /// clearing latch — otherwise a fresh worker can land between the + /// clear's liveness check and the store wipe and re-persist into the + /// store about to be cleared. The test holds the latch directly (mirrors + /// what `clear_shielded_inner` does) and verifies a start under the latch + /// leaves `is_running()==false`. + /// + /// Non-vacuous: without the latch the start would register a worker + /// on the registry (`is_running()==true`) regardless of any in- + /// flight clear. + #[cfg(feature = "shielded")] + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn shielded_sync_start_no_ops_while_clearing_latch_held() { + let manager = make_manager(); + + // Hold the same latch `clear_shielded_inner` raises. 
+ let _clearing_latch = manager.registry.hold_clearing(WalletWorker::ShieldedSync); + + // A concurrent caller drives `start()` on the shielded coordinator. + manager.shielded_sync_manager.clone().start(); + assert!( + !manager.shielded_sync_manager.is_running(), + "registry must refuse the shielded start while the clearing latch is held" + ); + + // Sibling coordinator (different key) is unaffected. + manager.platform_address_sync_manager.clone().start(); + assert!( + manager.platform_address_sync_manager.is_running(), + "the latch is per-key; sibling coordinators must still start" + ); + + // Drop the latch; a fresh start now succeeds. + drop(_clearing_latch); + manager.shielded_sync_manager.clone().start(); + assert!( + manager.shielded_sync_manager.is_running(), + "latch release must let the shielded coordinator start again" + ); + + // Cleanup. + let _ = manager.shutdown().await; + } + + /// SEC-002 continuity under concurrent (re)start during clear: when + /// `clear_shielded` holds both the per-key clearing latch AND the + /// quiescing gate continuously, a racing `shielded_sync().start()` + /// must NOT lower the gate even though `start_thread` is refused. + /// Under refcount semantics on `quiescing`, the start path never + /// touches the gate at all — only properly-paired guard holders raise + /// or lower it — so the Clear flow's `hold_quiescing_gate` ref keeps + /// the gate up across the refused start. The `is_clearing` short- + /// circuit in `CoordinatorLifecycle::spawn_periodic_loop` remains as + /// defense-in-depth, skipping the wasted setup work. + #[cfg(feature = "shielded")] + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn shielded_start_during_clear_preserves_quiescing_gate() { + use std::sync::atomic::Ordering; + + let manager = make_manager(); + + // Acquire BOTH guards `clear_shielded_inner` holds: the per-key + // registry latch + the wallet-side quiescing gate. 
+ let _clearing_latch = manager.registry.hold_clearing(WalletWorker::ShieldedSync); + let _clearing_gate = manager.shielded_sync_manager.hold_quiescing_gate(); + assert!( + manager + .shielded_sync_manager + .quiescing_load_for_test(Ordering::SeqCst), + "precondition: gate raised by the held clearing guard" + ); + + // A racing (re)start arrives. + manager.shielded_sync_manager.clone().start(); + + // start_thread is refused by the latch, as before... + assert!( + !manager.shielded_sync_manager.is_running(), + "registry latch must refuse the start" + ); + // ...and the gate stays UP. Pre-fix this would be `false` because + // `spawn_periodic_loop` lowered the gate before the latch check. + assert!( + manager + .shielded_sync_manager + .quiescing_load_for_test(Ordering::SeqCst), + "gate must remain raised: refused start must not lower clear_shielded's continuously-held gate", + ); + + // Cleanup — drop the guards in reverse order before shutdown. + drop(_clearing_gate); + drop(_clearing_latch); + let _ = manager.shutdown().await; + } + + /// SEC-002 continuity under concurrent public `quiesce()` during + /// clear: `CoordinatorLifecycle::quiesce` (reachable via + /// `ShieldedSyncManager::quiesce`, which is `pub(crate)` but routes + /// out via `shielded_sync_arc`) must NOT lower the gate `clear_shielded` + /// is holding continuously. The `is_clearing` short-circuit covers the + /// ordering where the clearing latch is acquired before quiesce arrives; + /// the refcount semantics on `quiescing` cover the racy ordering where + /// `is_clearing` returns false at check time and the clear lands in + /// the ns-window before the local guard install. + /// + /// Non-vacuous: against the pre-fix `quiesce` (which always + /// constructed an `AtomicFlagGuard` and dropped it on return), the + /// gate would be `false` after the racing quiesce returned. 
+ #[cfg(feature = "shielded")] + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn shielded_quiesce_during_clear_preserves_quiescing_gate() { + use std::sync::atomic::Ordering; + + let manager = make_manager(); + + // Acquire BOTH guards `clear_shielded_inner` holds. + let _clearing_latch = manager.registry.hold_clearing(WalletWorker::ShieldedSync); + let _clearing_gate = manager.shielded_sync_manager.hold_quiescing_gate(); + assert!( + manager + .shielded_sync_manager + .quiescing_load_for_test(Ordering::SeqCst), + "precondition: gate raised by the held clearing guard" + ); + + // A racing quiesce arrives (no background loop registered, so + // pre-fix quiesce would early-return NotRunning AFTER dropping + // its locally-installed `AtomicFlagGuard`, lowering the gate). + // Under the refcount design, the `is_clearing` short-circuit + // fires here (clearing latch is held) and quiesce returns + // without raising/lowering anything. + let status = manager.shielded_sync_manager.quiesce().await; + assert_eq!( + status, + dash_async::WorkerStatus::NotRunning, + "racing quiesce should observe the clearing latch and bail" + ); + + // Gate stays UP. Pre-fix this would be `false` because the + // local `AtomicFlagGuard`'s Drop lowered it. + assert!( + manager + .shielded_sync_manager + .quiescing_load_for_test(Ordering::SeqCst), + "gate must remain raised: racing public quiesce must not lower clear_shielded's continuously-held gate", + ); + + // Cleanup. + drop(_clearing_gate); + drop(_clearing_latch); + let _ = manager.shutdown().await; + } + + /// SEC-001: `clear_shielded` must BOUND its in-flight-pass drain so a + /// heavy direct `sync_now`/`sync_wallet` that won't drain in time cannot + /// hang the host's Clear. On the drain deadline the clear reports + /// `ShieldedShutdownIncomplete` and aborts BEFORE the store wipe, leaving + /// the store intact. 
+ /// + /// Non-vacuous: against an unbounded drain the held pass keeps + /// `is_syncing` set forever and `clear_shielded_inner` never returns — the + /// test's outer timeout fires and the `expect` below panics. + #[cfg(feature = "shielded")] + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn clear_shielded_aborts_without_wiping_when_drain_times_out() { + let manager = Arc::new(make_manager()); + + // A direct sync pass already in flight (holds `is_syncing`); it never + // drains within the clear's drain budget. + let (ready_tx, ready_rx) = tokio::sync::oneshot::channel::<()>(); + let (release_tx, release_rx) = tokio::sync::oneshot::channel::<()>(); + let ssm = Arc::clone(&manager.shielded_sync_manager); + let pass_task = tokio::spawn(async move { + let _pass = ssm + .begin_pass_for_test() + .expect("direct pass enters the slot"); + ready_tx.send(()).expect("signal in-flight"); + release_rx.await.expect("await release"); + // `_pass` drops here → is_syncing = false + }); + + ready_rx.await.expect("pass reached in-flight"); + assert!(manager.shielded_sync_manager.is_syncing()); + + // Clear with a short drain budget: the held pass can't drain in time, + // so the clear must return ShieldedShutdownIncomplete — bounded, never + // hanging — and never reach the wipe. + let result = tokio::time::timeout( + Duration::from_secs(5), + manager.clear_shielded_inner(Duration::from_millis(100)), + ) + .await + .expect("clear must return within its bounded drain, never hang"); + assert!( + matches!( + result, + Err(crate::error::PlatformWalletError::ShieldedShutdownIncomplete { .. }) + ), + "bounded drain timeout must surface as ShieldedShutdownIncomplete, got {result:?}" + ); + + // Release the held pass and join. 
+ release_tx.send(()).expect("release the pass"); + pass_task.await.expect("pass task joined"); + } + + /// The [`ShutdownReport`] that `shutdown()` returns reads per-worker by + /// key, surfaces a surviving orphan through `detached`, and folds all + /// three signals (per-worker status, `detached`, `orphan_status`) into + /// `all_clean()`. + #[test] + fn shutdown_report_per_worker_shape_and_all_clean() { + use std::collections::BTreeMap; + + // All Ok, no orphans. + let report = ShutdownReport:: { + per_worker: BTreeMap::from([ + (WalletWorker::PlatformAddressSync, WorkerStatus::Ok), + (WalletWorker::IdentitySync, WorkerStatus::Ok), + (WalletWorker::ShieldedSync, WorkerStatus::Ok), + (WalletWorker::EventAdapter, WorkerStatus::Ok), + ]), + detached: 0, + orphan_status: WorkerStatus::Ok, + }; + assert_eq!( + report.per_worker.get(&WalletWorker::PlatformAddressSync), + Some(&WorkerStatus::Ok) + ); + assert_eq!( + report.per_worker.get(&WalletWorker::EventAdapter), + Some(&WorkerStatus::Ok) + ); + assert!(report.all_clean()); + + // A surviving orphan -> detached > 0 -> non-clean; an absent worker + // has no `per_worker` entry. + let report = ShutdownReport:: { + per_worker: BTreeMap::new(), + detached: 1, + orphan_status: WorkerStatus::Detached, + }; + assert_eq!(report.detached, 1); + assert!(!report + .per_worker + .contains_key(&WalletWorker::PlatformAddressSync)); + assert!(!report.all_clean()); + + // A per-worker Timeout is read back by key and is non-clean. 
+ let report = ShutdownReport:: { + per_worker: BTreeMap::from([(WalletWorker::IdentitySync, WorkerStatus::Timeout)]), + detached: 0, + orphan_status: WorkerStatus::Ok, + }; + assert_eq!( + report.per_worker.get(&WalletWorker::IdentitySync), + Some(&WorkerStatus::Timeout) + ); + assert!(!report.all_clean()); } } diff --git a/packages/rs-platform-wallet/src/manager/platform_address_sync.rs b/packages/rs-platform-wallet/src/manager/platform_address_sync.rs index e1a229806c2..b4246036670 100644 --- a/packages/rs-platform-wallet/src/manager/platform_address_sync.rs +++ b/packages/rs-platform-wallet/src/manager/platform_address_sync.rs @@ -9,19 +9,19 @@ //! wallets are registered and the SPV runtime is up. use std::collections::BTreeMap; -use std::sync::{ - atomic::{AtomicBool, AtomicU64, Ordering}, - Arc, Mutex as StdMutex, -}; +use std::sync::Arc; + +use dash_async::ThreadRegistry; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use arc_swap::ArcSwapOption; use dash_sdk::platform::address_sync::{AddressSyncConfig, AddressSyncResult}; use key_wallet::PlatformP2PKHAddress; +use super::coordinator_lifecycle::CoordinatorLifecycle; +use super::WalletWorker; use crate::wallet::PlatformAddressTag; use tokio::sync::RwLock; -use tokio_util::sync::CancellationToken; use crate::error::PlatformWalletError; use crate::events::PlatformEventManager; @@ -95,19 +95,14 @@ impl PlatformAddressSyncSummary { pub struct PlatformAddressSyncManager { wallets: Arc>>>, event_manager: Arc, - /// Cancel token for the background loop, if running. - background_cancel: StdMutex>, - interval_secs: AtomicU64, - is_syncing: AtomicBool, - /// Set by [`quiesce`](Self::quiesce) to gate new passes while it - /// drains an in-flight one. 
`sync_now` bails (after taking the - /// `is_syncing` slot) when this is set, so once `quiesce` observes - /// `is_syncing == false` no further pass can start — giving shutdown - /// a real "no more host-visible sync-completed callbacks" barrier - /// that cancel-only [`stop`](Self::stop) does not provide. - quiescing: AtomicBool, - /// Unix seconds of the last completed pass. `0` = never. - last_sync_unix: AtomicU64, + /// Shared lifecycle state + pass-gating protocol under the + /// [`WalletWorker::PlatformAddressSync`] key: registry handle, polling + /// interval, the `is_syncing` / `quiescing` handshake, and the + /// last-sync stamp. `start` / `stop` / `is_running` / `quiesce` and the + /// `sync_now` pass gate delegate to it. The `quiescing` half gives + /// shutdown a real "no more host-visible sync-completed callbacks" + /// barrier that cancel-only [`stop`](Self::stop) does not provide. + lifecycle: CoordinatorLifecycle, /// Shared config applied uniformly across wallets and accounts. /// /// `ArcSwapOption` instead of a mutex because writes are rare @@ -120,15 +115,16 @@ impl PlatformAddressSyncManager { pub fn new( wallets: Arc>>>, event_manager: Arc, + registry: Arc>, ) -> Self { Self { wallets, event_manager, - background_cancel: StdMutex::new(None), - interval_secs: AtomicU64::new(DEFAULT_SYNC_INTERVAL_SECS), - is_syncing: AtomicBool::new(false), - quiescing: AtomicBool::new(false), - last_sync_unix: AtomicU64::new(0), + lifecycle: CoordinatorLifecycle::new( + registry, + WalletWorker::PlatformAddressSync, + DEFAULT_SYNC_INTERVAL_SECS, + ), config: ArcSwapOption::empty(), } } @@ -137,13 +133,12 @@ impl PlatformAddressSyncManager { /// /// The running loop picks this up on its next sleep. pub fn set_interval(&self, interval: Duration) { - let secs = interval.as_secs().max(1); - self.interval_secs.store(secs, Ordering::Release); + self.lifecycle.set_interval(interval); } /// Current polling interval. 
pub fn interval(&self) -> Duration { - Duration::from_secs(self.interval_secs.load(Ordering::Acquire)) + self.lifecycle.interval() } /// Replace the shared [`AddressSyncConfig`] used on every pass. @@ -160,24 +155,18 @@ impl PlatformAddressSyncManager { /// Whether the background loop is currently running. pub fn is_running(&self) -> bool { - self.background_cancel - .lock() - .map(|g| g.is_some()) - .unwrap_or(false) + self.lifecycle.is_running() } /// Whether a sync pass is in flight right now. pub fn is_syncing(&self) -> bool { - self.is_syncing.load(Ordering::Acquire) + self.lifecycle.is_syncing() } /// Unix seconds of the last completed pass, or `None` if no pass /// has ever completed. pub fn last_sync_unix_seconds(&self) -> Option { - match self.last_sync_unix.load(Ordering::Acquire) { - 0 => None, - n => Some(n), - } + self.lifecycle.last_sync_unix_seconds() } /// Start the background sync loop. Idempotent — calling while @@ -195,60 +184,32 @@ impl PlatformAddressSyncManager { /// The first pass runs immediately; subsequent passes fire every /// [`interval`](Self::interval). pub fn start(self: Arc) { - let mut guard = self.background_cancel.lock().expect("bg_cancel poisoned"); - if guard.is_some() { - return; - } - let cancel = CancellationToken::new(); - *guard = Some(cancel.clone()); - drop(guard); - - let handle = tokio::runtime::Handle::current(); - let this = self; - std::thread::Builder::new() - .name("platform-address-sync".into()) - .spawn(move || { - handle.block_on(async move { - loop { - if cancel.is_cancelled() { - break; - } - - this.sync_now().await; - - let interval = this.interval(); - tokio::select! 
{ - _ = tokio::time::sleep(interval) => {} - _ = cancel.cancelled() => break, - } - } - - if let Ok(mut guard) = this.background_cancel.lock() { - *guard = None; - } - }); - }) - .expect("failed to spawn platform-address-sync thread"); + let pass_self = Arc::clone(&self); + let interval_self = Arc::clone(&self); + self.lifecycle.spawn_periodic_loop( + move || { + let this = Arc::clone(&pass_self); + async move { + let _ = this.sync_now().await; + } + }, + move || interval_self.interval(), + ); } /// Stop the background sync loop. No-op if not running. /// /// **Cancel-only**: requests cancellation and returns immediately. A - /// pass already inside `sync_now` keeps running to completion, - /// including its `on_platform_address_sync_completed` host-callback - /// dispatch. For a real "nothing is running and nothing more will - /// fire a host callback" barrier — required by manager shutdown so - /// the host can free the event-handler context — use - /// [`quiesce`](Self::quiesce). + /// pass already inside `sync_now` is **cancelled mid-flight** at its + /// next `.await` (the loop's `biased; cancel-first` select drops the + /// `sync_now` future, see `start`). The + /// `on_platform_address_sync_completed` host callback dispatch may + /// not fire if cancel lands before the callback. For a real "nothing + /// is running and nothing more will fire a host callback" barrier — + /// required by manager shutdown so the host can free the + /// event-handler context — use [`quiesce`](Self::quiesce). pub fn stop(&self) { - if let Some(token) = self - .background_cancel - .lock() - .expect("bg_cancel poisoned") - .take() - { - token.cancel(); - } + self.lifecycle.stop(); } /// Cancel the background loop **and wait for any in-flight sync pass @@ -270,13 +231,14 @@ impl PlatformAddressSyncManager { /// falling edge (with the gate up) is a sound "fully drained" signal. /// The gate is reopened before returning so a later start/sync works /// normally. 
- pub async fn quiesce(&self) { - self.quiescing.store(true, Ordering::Release); - self.stop(); - while self.is_syncing.load(Ordering::Acquire) { - tokio::time::sleep(Duration::from_millis(20)).await; - } - self.quiescing.store(false, Ordering::Release); + /// + /// Finally **joins** the loop's OS thread (after the drain, so the + /// thread is on its way out) and returns its terminal status. Joining + /// while the runtime is still alive is what lets the manager promise + /// the `!Send` loop has stopped touching `tokio::time` before a + /// one-shot host drops the runtime. + pub async fn quiesce(&self) -> dash_async::WorkerStatus { + self.lifecycle.quiesce().await } /// Run one sync pass across every registered wallet. @@ -284,23 +246,13 @@ impl PlatformAddressSyncManager { /// If a pass is already in flight, returns an empty summary and /// skips — the caller can inspect [`is_syncing`] to distinguish. pub async fn sync_now(&self) -> PlatformAddressSyncSummary { - if self - .is_syncing - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) - .is_err() - { + // Claim the pass slot and honour the quiescing gate; bail with an + // empty summary (and without a host completion callback after + // quiesce returns) if a pass is already in flight or a teardown + // raised the gate. The guard clears `is_syncing` on every exit path. + let Some(_pass) = self.lifecycle.begin_pass() else { return PlatformAddressSyncSummary::default(); - } - - // A `quiesce()` may have raised the gate between our CAS and - // here; if so, release the slot and bail without running a pass - // so the drain can complete and shutdown gets a true barrier - // (no further `on_platform_address_sync_completed` host callback - // after quiesce returns). 
- if self.quiescing.load(Ordering::Acquire) { - self.is_syncing.store(false, Ordering::Release); - return PlatformAddressSyncSummary::default(); - } + }; let snapshot: Vec<(WalletId, Arc)> = { let wallets = self.wallets.read().await; @@ -330,22 +282,20 @@ impl PlatformAddressSyncManager { .map(|d| d.as_secs()) .unwrap_or(0); summary.sync_unix_seconds = now; - self.last_sync_unix.store(now, Ordering::Release); - - // Dispatch the completion event BEFORE clearing `is_syncing`. - // `quiesce()` drains on the falling edge of `is_syncing`, so if - // we cleared the flag first a shutdown caller could unblock and - // free the host event-handler context while this completion - // event (FFI callback → host handler) is still pending — a - // use-after-free. Holding the flag across the dispatch makes - // quiesce's barrier cover the host callback too. Mirrors the - // ordering in `ShieldedSyncManager::sync_now`. + self.lifecycle.store_last_sync_unix(now); + + // Dispatch the completion event BEFORE the `_pass` guard drops. + // `quiesce()` drains on the falling edge of `is_syncing`; if the + // guard cleared the flag before the dispatch a shutdown caller + // could unblock and free the host event-handler context while + // the callback is still pending — a use-after-free. The guard + // drops (clearing `is_syncing`) after this call returns, when + // the function frame unwinds. self.event_manager .on_platform_address_sync_completed(&summary); - self.is_syncing.store(false, Ordering::Release); - summary + // `_pass` drops here → `is_syncing = false` } /// Sync a single wallet on demand. 
Does not set the global @@ -373,7 +323,7 @@ impl std::fmt::Debug for PlatformAddressSyncManager { f.debug_struct("PlatformAddressSyncManager") .field("is_running", &self.is_running()) .field("is_syncing", &self.is_syncing()) - .field("interval_secs", &self.interval_secs.load(Ordering::Acquire)) + .field("interval_secs", &self.lifecycle.interval_secs()) .field("last_sync_unix", &self.last_sync_unix_seconds()) .finish() } @@ -422,8 +372,13 @@ mod tests { let event_manager = Arc::new(PlatformEventManager::new(vec![ Arc::clone(&counter) as Arc ])); + let registry = ThreadRegistry::new(); ( - Arc::new(PlatformAddressSyncManager::new(wallets, event_manager)), + Arc::new(PlatformAddressSyncManager::new( + wallets, + event_manager, + registry, + )), counter, ) } @@ -438,53 +393,6 @@ mod tests { assert!(!mgr.is_syncing()); } - /// `quiesce()` must not return while a pass is in flight, and must - /// return promptly once the pass drains. - /// - /// Drives the real `is_syncing` lifecycle: a background task takes - /// the slot via the same `compare_exchange` the real `sync_now` - /// uses, holds it across a sleep (standing in for the pass body + - /// completion-event dispatch, which `sync_now` keeps the flag set - /// across), then clears it. - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn quiesce_blocks_until_in_flight_pass_drains() { - let (mgr, _counter) = make_manager(); - - let holder = Arc::clone(&mgr); - let pass = tokio::spawn(async move { - assert!( - holder - .is_syncing - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) - .is_ok(), - "test should own the is_syncing slot" - ); - tokio::time::sleep(Duration::from_millis(200)).await; - holder.is_syncing.store(false, Ordering::Release); - }); - - while !mgr.is_syncing() { - tokio::time::sleep(Duration::from_millis(5)).await; - } - - let quiesce_fut = mgr.quiesce(); - tokio::pin!(quiesce_fut); - - tokio::select! 
{ - _ = &mut quiesce_fut => panic!("quiesce returned while a pass was in flight"), - _ = tokio::time::sleep(Duration::from_millis(50)) => {} - } - assert!(mgr.is_syncing(), "pass should still be in flight"); - - tokio::time::timeout(Duration::from_secs(2), &mut quiesce_fut) - .await - .expect("quiesce did not return after the pass drained"); - - assert!(!mgr.quiescing.load(Ordering::Acquire)); - assert!(!mgr.is_syncing()); - pass.await.unwrap(); - } - /// A `sync_now()` invoked while `quiescing` is set must bail without /// running the pass — in particular, without firing the /// `on_platform_address_sync_completed` host callback. This is the @@ -494,8 +402,8 @@ mod tests { async fn sync_now_bails_when_quiescing() { let (mgr, counter) = make_manager(); - // Raise the gate as `quiesce()` would. - mgr.quiescing.store(true, Ordering::Release); + // Raise the gate as `quiesce()` would, held across the pass. + let _gate = mgr.lifecycle.hold_quiescing_gate(); let summary = mgr.sync_now().await; @@ -505,4 +413,57 @@ mod tests { assert_eq!(counter.completions.load(AtomicOrdering::SeqCst), 0); assert!(!mgr.is_syncing()); } + + // merge: #3953's generation-guard test ported onto our ThreadRegistry + // refactor — the registry's per-key clearing latch is the equivalent guard. + /// Restart-in-place regression: a tight `start()` → `stop()` → `start()` + /// must leave the manager *running* on the new loop. The cancelled stale + /// loop races to clear its registry slot as it exits; the registry's + /// per-key clearing latch must stop it from stripping the freshly + /// installed loop's running state — otherwise the new loop keeps running + /// but becomes invisible to `is_running()` / `stop()`. + /// + /// Determinism: the only wait is a *bounded* poll. With the latch in + /// place `is_running()` is true for the whole window, so the test + /// never fails spuriously on correct code. 
A regression flips it false + /// within milliseconds once the stale loop clears the slot, which the + /// poll catches. Needs the multi-thread flavor because `start()` + /// drives its loop via `Handle::current().block_on` on a dedicated OS + /// thread, which would deadlock a single-threaded test runtime. + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn restart_in_place_keeps_running_after_stale_loop_exits() { + let (mgr, _counter) = make_manager(); + + // Gen 1. Wait (bounded) for the first pass to land — a real + // lifecycle signal that the loop is now parked in its interval + // sleep, so its cleanup is still pending when we stop+restart. + Arc::clone(&mgr).start(); + let mut waited = 0; + while mgr.last_sync_unix_seconds().is_none() { + assert!(waited < 200, "gen-1's first sync pass never completed"); + tokio::time::sleep(Duration::from_millis(10)).await; + waited += 1; + } + + // Tight stop→start with no await between: the just-cancelled gen-1 + // loop cannot reach its cleanup before gen 2 is installed, so the + // race window the guard protects is reliably open. + mgr.stop(); + Arc::clone(&mgr).start(); + + // Give the stale gen-1 loop ample time to run its (guarded) + // cleanup. `is_running()` must stay true throughout. + for _ in 0..100 { + assert!( + mgr.is_running(), + "stale gen-1 loop cleared gen-2's cancel token — generation guard regressed" + ); + tokio::time::sleep(Duration::from_millis(10)).await; + } + + // The surviving loop is the tracked one: a single `stop()` fully + // reflects it, so there is no orphaned unreflectable duplicate. + mgr.stop(); + assert!(!mgr.is_running(), "stop() must reflect the live loop"); + } } diff --git a/packages/rs-platform-wallet/src/manager/rehydrate.rs b/packages/rs-platform-wallet/src/manager/rehydrate.rs new file mode 100644 index 00000000000..d5fca89a207 --- /dev/null +++ b/packages/rs-platform-wallet/src/manager/rehydrate.rs @@ -0,0 +1,238 @@ +//! 
Watch-only wallet reconstruction + persisted core-state application. +//! +//! Load is **seedless** (see [`load_from_persistor`]). For each +//! persisted wallet we build a watch-only [`Wallet`] from its keyless +//! `AccountRegistrationEntry` manifest, then apply the keyless +//! core-state projection on top. No seed, no signing-key derivation. +//! +//! Because load never touches the seed, it performs no wrong-seed check. +//! A sign-time wrong-seed gate is deferred to separate FFI work and is +//! not part of this path. +//! +//! [`load_from_persistor`]: super::PlatformWalletManager::load_from_persistor + +use key_wallet::account::account_collection::AccountCollection; +use key_wallet::account::Account; +use key_wallet::wallet::managed_wallet_info::ManagedWalletInfo; +use key_wallet::wallet::Wallet; +use key_wallet::Network; + +use crate::changeset::AccountRegistrationEntry; +use crate::error::PlatformWalletError; +use crate::manager::load_outcome::CorruptKind; + +/// Per-row failure surfacing during watch-only rehydration of a single +/// persisted wallet. Maps 1:1 to [`CorruptKind`] for the +/// [`SkipReason`](super::load_outcome::SkipReason) the load loop +/// records. +#[derive(Debug)] +pub(super) enum RehydrateRowError { + /// Manifest was empty — no account to rebuild the wallet around. + MissingManifest, + /// Building a watch-only [`Account`] from a manifest entry failed + /// (xpub structurally malformed for its [`AccountType`]). + /// + /// [`AccountType`]: key_wallet::account::AccountType + MalformedXpub, + /// `AccountCollection::insert` rejected an account (typically a + /// duplicate `account_type` within the manifest). 
+ DecodeError(String), +} + +impl From for CorruptKind { + fn from(e: RehydrateRowError) -> Self { + match e { + RehydrateRowError::MissingManifest => CorruptKind::MissingManifest, + RehydrateRowError::MalformedXpub => CorruptKind::MalformedXpub, + RehydrateRowError::DecodeError(s) => CorruptKind::DecodeError(s), + } + } +} + +/// Build a watch-only [`Wallet`] from the keyless account manifest. +/// +/// Each `AccountRegistrationEntry` becomes an [`Account::from_xpub`] +/// (watch-only) keyed to `expected_wallet_id`; the assembled +/// [`AccountCollection`] is handed to [`Wallet::new_watch_only`] under +/// the same id. No key material crosses this function. +/// +/// Returns [`RehydrateRowError`] when the row is structurally unusable +/// (caller maps it onto a per-row [`SkipReason`]). +pub(super) fn build_watch_only_wallet( + network: Network, + expected_wallet_id: [u8; 32], + manifest: &[AccountRegistrationEntry], +) -> Result { + if manifest.is_empty() { + return Err(RehydrateRowError::MissingManifest); + } + let mut accounts = AccountCollection::new(); + for entry in manifest { + let account = Account::from_xpub( + Some(expected_wallet_id), + entry.account_type, + entry.account_xpub, + network, + ) + .map_err(|_| RehydrateRowError::MalformedXpub)?; + accounts + .insert(account) + .map_err(|e| RehydrateRowError::DecodeError(e.to_string()))?; + } + Ok(Wallet::new_watch_only( + network, + expected_wallet_id, + accounts, + )) +} + +/// Apply the keyless persisted core-state projection onto a +/// freshly-minted `ManagedWalletInfo` skeleton. +/// +/// # Reconstructed (safety-critical-correct) +/// +/// - **Wallet balance** (`wallet_info.balance`, the no-silent-zero +/// guarantee): every persisted UTXO is restored and the per-account +/// + wallet totals are recomputed via `update_balance()`. A UTXO +/// carrying a block height is marked confirmed so it lands in the +/// `confirmed` bucket; the wallet total is exact regardless. 
+/// - **UTXO set**: every unspent persisted outpoint is restored into a +/// funds-bearing account of the wallet (whatever topology it has — +/// BIP44, BIP32, CoinJoin, DashPay). +/// - **Sync watermarks**: `synced_height` / `last_processed_height`. +/// +/// # Deferred to the first post-load `sync` (safe re-warm) +/// +/// - **Per-account UTXO attribution**: `core_utxos.account_index` is +/// written as `0` at persist time, so per-account bucketing is not +/// recoverable from disk; UTXOs are restored against the wallet's +/// first funds-bearing account and re-attributed on the next scan. +/// The *wallet total* is unaffected (it is a sum across all funds +/// accounts). +/// - **`last_applied_chain_lock`**: not a persisted column (V001) and +/// never written by the core-state writer; always `None` from disk. +/// SPV re-applies a fresh chainlock on the first post-restart sync. +/// - **Per-UTXO `is_coinbase` / `is_instantlocked` / `is_trusted` +/// flags**: not columns in `core_utxos`; conservatively defaulted +/// (non-coinbase, confirmed-by-height) and refreshed on the next +/// scan. Coinbase-maturity nuance re-warms on sync. +/// - **Transaction-record history**: rebuilt by the next scan; not a +/// balance input. +/// +/// # Errors +/// +/// [`PlatformWalletError::RehydrationTopologyUnsupported`] if there are +/// persisted UTXOs to restore but the reconstructed account collection +/// has **no** funds-bearing account to hold them. Fail-closed rather +/// than reconstructing a silent zero balance (the no-silent-zero +/// mandate). An empty UTXO set is always `Ok`. +/// +/// This never logs and never touches key material. 
+pub fn apply_persisted_core_state( + wallet_info: &mut ManagedWalletInfo, + core: &crate::changeset::CoreChangeSet, +) -> Result<(), PlatformWalletError> { + use key_wallet::wallet::managed_wallet_info::wallet_info_interface::WalletInfoInterface; + + // Sync watermarks first so `update_balance`'s maturity check sees + // the restored tip. + if let Some(h) = core.last_processed_height { + wallet_info.metadata.last_processed_height = + wallet_info.metadata.last_processed_height.max(h); + } + if let Some(h) = core.synced_height { + wallet_info.metadata.synced_height = wallet_info.metadata.synced_height.max(h); + } + + // Restore the UTXO set. Persisted attribution is lost at write time + // (account_index is always 0), so route every restored UTXO to the + // wallet's first funds-bearing account *of any topology* (BIP44, + // BIP32, CoinJoin, DashPay) — the wallet total is a sum across all + // funds accounts and stays exact. A wallet with persisted UTXOs but + // no funds account at all cannot be represented: fail closed rather + // than silently reconstruct a zero balance. + let unspent: Vec<&key_wallet::Utxo> = core + .new_utxos + .iter() + .filter(|u| !core.spent_utxos.iter().any(|s| s.outpoint == u.outpoint)) + .collect(); + if !unspent.is_empty() { + match wallet_info + .accounts + .all_funding_accounts_mut() + .into_iter() + .next() + { + Some(account) => { + for utxo in unspent { + account.utxos.insert(utxo.outpoint, utxo.clone()); + } + } + None => { + return Err(PlatformWalletError::RehydrationTopologyUnsupported { + wallet_id: wallet_info.wallet_id, + utxo_count: core.new_utxos.len(), + }); + } + } + } + + // Recompute per-account + wallet balance from the restored set. + // After this, a non-zero persisted balance is non-zero here — a + // silent zero would be a hard FAIL of the rehydration contract. 
+ wallet_info.update_balance(); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use key_wallet::wallet::initialization::WalletAccountCreationOptions; + + fn manifest_for(w: &Wallet) -> Vec { + w.accounts + .all_accounts() + .into_iter() + .map(|a| AccountRegistrationEntry { + account_type: a.account_type, + account_xpub: a.account_xpub, + }) + .collect() + } + + #[test] + fn watch_only_rebuild_round_trips_manifest_and_id() { + let seed = [3u8; 64]; + let w = Wallet::from_seed_bytes( + seed, + Network::Testnet, + WalletAccountCreationOptions::Default, + ) + .unwrap(); + let id = w.compute_wallet_id(); + let manifest = manifest_for(&w); + + let restored = build_watch_only_wallet(Network::Testnet, id, &manifest).unwrap(); + assert_eq!(restored.wallet_id, id); + assert_eq!(restored.compute_wallet_id(), id); + // Every manifest account survives the round trip (count, types). + let restored_types: Vec<_> = restored + .accounts + .all_accounts() + .into_iter() + .map(|a| a.account_type) + .collect(); + let manifest_types: Vec<_> = manifest.iter().map(|e| e.account_type).collect(); + assert_eq!(restored_types.len(), manifest_types.len()); + for t in &manifest_types { + assert!(restored_types.contains(t)); + } + } + + #[test] + fn empty_manifest_is_missing_manifest() { + let err = build_watch_only_wallet(Network::Testnet, [0u8; 32], &[]) + .expect_err("empty manifest must be MissingManifest"); + assert!(matches!(err, RehydrateRowError::MissingManifest)); + } +} diff --git a/packages/rs-platform-wallet/src/manager/shielded_sync.rs b/packages/rs-platform-wallet/src/manager/shielded_sync.rs index 482674b4322..e865640ede7 100644 --- a/packages/rs-platform-wallet/src/manager/shielded_sync.rs +++ b/packages/rs-platform-wallet/src/manager/shielded_sync.rs @@ -26,15 +26,17 @@ //! 
[`configure_shielded`]: crate::manager::PlatformWalletManager::configure_shielded use std::collections::BTreeMap; -use std::sync::{ - atomic::{AtomicBool, AtomicU64, Ordering}, - Arc, Mutex as StdMutex, -}; +use std::sync::Arc; + +#[cfg(test)] +use dash_async::AtomicFlagGuard; +use dash_async::{RefcountedFlagGuard, ThreadRegistry}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use tokio::sync::RwLock; -use tokio_util::sync::CancellationToken; +use super::coordinator_lifecycle::CoordinatorLifecycle; +use super::WalletWorker; use crate::events::PlatformEventManager; use crate::wallet::platform_wallet::WalletId; use crate::wallet::shielded::{NetworkShieldedCoordinator, ShieldedSyncSummary}; @@ -139,43 +141,30 @@ pub struct ShieldedSyncManager { /// run first, so an empty slot guarantees no shielded state /// exists). coordinator_slot: Arc>>>, - /// Cancel token for the background loop, if running. - background_cancel: StdMutex>, - /// Monotonically increasing generation counter. Bumped on every - /// `start()` so the exiting thread can tell whether its - /// generation is still the active one before clearing - /// `background_cancel`. Without this, a `stop()` → `start()` - /// overlap lets the prior thread's cleanup strip the new - /// generation's token, leaving the new loop running but - /// untrackable via `is_running()`. - background_generation: AtomicU64, - interval_secs: AtomicU64, - is_syncing: AtomicBool, - /// Set by [`quiesce`](Self::quiesce) to gate new passes while it - /// drains an in-flight one. `sync_now` / `sync_wallet` bail (after - /// taking the `is_syncing` slot) when this is set, so once `quiesce` - /// observes `is_syncing == false` no further pass can start — giving - /// Clear / stop a real "no more host-visible mutations" barrier that - /// cancel-only [`stop`](Self::stop) does not provide. - quiescing: AtomicBool, - /// Unix seconds of the last completed pass. `0` = never. 
- last_sync_unix: AtomicU64, + /// Shared lifecycle state + pass-gating protocol under the + /// [`WalletWorker::ShieldedSync`] key: registry handle, polling + /// interval, the `is_syncing` / `quiescing` handshake, and the + /// last-sync stamp. `start` / `stop` / `is_running` / `quiesce` and the + /// `sync_now` / `sync_wallet` pass gate delegate to it. The `quiescing` + /// half gives Clear / stop a real "no more host-visible mutations" + /// barrier that cancel-only [`stop`](Self::stop) does not provide. + lifecycle: CoordinatorLifecycle, } impl ShieldedSyncManager { pub fn new( event_manager: Arc, coordinator_slot: Arc>>>, + registry: Arc>, ) -> Self { Self { event_manager, coordinator_slot, - background_cancel: StdMutex::new(None), - background_generation: AtomicU64::new(0), - interval_secs: AtomicU64::new(DEFAULT_SYNC_INTERVAL_SECS), - is_syncing: AtomicBool::new(false), - quiescing: AtomicBool::new(false), - last_sync_unix: AtomicU64::new(0), + lifecycle: CoordinatorLifecycle::new( + registry, + WalletWorker::ShieldedSync, + DEFAULT_SYNC_INTERVAL_SECS, + ), } } @@ -183,35 +172,28 @@ impl ShieldedSyncManager { /// /// The running loop picks this up on its next sleep. pub fn set_interval(&self, interval: Duration) { - let secs = interval.as_secs().max(1); - self.interval_secs.store(secs, Ordering::Release); + self.lifecycle.set_interval(interval); } /// Current polling interval. pub fn interval(&self) -> Duration { - Duration::from_secs(self.interval_secs.load(Ordering::Acquire)) + self.lifecycle.interval() } /// Whether the background loop is currently running. pub fn is_running(&self) -> bool { - self.background_cancel - .lock() - .map(|g| g.is_some()) - .unwrap_or(false) + self.lifecycle.is_running() } /// Whether a sync pass is in flight right now. pub fn is_syncing(&self) -> bool { - self.is_syncing.load(Ordering::Acquire) + self.lifecycle.is_syncing() } /// Unix seconds of the last completed pass, or `None` if no pass /// has ever completed. 
pub fn last_sync_unix_seconds(&self) -> Option { - match self.last_sync_unix.load(Ordering::Acquire) { - 0 => None, - n => Some(n), - } + self.lifecycle.last_sync_unix_seconds() } /// Start the background sync loop. Idempotent — calling while @@ -222,78 +204,35 @@ impl ShieldedSyncManager { /// GRPC client state isn't `Send + Sync`). Same trade-off as /// [`PlatformAddressSyncManager::start`](super::platform_address_sync::PlatformAddressSyncManager::start). pub fn start(self: Arc) { - let mut guard = self.background_cancel.lock().expect("bg_cancel poisoned"); - if guard.is_some() { - return; - } - let cancel = CancellationToken::new(); - *guard = Some(cancel.clone()); - // Bump the generation while we still hold the slot lock so - // the load below in any prior thread's cleanup observes - // `current_gen != my_gen` ordered against this token swap. - let my_gen = self.background_generation.fetch_add(1, Ordering::AcqRel) + 1; - drop(guard); - - let handle = tokio::runtime::Handle::current(); - let this = self; - std::thread::Builder::new() - .name("shielded-sync".into()) - .spawn(move || { - handle.block_on(async move { - loop { - if cancel.is_cancelled() { - break; - } - - // Background-loop cadence — honor the - // per-wallet caught-up cooldown so a - // sleepy network doesn't refetch + - // re-trial-decrypt the partial buffer - // chunk every interval. User-initiated - // syncs pass `force=true` to the FFI - // entry point below and bypass this. - this.sync_now(false).await; - - let interval = this.interval(); - tokio::select! { - _ = tokio::time::sleep(interval) => {} - _ = cancel.cancelled() => break, - } - } - - // Only clear `background_cancel` if the active - // generation is still ours. Without this guard a - // tight `stop()` → `start()` reschedule has the - // exiting thread overwrite the *new* generation's - // token, leaving the new loop running but - // unreflectable via `is_running()` / `stop()`. 
- if this.background_generation.load(Ordering::Acquire) == my_gen { - if let Ok(mut guard) = this.background_cancel.lock() { - *guard = None; - } - } - }); - }) - .expect("failed to spawn shielded-sync thread"); + // Background cadence passes `force=false` to honor the per-wallet + // caught-up cooldown; user-initiated syncs pass `force=true` via the + // FFI `sync_now`. + let pass_self = Arc::clone(&self); + let interval_self = Arc::clone(&self); + self.lifecycle.spawn_periodic_loop( + move || { + let this = Arc::clone(&pass_self); + async move { + let _ = this.sync_now(false).await; + } + }, + move || interval_self.interval(), + ); } /// Stop the background sync loop. No-op if not running. /// /// **Cancel-only**: this requests cancellation and returns /// immediately. A pass already inside `sync_now` / - /// `coordinator.sync()` keeps running to completion (including its - /// persister-callback fan-out). For a real "nothing is running and - /// nothing more will be persisted" barrier — required by Clear, - /// unregister, and rebind — use [`quiesce`](Self::quiesce). + /// `coordinator.sync()` is **cancelled mid-flight** at its next + /// `.await` (the loop's `biased; cancel-first` select drops the sync + /// future, see `start`). Persister-callback fan-out for already- + /// completed stores has fired; any in-flight store is abandoned. + /// For a real "nothing is running and nothing more will be + /// persisted" barrier — required by Clear, unregister, and rebind — + /// use [`quiesce`](Self::quiesce). pub fn stop(&self) { - if let Some(token) = self - .background_cancel - .lock() - .expect("bg_cancel poisoned") - .take() - { - token.cancel(); - } + self.lifecycle.stop(); } /// Cancel the background loop **and wait for any in-flight sync pass @@ -313,13 +252,51 @@ impl ShieldedSyncManager { /// including the persister fan-out, so its falling edge (with the /// gate up) is a sound "fully drained" signal. 
The gate is reopened /// before returning so a later start/sync works normally. - pub async fn quiesce(&self) { - self.quiescing.store(true, Ordering::Release); - self.stop(); - while self.is_syncing.load(Ordering::Acquire) { - tokio::time::sleep(Duration::from_millis(20)).await; - } - self.quiescing.store(false, Ordering::Release); + /// + /// Finally **joins** the loop's OS thread (after the drain, so the + /// thread is on its way out) and returns its terminal status. Joining + /// while the runtime is still alive is what lets the manager promise + /// the `!Send` loop has stopped touching `tokio::time` before a + /// one-shot host drops the runtime. + pub async fn quiesce(&self) -> dash_async::WorkerStatus { + self.lifecycle.quiesce().await + } + + /// Drain + join **without touching the `quiescing` gate**, for a caller + /// (the Clear flow) that already holds it raised via + /// [`hold_quiescing_gate`](Self::hold_quiescing_gate) and keeps holding + /// it across the whole teardown. See + /// [`CoordinatorLifecycle::quiesce_under_held_gate`]. + pub(crate) async fn quiesce_under_held_gate(&self) -> dash_async::WorkerStatus { + self.lifecycle.quiesce_under_held_gate().await + } + + /// Test seam: enter a sync pass directly (claim `is_syncing` via the + /// pass gate) so a teardown test can stand in for a direct + /// `sync_now`/`sync_wallet` already in flight, without driving the real + /// (coordinator-backed) sync path. The returned guard clears the flag + /// on drop. + #[cfg(test)] + pub(crate) fn begin_pass_for_test(&self) -> Option> { + self.lifecycle.begin_pass() + } + + /// Raise the `quiescing` gate and hold it raised until the returned + /// guard drops. 
Under refcount semantics multiple holders compose, so + /// this lets a multi-step teardown (Clear) keep new direct `sync_now` / + /// `sync_wallet` passes off across a check-then-wipe even when a racing + /// public `quiesce()` lands inside the window — neither party's Drop + /// can lower the other's barrier. + pub(crate) fn hold_quiescing_gate(&self) -> RefcountedFlagGuard<'_> { + self.lifecycle.hold_quiescing_gate() + } + + /// Test-only read of the underlying `quiescing` flag. Used by + /// regression tests asserting gate-continuity under concurrent + /// (re)starts during a Clear. + #[cfg(test)] + pub(crate) fn quiescing_load_for_test(&self, ordering: std::sync::atomic::Ordering) -> bool { + self.lifecycle.quiescing_load_for_test(ordering) } /// Run one sync pass across every registered wallet. @@ -334,21 +311,13 @@ impl ShieldedSyncManager { /// If a pass is already in flight, returns an empty summary and /// skips — the caller can inspect [`is_syncing`] to distinguish. pub async fn sync_now(&self, force: bool) -> ShieldedSyncPassSummary { - if self - .is_syncing - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) - .is_err() - { + // Claim the pass slot and honour the quiescing gate; bail with an + // empty summary if a pass is already in flight or a teardown + // (Clear/stop) raised the gate. The guard clears `is_syncing` on + // every exit path. + let Some(_pass) = self.lifecycle.begin_pass() else { return ShieldedSyncPassSummary::default(); - } - - // A `quiesce()` may have raised the gate between our CAS and - // here; if so, release the slot and bail without running a pass - // so the drain can complete and Clear/stop get a true barrier. 
- if self.quiescing.load(Ordering::Acquire) { - self.is_syncing.store(false, Ordering::Release); - return ShieldedSyncPassSummary::default(); - } + }; // Snapshot the coordinator Arc and release the slot lock // before awaiting so a concurrent `configure_shielded` @@ -380,21 +349,18 @@ impl ShieldedSyncManager { if summary.sync_unix_seconds == 0 { summary.sync_unix_seconds = now; } - self.last_sync_unix - .store(summary.sync_unix_seconds, Ordering::Release); - - // Dispatch the completion event BEFORE clearing `is_syncing`. - // `quiesce()` drains on the falling edge of `is_syncing`, so if - // we cleared the flag first a stop/clear caller could unblock - // while this completion event (FFI callback → Swift - // `handleShieldedSyncCompleted`) is still pending — surfacing a - // stale post-stop/post-clear event. Holding the flag across the - // dispatch makes quiesce's barrier cover the event too. + self.lifecycle + .store_last_sync_unix(summary.sync_unix_seconds); + + // Dispatch the completion event BEFORE the `_pass` guard drops. + // `quiesce()` drains on the falling edge of `is_syncing`; if + // the guard cleared the flag before the dispatch a stop/clear + // caller could unblock while the callback is still pending — + // surfacing a stale post-stop/post-clear event. self.event_manager.on_shielded_sync_completed(&summary); - self.is_syncing.store(false, Ordering::Release); - summary + // `_pass` drops here → `is_syncing = false` } /// Sync a single wallet on demand. @@ -425,27 +391,16 @@ impl ShieldedSyncManager { }; // Reuse the manager-wide `is_syncing` flag so a per-wallet - // `sync_wallet()` can't race the periodic `sync_now()` - // against the same store — both go through - // `coordinator.sync()`, which serializes per-coordinator - // but the manager flag is what the host UI watches. 
- if self - .is_syncing - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) - .is_err() - { - return Ok(None); - } - - // Bail if a `quiesce()` raised the gate after our CAS (see - // `sync_now`) so the drain barrier holds. - if self.quiescing.load(Ordering::Acquire) { - self.is_syncing.store(false, Ordering::Release); + // `sync_wallet()` can't race the periodic `sync_now()` against the + // same store — both go through `coordinator.sync()`, which + // serializes per-coordinator, but the manager flag is what the host + // UI watches. Bail (Ok(None)) if a pass is already in flight or a + // teardown raised the quiescing gate. + let Some(_pass) = self.lifecycle.begin_pass() else { return Ok(None); - } + }; let pass = coordinator.sync(force).await; - self.is_syncing.store(false, Ordering::Release); // Extract this wallet's slice from the network-wide pass // summary. If the wallet is registered, we'll get back an @@ -470,8 +425,88 @@ impl std::fmt::Debug for ShieldedSyncManager { f.debug_struct("ShieldedSyncManager") .field("is_running", &self.is_running()) .field("is_syncing", &self.is_syncing()) - .field("interval_secs", &self.interval_secs.load(Ordering::Acquire)) + .field("interval_secs", &self.lifecycle.interval_secs()) .field("last_sync_unix", &self.last_sync_unix_seconds()) .finish() } } + +#[cfg(test)] +mod tests { + use super::*; + + use crate::events::PlatformEventHandler; + + /// Build a manager with an empty coordinator slot and a no-handler + /// event manager. An empty slot makes `sync_now` return an empty + /// summary, but it still drives the full timestamp + completion + /// protocol and — crucially for this test — the generation-guarded + /// background loop, without needing a live `NetworkShieldedCoordinator`. 
+ fn make_manager() -> Arc { + let event_manager = Arc::new(PlatformEventManager::new(Vec::< + Arc, + >::new())); + let coordinator_slot = Arc::new(RwLock::new(None)); + // merge: port #3953's helper onto our 3-arg new() (registry added by + // the ThreadRegistry refactor), mirroring the sibling address-sync test. + let registry = ThreadRegistry::new(); + Arc::new(ShieldedSyncManager::new( + event_manager, + coordinator_slot, + registry, + )) + } + + // merge: #3953's generation-guard test ported onto our ThreadRegistry + // refactor — the registry's per-key clearing latch is the equivalent guard. + /// Restart-in-place regression: a tight `start()` → `stop()` → `start()` + /// must leave the manager *running* on the new loop. The cancelled stale + /// loop races to clear its registry slot as it exits; the registry's + /// per-key clearing latch must stop it from stripping the freshly + /// installed loop's running state — otherwise the new loop keeps running + /// but becomes invisible to `is_running()` / `stop()`. + /// + /// Determinism: the only wait is a *bounded* poll. With the latch in + /// place `is_running()` is true for the whole window, so the test + /// never fails spuriously on correct code. A regression flips it false + /// within milliseconds once the stale loop clears the slot, which the + /// poll catches. Needs the multi-thread flavor because `start()` + /// drives its loop via `Handle::current().block_on` on a dedicated OS + /// thread, which would deadlock a single-threaded test runtime. + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn restart_in_place_keeps_running_after_stale_loop_exits() { + let mgr = make_manager(); + + // Gen 1. Wait (bounded) for the first pass to land — a real + // lifecycle signal that the loop is now parked in its interval + // sleep, so its cleanup is still pending when we stop+restart. 
+ Arc::clone(&mgr).start(); + let mut waited = 0; + while mgr.last_sync_unix_seconds().is_none() { + assert!(waited < 200, "gen-1's first sync pass never completed"); + tokio::time::sleep(Duration::from_millis(10)).await; + waited += 1; + } + + // Tight stop→start with no await between: the just-cancelled gen-1 + // loop cannot reach its cleanup before gen 2 is installed, so the + // race window the guard protects is reliably open. + mgr.stop(); + Arc::clone(&mgr).start(); + + // Give the stale gen-1 loop ample time to run its (guarded) + // cleanup. `is_running()` must stay true throughout. + for _ in 0..100 { + assert!( + mgr.is_running(), + "stale gen-1 loop cleared gen-2's cancel token — generation guard regressed" + ); + tokio::time::sleep(Duration::from_millis(10)).await; + } + + // The surviving loop is the tracked one: a single `stop()` fully + // reflects it, so there is no orphaned unreflectable duplicate. + mgr.stop(); + assert!(!mgr.is_running(), "stop() must reflect the live loop"); + } +} diff --git a/packages/rs-platform-wallet/src/wallet/apply.rs b/packages/rs-platform-wallet/src/wallet/apply.rs index 8c690543ee0..099b77f97fd 100644 --- a/packages/rs-platform-wallet/src/wallet/apply.rs +++ b/packages/rs-platform-wallet/src/wallet/apply.rs @@ -148,93 +148,17 @@ impl PlatformWalletInfo { } } - // 2b. Identity keys. Runs after the scalar identity pass so - // the owning ManagedIdentity is guaranteed to exist before - // we layer keys into it. Upserts land first, then removals, - // matching the discipline used across the rest of this - // function. Orphan entries (owner not in the wallet) are - // logged and skipped by the per-entry apply helpers. - if let Some(keys_cs) = identity_keys { - let crate::changeset::IdentityKeysChangeSet { upserts, removed } = keys_cs; - // Thread the wallet network through so the key-apply - // path can reproduce DIP-9 derivation paths for any - // entry that carries `(wallet_id, derivation_indices)`. 
- let network = wallet.network; - for (_key, entry) in upserts { - self.identity_manager - .apply_identity_key_entry(entry, network); - } - for (identity_id, key_id) in removed { - self.identity_manager - .apply_identity_key_removal(&identity_id, key_id); - } - } - - // 3. Contacts. Each entry routes to its owning ManagedIdentity by - // `(owner, contact)` key; orphans (owner not in the wallet) - // are logged and skipped. Trivial map ops (sent / incoming - // insert and remove) are inlined here — no helper earns its - // name for a single `insert` / `shift_remove` call. Only - // `apply_established_contact` is a method because it has - // real logic (drops both pending sides per the contract). - if let Some(contact_cs) = contacts { - let crate::changeset::ContactChangeSet { - sent_requests, - removed_sent, - incoming_requests, - removed_incoming, - established, - } = contact_cs; - - for (key, entry) in sent_requests { - match self.identity_manager.managed_identity_mut(&key.owner_id) { - Some(managed) => { - managed - .sent_contact_requests - .insert(entry.request.recipient_id, entry.request); - } - None => tracing::warn!( - owner = %key.owner_id, - "skipping sent contact request during apply: owner identity not in wallet" - ), - } - } - for (key, entry) in incoming_requests { - match self.identity_manager.managed_identity_mut(&key.owner_id) { - Some(managed) => { - managed - .incoming_contact_requests - .insert(entry.request.sender_id, entry.request); - } - None => tracing::warn!( - owner = %key.owner_id, - "skipping incoming contact request during apply: owner identity not in wallet" - ), - } - } - for key in removed_sent { - if let Some(managed) = self.identity_manager.managed_identity_mut(&key.owner_id) { - managed.sent_contact_requests.remove(&key.recipient_id); - } - } - for key in removed_incoming { - if let Some(managed) = self.identity_manager.managed_identity_mut(&key.owner_id) { - managed.incoming_contact_requests.remove(&key.sender_id); - } - } - // 
Established promotions — drop any matching pending - // entries on both sides per the auto-establishment contract. - for (key, established) in established { - match self.identity_manager.managed_identity_mut(&key.owner_id) { - Some(managed) => { - managed.apply_established_contact(established); - } - None => tracing::warn!( - owner = %key.owner_id, - "skipping established contact during apply: owner identity not in wallet" - ), - } - } + // 2b/3. Identity keys + contacts. Keys are layered before + // contacts so a contact entry never lands before its + // owner's keys; orphans are logged and skipped. Single + // source of truth shared with the persister rehydration + // path (`load_from_persistor`). + if identity_keys.is_some() || contacts.is_some() { + self.identity_manager.apply_contacts_and_keys( + contacts.unwrap_or_default(), + identity_keys.unwrap_or_default(), + wallet.network, + ); } // 3b. DashPay profile/payment overlays. Applied AFTER identities diff --git a/packages/rs-platform-wallet/src/wallet/identity/state/manager/apply.rs b/packages/rs-platform-wallet/src/wallet/identity/state/manager/apply.rs index 7d04f29c538..09dd930c0cc 100644 --- a/packages/rs-platform-wallet/src/wallet/identity/state/manager/apply.rs +++ b/packages/rs-platform-wallet/src/wallet/identity/state/manager/apply.rs @@ -15,7 +15,7 @@ //! [`IdentityManager::apply_identity_key_entry`]. 
use super::{IdentityLocation, IdentityManager}; -use crate::changeset::{IdentityEntry, IdentityKeyEntry}; +use crate::changeset::{ContactChangeSet, IdentityEntry, IdentityKeyEntry, IdentityKeysChangeSet}; use crate::wallet::identity::state::managed_identity::ManagedIdentity; use dpp::identity::accessors::IdentityGettersV0; use dpp::identity::identity_public_key::accessors::v0::IdentityPublicKeyGettersV0; @@ -181,4 +181,85 @@ impl IdentityManager { managed.identity.public_keys_mut().remove(&key_id); } } + + /// Layer a [`ContactChangeSet`] + [`IdentityKeysChangeSet`] onto the + /// already-restored managed identities. + /// + /// Single source of truth for the contact / identity-key routing — + /// shared by the runtime changeset-replay path + /// ([`apply_changeset`](crate::wallet::PlatformWalletInfo::apply_changeset)) + /// and the persister rehydration path + /// ([`load_from_persistor`](crate::PlatformWalletManager::load_from_persistor)). + /// Identity keys are applied first so a contact entry never lands + /// before its owner's keys; orphan entries (owner not in the + /// wallet) are logged and skipped, never fatal. `removed_*` are + /// honoured for the replay path; the rehydration feed leaves them + /// empty. 
+ pub(crate) fn apply_contacts_and_keys( + &mut self, + contacts: ContactChangeSet, + identity_keys: IdentityKeysChangeSet, + network: Network, + ) { + let IdentityKeysChangeSet { upserts, removed } = identity_keys; + for (_key, entry) in upserts { + self.apply_identity_key_entry(entry, network); + } + for (identity_id, key_id) in removed { + self.apply_identity_key_removal(&identity_id, key_id); + } + + let ContactChangeSet { + sent_requests, + removed_sent, + incoming_requests, + removed_incoming, + established, + } = contacts; + for (key, entry) in sent_requests { + match self.managed_identity_mut(&key.owner_id) { + Some(managed) => { + managed + .sent_contact_requests + .insert(entry.request.recipient_id, entry.request); + } + None => tracing::warn!( + owner = %key.owner_id, + "skipping sent contact request: owner identity not in wallet" + ), + } + } + for (key, entry) in incoming_requests { + match self.managed_identity_mut(&key.owner_id) { + Some(managed) => { + managed + .incoming_contact_requests + .insert(entry.request.sender_id, entry.request); + } + None => tracing::warn!( + owner = %key.owner_id, + "skipping incoming contact request: owner identity not in wallet" + ), + } + } + for key in removed_sent { + if let Some(managed) = self.managed_identity_mut(&key.owner_id) { + managed.sent_contact_requests.remove(&key.recipient_id); + } + } + for key in removed_incoming { + if let Some(managed) = self.managed_identity_mut(&key.owner_id) { + managed.incoming_contact_requests.remove(&key.sender_id); + } + } + for (key, established) in established { + match self.managed_identity_mut(&key.owner_id) { + Some(managed) => managed.apply_established_contact(established), + None => tracing::warn!( + owner = %key.owner_id, + "skipping established contact: owner identity not in wallet" + ), + } + } + } } diff --git a/packages/rs-platform-wallet/tests/rehydration_load.rs b/packages/rs-platform-wallet/tests/rehydration_load.rs new file mode 100644 index 
00000000000..8775ebf7191 --- /dev/null +++ b/packages/rs-platform-wallet/tests/rehydration_load.rs @@ -0,0 +1,277 @@ +//! Item E — `load_from_persistor` (seedless / watch-only) end-to-end +//! through a real `PlatformWalletManager`. +//! +//! Scope after the seedless rework: load reconstructs every persisted +//! wallet **watch-only** from its keyless account manifest. The load +//! path never touches the seed, so it performs no wrong-seed check; a +//! sign-time gate is deferred separate FFI work and is not part of this +//! path. Per-row decode failures surface as +//! [`SkipReason::CorruptPersistedRow`] without aborting the batch. +//! +//! RT cases here: +//! - RT-WO: round-trip — watch-only wallet is registered after reload. +//! - RT-Corrupt: a row with an empty manifest is skipped with +//! `MissingManifest`, the other row loads, a `WalletSkippedOnLoad` +//! event fires, `load` returns `Ok`. +//! - RT-Z: no key/seed material in any `LoadOutcome` / `SkipReason` +//! surface (the structural-only contract). + +use std::sync::{Arc, Mutex}; + +use key_wallet::wallet::initialization::WalletAccountCreationOptions; +use key_wallet::wallet::Wallet; +use platform_wallet::changeset::{ + AccountRegistrationEntry, ClientStartState, ClientWalletStartState, CoreChangeSet, + PersistenceError, PlatformWalletChangeSet, PlatformWalletPersistence, +}; +use platform_wallet::events::{EventHandler, PlatformEvent, PlatformEventHandler}; +use platform_wallet::manager::load_outcome::CorruptKind; +use platform_wallet::wallet::platform_wallet::WalletId; +use platform_wallet::{PlatformWalletManager, SkipReason}; + +// ---- test doubles ---- + +/// Persister whose `load()` returns a fixed keyless `ClientStartState`. 
+struct FixedLoadPersister { + state: Mutex>, +} + +impl FixedLoadPersister { + fn new() -> Self { + Self { + state: Mutex::new(None), + } + } + fn set(&self, s: ClientStartState) { + *self.state.lock().unwrap() = Some(s); + } +} + +impl PlatformWalletPersistence for FixedLoadPersister { + fn store(&self, _: WalletId, _: PlatformWalletChangeSet) -> Result<(), PersistenceError> { + Ok(()) + } + fn flush(&self, _: WalletId) -> Result<(), PersistenceError> { + Ok(()) + } + fn load(&self) -> Result { + // Rebuild a fresh ClientStartState each call (load may be + // called twice for the recoverability sub-case). + let guard = self.state.lock().unwrap(); + match guard.as_ref() { + None => Ok(ClientStartState::default()), + Some(s) => { + let mut out = ClientStartState::default(); + for (id, w) in &s.wallets { + out.wallets.insert( + *id, + ClientWalletStartState { + network: w.network, + birth_height: w.birth_height, + account_manifest: w.account_manifest.clone(), + core_state: w.core_state.clone(), + identity_manager: Default::default(), + unused_asset_locks: Default::default(), + contacts: Default::default(), + identity_keys: Default::default(), + }, + ); + } + Ok(out) + } + } + } +} + +/// Event handler recording every `PlatformEvent`. 
+#[derive(Default)] +struct RecordingHandler { + events: Mutex>, +} +impl EventHandler for RecordingHandler {} +impl PlatformEventHandler for RecordingHandler { + fn on_platform_event(&self, event: &PlatformEvent) { + self.events.lock().unwrap().push(event.clone()); + } +} + +// ---- harness ---- + +fn manifest_and_id(seed: [u8; 64]) -> (Vec, [u8; 32]) { + let w = Wallet::from_seed_bytes( + seed, + key_wallet::Network::Testnet, + WalletAccountCreationOptions::Default, + ) + .unwrap(); + let manifest = w + .accounts + .all_accounts() + .into_iter() + .map(|a| AccountRegistrationEntry { + account_type: a.account_type, + account_xpub: a.account_xpub, + }) + .collect(); + (manifest, w.compute_wallet_id()) +} + +fn slice(seed: [u8; 64]) -> (WalletId, ClientWalletStartState) { + let (manifest, id) = manifest_and_id(seed); + ( + id, + ClientWalletStartState { + network: key_wallet::Network::Testnet, + birth_height: 1, + account_manifest: manifest, + core_state: CoreChangeSet::default(), + identity_manager: Default::default(), + unused_asset_locks: Default::default(), + contacts: Default::default(), + identity_keys: Default::default(), + }, + ) +} + +async fn manager( + persister: Arc, + handler: Arc, +) -> Arc> { + let sdk = Arc::new(dash_sdk::Sdk::new_mock()); + Arc::new(PlatformWalletManager::new(sdk, persister, handler)) +} + +// ---- tests ---- + +/// RT-WO: seedless watch-only round-trip — a persisted wallet loads and +/// is registered after reload (no signing material needed). 
+#[tokio::test] +async fn rt_wo_watch_only_roundtrip() { + let seed = [0x11; 64]; + let p = Arc::new(FixedLoadPersister::new()); + let h = Arc::new(RecordingHandler::default()); + let (id, s) = slice(seed); + let mut st = ClientStartState::default(); + st.wallets.insert(id, s); + p.set(st); + + let mgr = manager(Arc::clone(&p), Arc::clone(&h)).await; + let outcome = mgr.load_from_persistor().await.expect("Ok"); + + assert_eq!(outcome.loaded, vec![id]); + assert!(outcome.skipped.is_empty()); + assert!( + mgr.get_wallet(&id).await.is_some(), + "watch-only restored wallet must be registered" + ); + assert_eq!(mgr.wallet_ids().await, vec![id]); +} + +/// RT-Corrupt: a corrupt row (empty manifest) is skipped with +/// `MissingManifest`; the other row loads cleanly; the load returns +/// `Ok`; exactly one `WalletSkippedOnLoad` event fires for the skipped +/// row. +#[tokio::test] +async fn rt_corrupt_row_skipped_and_other_loads() { + let seed_a = [0x31; 64]; + let seed_b = [0x32; 64]; + let p = Arc::new(FixedLoadPersister::new()); + let h = Arc::new(RecordingHandler::default()); + let (id_a, sa) = slice(seed_a); + let (id_b, _sb) = slice(seed_b); + + // B's row is structurally corrupt — empty manifest. 
+ let sb_corrupt = ClientWalletStartState { + network: key_wallet::Network::Testnet, + birth_height: 1, + account_manifest: Vec::new(), + core_state: CoreChangeSet::default(), + identity_manager: Default::default(), + unused_asset_locks: Default::default(), + contacts: Default::default(), + identity_keys: Default::default(), + }; + + let mut st = ClientStartState::default(); + st.wallets.insert(id_a, sa); + st.wallets.insert(id_b, sb_corrupt); + p.set(st); + + let mgr = manager(Arc::clone(&p), Arc::clone(&h)).await; + let outcome = mgr + .load_from_persistor() + .await + .expect("Ok despite per-row skip"); + + assert!(outcome.loaded.contains(&id_a), "A loads fully"); + assert!(!outcome.loaded.contains(&id_b), "B is skipped, not loaded"); + assert_eq!(outcome.skipped.len(), 1); + let (skipped_id, skipped_reason) = &outcome.skipped[0]; + assert_eq!(*skipped_id, id_b); + assert!(matches!( + skipped_reason, + SkipReason::CorruptPersistedRow { + kind: CorruptKind::MissingManifest + } + )); + assert!(mgr.get_wallet(&id_a).await.is_some()); + assert!( + mgr.get_wallet(&id_b).await.is_none(), + "corrupt row must be ABSENT, not a degraded placeholder" + ); + + // Exactly one WalletSkippedOnLoad event for B. + { + let events = h.events.lock().unwrap(); + assert_eq!(events.len(), 1); + match &events[0] { + PlatformEvent::WalletSkippedOnLoad { wallet_id, reason } => { + assert_eq!(*wallet_id, id_b); + assert!(matches!( + reason, + SkipReason::CorruptPersistedRow { + kind: CorruptKind::MissingManifest + } + )); + } + } + } +} + +/// RT-Z: no key/seed material leaks into `LoadOutcome` / +/// `SkipReason::CorruptPersistedRow` surfaces. The seedless load path +/// never sees seed bytes so this is mostly a sentinel guard against +/// future regression where someone embeds row contents in `DecodeError`. 
+#[tokio::test] +async fn rt_z_secret_hygiene_surfaces() { + let seed = [0xAB; 64]; + let p = Arc::new(FixedLoadPersister::new()); + let h = Arc::new(RecordingHandler::default()); + let (id, _s) = slice(seed); + + // Corrupt row to force a skip and inspect every public surface. + let corrupt = ClientWalletStartState { + network: key_wallet::Network::Testnet, + birth_height: 1, + account_manifest: Vec::new(), + core_state: CoreChangeSet::default(), + identity_manager: Default::default(), + unused_asset_locks: Default::default(), + contacts: Default::default(), + identity_keys: Default::default(), + }; + let mut st = ClientStartState::default(); + st.wallets.insert(id, corrupt); + p.set(st); + + let mgr = manager(Arc::clone(&p), Arc::clone(&h)).await; + let outcome = mgr.load_from_persistor().await.expect("Ok"); + let dbg = format!("{outcome:?}"); + // 0xAB seed bytes must not appear hex-rendered anywhere. + assert!(!dbg.to_lowercase().contains(&"ab".repeat(10))); + // The structural skip reason renders without any row bytes. + for (_, reason) in &outcome.skipped { + let rendered = format!("{reason} {reason:?}"); + assert!(!rendered.to_lowercase().contains(&"ab".repeat(10))); + } +} diff --git a/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletManager.swift b/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletManager.swift index 0e433d368ef..97bd03894cc 100644 --- a/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletManager.swift +++ b/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletManager.swift @@ -94,17 +94,17 @@ public class PlatformWalletManager: ObservableObject { /// enqueue time and `handleShieldedSyncCompleted` drops any event whose /// snapshot no longer matches the current generation. 
/// - /// The Rust quiesce barrier guarantees no persistence after stop/clear, - /// but the completion callback is re-dispatched onto this `@MainActor`, - /// so a final, already-dispatched event can land just after stop/clear - /// returns. A plain boolean gate is bypassable: a caller can stop (set - /// the flag) and restart (clear the flag) in the same actor turn, which - /// re-opens the gate before the stale, previously-enqueued completion - /// task runs — so the old event leaks into the new run. Tying - /// suppression to a generation closes that race: the stale task carries - /// the pre-stop generation, the restart does not reset the counter, so - /// the snapshot mismatches and the event is dropped even on a same-turn - /// restart. + /// `stop` cancels the in-flight pass before its completion fires; + /// `clear` goes further with a full Rust-side quiesce. Either way, an + /// already-dispatched completion event can still land on this + /// `@MainActor` after stop/clear returns. A plain boolean gate is + /// bypassable: a caller can stop (set the flag) and restart (clear it) + /// in the same actor turn, re-opening the gate before the stale + /// completion task runs — so the old event leaks into the new run. + /// Tying suppression to a generation closes that race: the stale task + /// carries the pre-stop generation, the restart does not reset the + /// counter, so the snapshot mismatches and the event is dropped even + /// on a same-turn restart. /// /// `nonisolated` + lock-guarded so the FFI callback thread can snapshot /// it without hopping onto the main actor first. @@ -128,12 +128,19 @@ public class PlatformWalletManager: ObservableObject { internal private(set) var handle: Handle = NULL_HANDLE /// Retained for the lifetime of the FFI handle so the callback - /// context pointer remains valid. - private var persistenceHandler: PlatformWalletPersistenceHandler? + /// context pointer remains valid. 
`nonisolated(unsafe)` so the + /// nonisolated `deinit` can `passRetained` the handler on an + /// incomplete-shutdown without Swift 6 strict-concurrency rejecting + /// the cross-isolation access — the FFI lifetime contract makes the + /// access safe (the handler is only ever read after the Rust side + /// has signalled the worker is wedged + the wrapper class itself is + /// being destroyed). + nonisolated(unsafe) private var persistenceHandler: PlatformWalletPersistenceHandler? /// Retained for the lifetime of the FFI handle so the event-handler - /// context pointer remains valid. - private var eventHandler: PlatformWalletEventHandler? + /// context pointer remains valid. See `persistenceHandler` for the + /// `nonisolated(unsafe)` rationale. + nonisolated(unsafe) private var eventHandler: PlatformWalletEventHandler? /// Background task that polls SPV progress. private var progressPollTask: Task? @@ -152,10 +159,49 @@ public class PlatformWalletManager: ObservableObject { deinit { progressPollTask?.cancel() - if handle != NULL_HANDLE { - platform_wallet_manager_platform_address_sync_stop(handle).discard() - platform_wallet_manager_shielded_sync_stop(handle).discard() - platform_wallet_manager_destroy(handle).discard() + guard handle != NULL_HANDLE else { return } + + // Tear down the Rust manager: signal the two host-driven sync + // loops (platform-address + shielded) to cancel and `discard()` + // them — both are cancel-only on the Rust side and never report + // an incomplete drain. Identity-sync and the event adapter are + // joined inside `destroy`, which is the single host-visible + // join point. + platform_wallet_manager_platform_address_sync_stop(handle).discard() + platform_wallet_manager_shielded_sync_stop(handle).discard() + + // Capture the CODE (not just free the message) for the one call + // that CAN report `.errorShutdownIncomplete`: `destroy`. 
Rust + // returns that code when a background coordinator did not drain + // within the join deadline, or when a prior-generation shielded + // thread is still parked alive as an orphan (a tight + // `stop()`→`start()` reap that had to detach it past the wedge + // backstop). In either case a lingering `!Send` coordinator + // thread may still hold the `passUnretained` context pointers + // Rust was handed for our `persistenceHandler` / `eventHandler` + // and fire ONE final callback through them. The contract: on + // that code the host must NOT free the callback context + // immediately. + let destroyCode = + platform_wallet_manager_destroy(handle).discardReturningCode() + + // Both handlers are passed to Rust via `Unmanaged.passUnretained` + // (see `PlatformWalletPersistenceHandler`/`PlatformWalletEventHandler` + // `makeCallbacks()`), so Rust holds non-owning pointers and these + // objects are kept alive ONLY by the stored properties below. The + // instant this deinit returns, ARC releases them — which would be a + // use-after-free if a lingering coordinator then fires its final + // callback. So, ONLY on an incomplete shutdown, deliberately leak one + // extra strong reference to each (an unbalanced `passRetained` that is + // never released) so they outlive any lingering thread. A clean + // shutdown (the common case) takes neither branch and releases the + // handlers normally — we never leak unconditionally. The leak is + // bounded by how often a shutdown wedges (rare) and trades two small + // objects for guaranteed callback safety, since an incomplete drain + // gives no later signal that the lingering thread has finally exited. 
+ if destroyCode == .errorShutdownIncomplete { + if let persistenceHandler { _ = Unmanaged.passRetained(persistenceHandler) } + if let eventHandler { _ = Unmanaged.passRetained(eventHandler) } } } @@ -323,15 +369,17 @@ public class PlatformWalletManager: ObservableObject { /// /// Calls `platform_wallet_manager_load_from_persistor` which fires /// the Swift-side `on_load_wallet_list_fn` callback. For each - /// persisted wallet, Rust reconstructs a **watch-only** `Wallet` - /// plus the wallet's persisted platform-address sync snapshot. - /// After the FFI returns, we call `platform_wallet_manager_get_wallet` - /// for each restored id so Swift gets a `ManagedPlatformWallet` - /// handle. + /// persisted wallet, Rust rebuilds a **watch-only** `Wallet` from + /// its keyless account manifest (`Wallet::new_watch_only`) and + /// applies the persisted platform-address sync snapshot. After the + /// FFI returns we call `platform_wallet_manager_get_wallet` for + /// each restored id so Swift gets a `ManagedPlatformWallet` handle. /// - /// Signing operations will fail until a future unlock flow - /// upgrades a watch-only wallet to a signing wallet via the - /// mnemonic stored in Keychain. + /// Signing happens on demand via the configured + /// `MnemonicResolverHandle`: each resolver-fed sign entrypoint + /// fail-closed gates the resolved seed against the loaded + /// `wallet_id` and surfaces a structural wrong-seed error on + /// mismatch (no keys cross that surface). /// /// Idempotent: if there's no persisted state, does nothing and /// leaves `self.wallets` untouched. 
Safe to call before any @@ -340,7 +388,12 @@ public class PlatformWalletManager: ObservableObject { public func loadFromPersistor() throws -> [ManagedPlatformWallet] { try ensureConfigured() - try platform_wallet_manager_load_from_persistor(handle).check() + // Pass nil for `out_outcome` — Swift doesn't currently consume + // the per-wallet skip summary (corrupt persisted rows are + // logged by Rust at warn level). When Swift starts surfacing + // skipped wallets to the UI, pass a `LoadOutcomeFFI` here and + // free it with `platform_wallet_load_outcome_free`. + try platform_wallet_manager_load_from_persistor(handle, nil).check() // Ask SwiftData for the list of wallet ids we just told Rust // to load. We reuse the same container rather than shipping a diff --git a/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletManagerShieldedSync.swift b/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletManagerShieldedSync.swift index 7d2429b56c4..a6adfe65b50 100644 --- a/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletManagerShieldedSync.swift +++ b/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletManagerShieldedSync.swift @@ -282,9 +282,9 @@ extension PlatformWalletManager { ) } try platform_wallet_manager_shielded_sync_stop(handle).check() - // The Rust drain returned; bump the generation so any trailing - // completion event the main actor delivers after this point is - // dropped (its snapshot predates this bump). + // Rust stop is cancel-only. Bump the generation so any trailing + // completion event the main actor still delivers from a pre-stop + // snapshot is dropped on arrival. 
shieldedSyncGeneration.bump() // The dropped completion would normally clear the per-pass // progress mirrors; do it here so a pass stopped mid-flight diff --git a/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletResult.swift b/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletResult.swift index 2c311f91e9d..c24f72fbf8c 100644 --- a/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletResult.swift +++ b/packages/swift-sdk/Sources/SwiftDashSDK/PlatformWallet/PlatformWalletResult.swift @@ -39,6 +39,12 @@ public enum PlatformWalletResultCode: Int32, Sendable { /// outcome. Do NOT auto-retry — a retry would rebuild the bundle and /// could double-execute if the original landed. case errorShieldedSpendUnconfirmed = 18 + /// A destroy/stop/clear completed but a background coordinator did not + /// exit cleanly (timed out or ended non-cleanly). The host should defer + /// freeing its callback context — a lingering coordinator may still fire + /// one final callback through it — and, on the clear path, must NOT + /// commit its own persistence wipe (the Rust store was left intact). + case errorShutdownIncomplete = 19 case notFound = 98 case errorUnknown = 99 @@ -82,6 +88,8 @@ public enum PlatformWalletResultCode: Int32, Sendable { self = .errorShieldedBroadcastUnconfirmed case PLATFORM_WALLET_FFI_RESULT_CODE_ERROR_SHIELDED_SPEND_UNCONFIRMED: self = .errorShieldedSpendUnconfirmed + case PLATFORM_WALLET_FFI_RESULT_CODE_ERROR_SHUTDOWN_INCOMPLETE: + self = .errorShutdownIncomplete case PLATFORM_WALLET_FFI_RESULT_CODE_NOT_FOUND: self = .notFound case PLATFORM_WALLET_FFI_RESULT_CODE_ERROR_UNKNOWN: @@ -177,6 +185,12 @@ public enum PlatformWalletError: LocalizedError { /// notes reserved wallet-side (a shield reserves nothing) until the /// next sync reconciles the outcome. Do NOT auto-retry. case shieldedSpendUnconfirmed(String) + /// A destroy / stop / clear completed but a background coordinator did + /// not exit cleanly. 
The host should defer freeing its callback context + /// (a lingering coordinator may still fire one final callback) and, on + /// the clear path, must NOT commit its own persistence wipe — the Rust + /// store was left intact so it can be retried once the pass settles. + case shutdownIncomplete(String) case notFound(String) case unknown(String) @@ -192,6 +206,7 @@ public enum PlatformWalletError: LocalizedError { .arithmeticOverflow(let m), .noSelectableInputs(let m), .walletAlreadyExists(let m), .shieldedBroadcastFailed(let m), .shieldedBroadcastUnconfirmed(let m), .shieldedSpendUnconfirmed(let m), + .shutdownIncomplete(let m), .notFound(let m), .unknown(let m): return m } @@ -222,6 +237,7 @@ public enum PlatformWalletError: LocalizedError { case .errorShieldedBroadcastFailed: self = .shieldedBroadcastFailed(detail) case .errorShieldedBroadcastUnconfirmed: self = .shieldedBroadcastUnconfirmed(detail) case .errorShieldedSpendUnconfirmed: self = .shieldedSpendUnconfirmed(detail) + case .errorShutdownIncomplete: self = .shutdownIncomplete(detail) case .notFound: self = .notFound(detail) case .errorUnknown: self = .unknown(detail) } @@ -240,4 +256,16 @@ extension PlatformWalletFFIResult { func discard() { _ = PlatformWalletResult(self) } + + /// Free the result's Rust-owned message and return its typed code. + /// + /// Like `discard()`, but hands back the code so the caller can branch + /// on it — used by `PlatformWalletManager.deinit`, which must detect + /// `.errorShutdownIncomplete` to decide whether to keep its callback + /// context alive. The message is still freed deterministically (the + /// temporary `PlatformWalletResult` frees it on drop). 
+ @inline(__always) + func discardReturningCode() -> PlatformWalletResultCode { + PlatformWalletResult(self).code + } } diff --git a/packages/swift-sdk/SwiftExampleApp/SwiftExampleApp/Core/Services/ShieldedService.swift b/packages/swift-sdk/SwiftExampleApp/SwiftExampleApp/Core/Services/ShieldedService.swift index f60eb18becb..6b775407a77 100644 --- a/packages/swift-sdk/SwiftExampleApp/SwiftExampleApp/Core/Services/ShieldedService.swift +++ b/packages/swift-sdk/SwiftExampleApp/SwiftExampleApp/Core/Services/ShieldedService.swift @@ -536,11 +536,15 @@ class ShieldedService: ObservableObject { /// /// What it does NOT touch: /// * The manager-wide shielded sync loop is `stopShieldedSync`'d - /// first so the persister callback can't re-derive the - /// rows we're deleting. It restarts on either of the two - /// bind paths: [`manualSync`] self-binding (which calls - /// `startShieldedSync()` after a successful self-rebind), - /// or `rebindWalletScopedServices` firing on a navigation. + /// first (cancel-only — signals cancel and returns), then the + /// `clearShielded()` call below provides the actual drain + /// barrier: it raises a continuously-held quiescing gate, joins + /// the in-flight pass, and returns `errorShutdownIncomplete` + /// instead of wiping if the join was non-clean. The loop + /// restarts on either of the two bind paths: [`manualSync`] + /// self-binding (which calls `startShieldedSync()` after a + /// successful self-rebind), or `rebindWalletScopedServices` + /// firing on a navigation. /// * The per-network commitment-tree SQLite file at /// `dbPath(for:)`. Earlier revisions of this helper /// unlinked it for a "true clean slate", but with no @@ -599,15 +603,19 @@ class ShieldedService: ObservableObject { // The single SQLite commitment-tree file stays open; // the next `bindShielded` call repopulates the // registries and the next sync re-saves notes via - // the changeset path. Best-effort — failure logs but - // doesn't abort the wipe. 
+ // the changeset path. Fail-closed: if `clearShielded` + // throws, the Rust shielded store is still populated, so + // we surface the error and skip the SwiftData wipe below + // to keep the Swift mirror consistent with Rust state. if let managerForStop { do { try managerForStop.clearShielded() } catch { + lastError = "Failed to reset Rust shielded state: \(error.localizedDescription)" SDKLogger.error( "ShieldedService.clearLocalState: clearShielded failed: \(error.localizedDescription)" ) + return } }