diff --git a/glidefs/examples/cache_hot_read.rs b/glidefs/examples/cache_hot_read.rs
new file mode 100644
index 00000000..12011c93
--- /dev/null
+++ b/glidefs/examples/cache_hot_read.rs
@@ -0,0 +1,126 @@
+//! Empirical probe for Claim 4: can a bigger explicit L1 + O_DIRECT recover the
+//! warm-read latency that buffered I/O gets "for free" from the page cache?
+//!
+//! Exercises the full `HybridCache::get()` path (L1 memory tier -> L2 SSD tier)
+//! under a hot-biased access pattern, so L1 sizing determines the hit source:
+//!   - buffered + small L1 -> hot reads miss L1, served by page-cached L2 (~tens of µs)
+//!   - direct + small L1 -> hot reads miss L1, served by media (~hundreds of µs)
+//!   - direct + large L1 -> hot reads hit L1 (~sub-µs)
+//!
+//! Usage:
+//!   cargo run --release --example cache_hot_read -- <buffered|direct> <l1_mb> <hot_mb> <total_mb> <dir>
+//!
+//! All arguments are optional (defaults: buffered, 2, 32, 256, /tmp/cache_hot).
+
+use std::path::Path;
+use std::time::{Duration, Instant};
+
+use bytes::Bytes;
+use foyer::{
+    BlockEngineConfig, DeviceBuilder, EvictionConfig, FsDeviceBuilder, HybridCache,
+    HybridCacheBuilder, HybridCachePolicy, IoEngineConfig, RecoverMode, S3FifoConfig,
+};
+use rand::Rng;
+
+use glidefs::block::block_map::{Blake3Hash, blake3_128};
+
+const BLOCK_SIZE: usize = 128 * 1024;
+
+#[tokio::main]
+async fn main() {
+    let a: Vec<String> = std::env::args().collect();
+    let mode = a.get(1).map(String::as_str).unwrap_or("buffered");
+    let l1_mb: usize = a.get(2).and_then(|s| s.parse().ok()).unwrap_or(2);
+    let hot_mb: usize = a.get(3).and_then(|s| s.parse().ok()).unwrap_or(32);
+    let total_mb: usize = a.get(4).and_then(|s| s.parse().ok()).unwrap_or(256);
+    let dir = a.get(5).cloned().unwrap_or_else(|| "/tmp/cache_hot".into());
+    // Reject unknown modes: a typo must not silently benchmark buffered I/O.
+    let direct = match mode {
+        "buffered" => false,
+        "direct" => true,
+        other => panic!("mode must be buffered|direct, got {other}"),
+    };
+
+    let num_total = total_mb * 1024 * 1024 / BLOCK_SIZE;
+    let num_hot = (hot_mb * 1024 * 1024 / BLOCK_SIZE).min(num_total);
+    // `pick` samples 0..num_hot and 0..num_total; an empty range would panic there.
+    assert!(num_hot > 0, "hot_mb and total_mb must each be >= 1");
+
+    std::fs::create_dir_all(&dir).unwrap();
+    let device = FsDeviceBuilder::new(Path::new(&dir))
+        .with_capacity(num_total * BLOCK_SIZE * 2)
+        .with_direct(direct)
+        .build()
+        .expect("build device");
+    let cache: HybridCache<Blake3Hash, Bytes> = HybridCacheBuilder::new()
+        .with_name("cache-hot-read")
+        .with_policy(HybridCachePolicy::WriteOnInsertion)
+        .memory(l1_mb * 1024 * 1024)
+        .with_eviction_config(EvictionConfig::S3Fifo(S3FifoConfig::default()))
+        .with_weighter(|_k: &Blake3Hash, v: &Bytes| v.len())
+        .storage()
+        .with_engine_config(BlockEngineConfig::new(device))
+        .with_io_engine_config(Box::new(foyer::PsyncIoEngineConfig::new()) as Box<dyn IoEngineConfig>)
+        .with_recover_mode(RecoverMode::Quiet)
+        .build()
+        .await
+        .expect("build cache");
+
+    // Populate, recording hashes (block index -> hash).
+    //
+    // Each block tiles the 8 little-endian bytes of `i * K`. K is odd, so the
+    // product is unique per index and no two blocks share content (nor,
+    // presumably, a `blake3_128` key). A one-byte ramp would repeat every 256
+    // blocks and collapse the whole run onto 256 distinct cache entries.
+    let mut hashes = Vec::with_capacity(num_total);
+    let mut buf = vec![0u8; BLOCK_SIZE];
+    for i in 0..num_total {
+        for (j, b) in buf.iter_mut().enumerate() {
+            *b = ((i as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15) >> (8 * (j % 8))) as u8;
+        }
+        let h = blake3_128(&buf);
+        hashes.push(h);
+        cache.insert(h, Bytes::copy_from_slice(&buf));
+        if i % 32 == 31 {
+            cache.storage().wait().await;
+        }
+    }
+    cache.storage().wait().await;
+
+    // 90% of accesses hit the hot set (first num_hot blocks), 10% uniform.
+    let mut rng = rand::thread_rng();
+    let pick = |rng: &mut rand::rngs::ThreadRng| -> usize {
+        if rng.gen_bool(0.9) {
+            rng.gen_range(0..num_hot)
+        } else {
+            rng.gen_range(0..num_total)
+        }
+    };
+
+    // Warm up so L1 converges to the hot set under this pattern.
+    for _ in 0..(num_total * 3) {
+        let _ = cache.get(&hashes[pick(&mut rng)]).await.expect("get");
+    }
+
+    // Timed run.
+    const ITERS: usize = 100_000;
+    let mut samples = Vec::with_capacity(ITERS);
+    for _ in 0..ITERS {
+        let h = hashes[pick(&mut rng)];
+        let t = Instant::now();
+        let got = cache.get(&h).await.expect("get");
+        samples.push(t.elapsed());
+        assert!(got.is_some(), "miss");
+    }
+    samples.sort_unstable();
+    let median = samples[ITERS / 2];
+    let p99 = samples[ITERS * 99 / 100];
+    let mean: Duration = samples.iter().sum::<Duration>() / ITERS as u32;
+
+    println!(
+        "mode={mode:<8} L1={l1_mb:>4}MiB hot={hot_mb}MiB total={total_mb}MiB | \
+         median={:>8.2?} mean={:>8.2?} p99={:>8.2?}",
+        median, mean, p99
+    );
+    std::fs::remove_dir_all(&dir).ok();
+}
diff --git a/glidefs/examples/cache_l2_poll.rs b/glidefs/examples/cache_l2_poll.rs new file mode 100644 index 00000000..d3c0fa9d --- /dev/null +++ b/glidefs/examples/cache_l2_poll.rs @@ -0,0 +1,131 @@ +//! Empirical probe: do io_uring's O_DIRECT-only knobs (iopoll / sqpoll) help the +//! foyer L2 (SSD) read path, and what do they cost in CPU? +//! +//! Builds an O_DIRECT foyer cache with the uring engine in {plain, iopoll, +//! sqpoll} mode, populates it, then times `storage().load()` (the pure media read +//! path) at a given concurrency -- reporting read latency AND the CPU time the +//! process burned during the timed window (polling modes spin, so watch stime). +//! +//! Usage: +//! cargo run --release --example cache_l2_poll -- + +use std::path::Path; +use std::time::{Duration, Instant}; + +use bytes::Bytes; +use foyer::{ + BlockEngineConfig, DeviceBuilder, EvictionConfig, FsDeviceBuilder, HybridCache, + HybridCacheBuilder, HybridCachePolicy, IoEngineConfig, RecoverMode, S3FifoConfig, + UringIoEngineConfig, +}; +use futures::future::join_all; +use rand::Rng; + +use glidefs::block::block_map::{Blake3Hash, blake3_128}; + +const BLOCK_SIZE: usize = 128 * 1024; +const NUM_BLOCKS: usize = 2048; // 256 MiB working set + +// CPU time (user+sys) consumed by this process so far, in seconds. 
+fn cpu_secs() -> (f64, f64) {
+    let stat = std::fs::read_to_string("/proc/self/stat").unwrap_or_default();
+    // utime/stime are fields 14/15 (clock ticks); past comm's last ')' field 3 is index 0.
+    let after = stat.rsplit(')').next().unwrap_or("");
+    let f: Vec<&str> = after.split_whitespace().collect();
+    let hz = 100.0; // USER_HZ, assumed to be 100 (not queried via sysconf(_SC_CLK_TCK))
+    let utime = f.get(11).and_then(|s| s.parse::<f64>().ok()).unwrap_or(0.0);
+    let stime = f.get(12).and_then(|s| s.parse::<f64>().ok()).unwrap_or(0.0);
+    (utime / hz, stime / hz)
+}
+
+#[tokio::main]
+async fn main() {
+    let a: Vec<String> = std::env::args().collect();
+    let poll = a.get(1).map(String::as_str).unwrap_or("none");
+    let conc: usize = a.get(2).and_then(|s| s.parse::<usize>().ok()).unwrap_or(16).max(1);
+    let dir = a.get(3).cloned().unwrap_or_else(|| "/tmp/cache_l2_poll".into());
+
+    let mut uring = UringIoEngineConfig::new();
+    match poll {
+        "none" => {}
+        "iopoll" => uring = uring.with_iopoll(true),
+        "sqpoll" => uring = uring.with_sqpoll(true),
+        other => panic!("poll must be none|iopoll|sqpoll, got {other}"),
+    }
+
+    std::fs::create_dir_all(&dir).unwrap();
+    let device = FsDeviceBuilder::new(Path::new(&dir))
+        .with_capacity(NUM_BLOCKS * BLOCK_SIZE * 2)
+        .with_direct(true) // iopoll/sqpoll REQUIRE O_DIRECT
+        .build()
+        .expect("build O_DIRECT device");
+
+    let cache: HybridCache<Blake3Hash, Bytes> = match HybridCacheBuilder::new()
+        .with_name("cache-l2-poll")
+        .with_policy(HybridCachePolicy::WriteOnInsertion)
+        .memory(2 * 1024 * 1024) // tiny L1 -> reads hit L2 media
+        .with_eviction_config(EvictionConfig::S3Fifo(S3FifoConfig::default()))
+        .with_weighter(|_k: &Blake3Hash, v: &Bytes| v.len())
+        .storage()
+        .with_engine_config(BlockEngineConfig::new(device))
+        .with_io_engine_config(Box::new(uring) as Box<dyn IoEngineConfig>)
+        .with_recover_mode(RecoverMode::Quiet)
+        .build()
+        .await
+    {
+        Ok(c) => c,
+        Err(e) => {
+            println!("poll={poll:<7} UNSUPPORTED: {e}");
+            std::fs::remove_dir_all(&dir).ok();
+            return;
+        }
+    };
+
+    // Populate; keep hashes. Fill = LE bytes of i*K tiled (K odd => unique block per i).
+    let mut hashes = Vec::with_capacity(NUM_BLOCKS);
+    let mut buf = vec![0u8; BLOCK_SIZE];
+    for i in 0..NUM_BLOCKS {
+        for (j, b) in buf.iter_mut().enumerate() {
+            *b = ((i as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15) >> (8 * (j % 8))) as u8;
+        }
+        let h = blake3_128(&buf);
+        hashes.push(h);
+        cache.insert(h, Bytes::copy_from_slice(&buf));
+        if i % 32 == 31 {
+            cache.storage().wait().await;
+        }
+    }
+    cache.storage().wait().await;
+
+    // Timed: storage().load() (pure L2/media path), `conc` (>= 1) in flight per unit.
+    const UNITS: usize = 4000;
+    let mut rng = rand::thread_rng();
+    let (u0, s0) = cpu_secs();
+    let mut samples = Vec::with_capacity(UNITS);
+    for _ in 0..UNITS {
+        let keys: Vec<Blake3Hash> = (0..conc).map(|_| hashes[rng.gen_range(0..hashes.len())]).collect();
+        let t = Instant::now();
+        let loads = join_all(keys.iter().map(|k| cache.storage().load(k))).await;
+        samples.push(t.elapsed());
+        for l in loads {
+            assert!(matches!(l.expect("load"), foyer::Load::Entry { .. }));
+        }
+    }
+    let (u1, s1) = cpu_secs();
+    samples.sort_unstable();
+    let wall: Duration = samples.iter().sum();
+    let per_read = wall / (UNITS * conc) as u32;
+    let median_batch = samples[UNITS / 2];
+    let reads = (UNITS * conc) as f64;
+    let cpu_user = u1 - u0;
+    let cpu_sys = s1 - s0;
+
+    println!(
+        "poll={poll:<7} conc={conc:<3} | per_read={per_read:>8.2?} batch_median={median_batch:>8.2?} \
+         | CPU/read: user={:>5.2}us sys={:>5.2}us (wall={:.1}s)",
+        cpu_user / reads * 1e6,
+        cpu_sys / reads * 1e6,
+        wall.as_secs_f64(),
+    );
+    std::fs::remove_dir_all(&dir).ok();
+}
diff --git a/glidefs/examples/cache_ram.rs b/glidefs/examples/cache_ram.rs new file mode 100644 index 00000000..c467ad1f --- /dev/null +++ b/glidefs/examples/cache_ram.rs @@ -0,0 +1,136 @@ +//! Empirical probe for the buffered-vs-O_DIRECT RAM-ownership question. +//! +//! Builds a foyer `HybridCache` (mirroring the clean-cache +//! config) on a real directory, populates it, then reports the process RSS (which +//! 
includes foyer's explicit L1 memory tier). It then EXITS leaving the cache
+//! files on disk so the page cache they hold survives -- inspect it with `fincore`
+//! afterward. Buffered should show ~working-set bytes still in page cache (i.e.
+//! double-cached on top of foyer's L1); O_DIRECT should show ~0.
+//!
+//! Usage:
+//!   cargo run --release --example cache_ram -- <buffered|direct> <mem_mb> <num_blocks> <dir> [soak_secs]
+//!
+//! Then:
+//!   fincore --bytes --total <dir>/* ; rm -rf <dir>
+
+use std::path::Path;
+use std::time::Instant;
+
+use bytes::Bytes;
+use foyer::{
+    BlockEngineConfig, DeviceBuilder, EvictionConfig, FsDeviceBuilder, HybridCache,
+    HybridCacheBuilder, HybridCachePolicy, IoEngineConfig, PsyncIoEngineConfig, RecoverMode,
+    S3FifoConfig,
+};
+
+use glidefs::block::block_map::{Blake3Hash, blake3_128};
+use rand::Rng;
+
+const BLOCK_SIZE: usize = 128 * 1024;
+
+fn vm_rss_kb() -> u64 {
+    let status = std::fs::read_to_string("/proc/self/status").unwrap_or_default();
+    for line in status.lines() {
+        if let Some(rest) = line.strip_prefix("VmRSS:") {
+            return rest
+                .split_whitespace()
+                .next()
+                .and_then(|v| v.parse().ok())
+                .unwrap_or(0);
+        }
+    }
+    0
+}
+
+#[tokio::main]
+async fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    let mode = args.get(1).map(String::as_str).unwrap_or("buffered");
+    let mem_mb: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(64);
+    let num_blocks: usize = args.get(3).and_then(|s| s.parse().ok()).unwrap_or(2048);
+    let dir = args
+        .get(4)
+        .cloned()
+        .unwrap_or_else(|| "/tmp/cache_ram_probe".to_string());
+    // If > 0, after populating keep reading the L2 tier for this many seconds
+    // (used as an "active cache" antagonist in the tenant-interference test).
+    let soak_secs: u64 = args.get(5).and_then(|s| s.parse().ok()).unwrap_or(0);
+
+    let direct = match mode {
+        "buffered" => false,
+        "direct" => true,
+        other => panic!("mode must be buffered|direct, got {other}"),
+    };
+
+    let mem_bytes = mem_mb * 1024 * 1024;
+    let ssd_bytes = (num_blocks * BLOCK_SIZE) * 2; // headroom
+    let working_set_mb = (num_blocks * BLOCK_SIZE) / (1024 * 1024);
+
+    std::fs::create_dir_all(&dir).unwrap();
+    let device = FsDeviceBuilder::new(Path::new(&dir))
+        .with_capacity(ssd_bytes)
+        .with_direct(direct)
+        .build()
+        .expect("build device (O_DIRECT not supported here?)");
+
+    let cache: HybridCache<Blake3Hash, Bytes> = HybridCacheBuilder::new()
+        .with_name("cache-ram-probe")
+        .with_policy(HybridCachePolicy::WriteOnInsertion)
+        .memory(mem_bytes)
+        .with_eviction_config(EvictionConfig::S3Fifo(S3FifoConfig::default()))
+        .with_weighter(|_k: &Blake3Hash, v: &Bytes| v.len())
+        .storage()
+        .with_engine_config(BlockEngineConfig::new(device))
+        .with_io_engine_config(Box::new(PsyncIoEngineConfig::new()) as Box<dyn IoEngineConfig>)
+        .with_recover_mode(RecoverMode::Quiet)
+        .build()
+        .await
+        .expect("build hybrid cache");
+
+    let rss_before = vm_rss_kb();
+
+    // Populate in batches, flushing so everything lands on the SSD tier.
+    let mut hashes = Vec::with_capacity(num_blocks);
+    let mut data = vec![0u8; BLOCK_SIZE];
+    for i in 0..num_blocks {
+        // Cheap deterministic fill; distinct per block (LE bytes of i*K, K odd) so hashes differ.
+        for (j, b) in data.iter_mut().enumerate() {
+            *b = ((i as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15) >> (8 * (j % 8))) as u8;
+        }
+        let hash = blake3_128(&data);
+        hashes.push(hash);
+        cache.insert(hash, Bytes::copy_from_slice(&data));
+        if i % 32 == 31 {
+            cache.storage().wait().await;
+        }
+    }
+    cache.storage().wait().await;
+
+    let rss_after = vm_rss_kb();
+
+    println!("mode={mode} direct={direct} L1_mem={mem_mb}MiB working_set={working_set_mb}MiB");
+    println!(
+        "process RSS: before={} MiB after={} MiB (delta={} MiB)",
+        rss_before / 1024,
+        rss_after / 1024,
+        (rss_after.saturating_sub(rss_before)) / 1024,
+    );
+    println!("cache dir: {dir}");
+    println!("--> now run: fincore --bytes --total {dir}/*");
+
+    if soak_secs > 0 {
+        // Continuously read the L2 tier so its footprint stays hot. In buffered
+        // mode this keeps ~working-set bytes resident in the page cache (the
+        // antagonist); in direct mode it touches the device, not the page cache.
+        println!("SOAKING for {soak_secs}s"); // readiness marker for the harness
+        let mut rng = rand::thread_rng();
+        let deadline = Instant::now() + std::time::Duration::from_secs(soak_secs);
+        while Instant::now() < deadline {
+            for _ in 0..256 {
+                let h = hashes[rng.gen_range(0..hashes.len())];
+                let _ = cache.storage().load(&h).await;
+            }
+        }
+    }
+    // Exit without dropping caches; files persist for fincore inspection.
+}
diff --git a/glidefs/examples/interference_test.sh b/glidefs/examples/interference_test.sh new file mode 100755 index 00000000..4a33b761 --- /dev/null +++ b/glidefs/examples/interference_test.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# Claim 5: does a buffered foyer cache steal page cache from a co-resident tenant? +# +# An fio "tenant" (buffered random reads over a file it wants page-cached) runs +# alone, then alongside a foyer antagonist in buffered vs O_DIRECT mode -- all +# inside a memory-capped *user* cgroup scope (no sudo), so it can't disturb the +# real tenants on this box. 
+#
+# buffered antagonist -> ~2 GiB page cache competes with fio's 2 GiB in a 3 GiB
+#                        cap -> fio's pages evicted -> fio slows down.
+# direct antagonist   -> ~0 page cache -> fio keeps its working set -> fast.
+set -uo pipefail
+
+ROOT=${ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)} # repo root (env-overridable)
+ANTAG=$ROOT/target/release/examples/cache_ram
+WORK=$ROOT/target/interference
+FIOFILE=$WORK/tenant.dat
+ANTAG_DIR=$WORK/antag_cache
+INNER=$WORK/_inner.sh
+CAP=${CAP:-3G}
+FIO_SIZE=${FIO_SIZE:-2G}
+ANTAG_BLOCKS=${ANTAG_BLOCKS:-16384} # 16384 * 128KiB = 2 GiB working set
+SOAK=${SOAK:-60}
+
+mkdir -p "$WORK"
+[ -f "$FIOFILE" ] || fio --name=prep --filename="$FIOFILE" --size="$FIO_SIZE" \
+  --rw=write --bs=1M --ioengine=psync --direct=1 >/dev/null 2>&1
+
+# Inner runner executed *inside* the capped scope: args = label mode
+cat > "$INNER" <<'INNER_EOF'
+#!/usr/bin/env bash
+set -uo pipefail
+label=$1 mode=$2
+ANTAG=$3 BLOCKS=$4 ADIR=$5 SOAK=$6 FIOFILE=$7 FIO_SIZE=$8 WORK=$9
+if [ "$mode" != none ]; then
+  "$ANTAG" "$mode" 64 "$BLOCKS" "$ADIR" "$SOAK" > "$WORK/antag.log" 2>&1 &
+  ap=$!
+  while ! grep -q SOAKING "$WORK/antag.log" 2>/dev/null; do
+    kill -0 "$ap" 2>/dev/null || { echo "$label ANTAGONIST DIED"; tail -2 "$WORK/antag.log"; exit 1; }
+    sleep 0.5
+  done
+  sleep 2 # let the soak warm the page cache
+fi
+# Warm the tenant file into page cache *inside this cgroup*, then read without
+# invalidating it (fio defaults to invalidate=1, which would drop it cold).
+dd if="$FIOFILE" of=/dev/null bs=1M 2>/dev/null
+fio --name=tenant --filename="$FIOFILE" --size="$FIO_SIZE" --rw=randread --bs=128k \
+  --ioengine=psync --direct=0 --invalidate=0 --runtime=20 --time_based --numjobs=1 \
+  --group_reporting --output-format=json 2>/dev/null \
+| jq -r --arg l "$label" '.jobs[0].read |
+  "\($l)\tIOPS=\(.iops|floor)\tclat_mean=\(.clat_ns.mean/1000|floor)us\tp99=\(.clat_ns.percentile."99.000000"/1000|floor)us\tBW=\(.bw/1024|floor)MiB/s"'
+[ "$mode" != none ] && kill "${ap:-0}" 2>/dev/null
+exit 0
+INNER_EOF
+chmod +x "$INNER"
+
+scenario() { # $1=label $2=mode
+  rm -rf "$ANTAG_DIR"
+  systemd-run --user --scope -q -p "MemoryMax=$CAP" \
+    bash "$INNER" "$1" "$2" "$ANTAG" "$ANTAG_BLOCKS" "$ANTAG_DIR" "$SOAK" "$FIOFILE" "$FIO_SIZE" "$WORK"
+}
+
+echo "=== Claim 5: tenant interference (cap=$CAP, fio=$FIO_SIZE, antagonist=$((ANTAG_BLOCKS * 128 / 1024))MiB) ==="
+scenario "baseline (fio alone)" none
+scenario "+ buffered antagonist" buffered
+scenario "+ O_DIRECT antagonist" direct
+rm -rf "$ANTAG_DIR" "$INNER"
diff --git a/glidefs/src/block/api.rs b/glidefs/src/block/api.rs index 716244bf..d88a2d60 100644 --- a/glidefs/src/block/api.rs +++ b/glidefs/src/block/api.rs @@ -1205,6 +1205,7 @@ mod tests { cache_dir: temp_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1469,6 +1470,7 @@ mod tests { cache_dir: temp_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, diff --git a/glidefs/src/block/cache.rs b/glidefs/src/block/cache.rs index 46911900..91cf616c 100644 --- a/glidefs/src/block/cache.rs +++ b/glidefs/src/block/cache.rs @@ -42,12 +42,17 @@ use foyer::{ /// Configuration for the foyer-backed block cache. pub struct FoyerCacheConfig { - /// Memory tier capacity in bytes. 
+ /// Memory tier (L1) capacity in bytes. With `direct`, this is the *only* RAM + /// cache, so it should cover the hot working set (an under-sized L1 + O_DIRECT + /// pays full media latency on misses). pub memory_bytes: usize, /// SSD tier capacity in bytes. pub ssd_bytes: usize, /// Directory for SSD cache files. pub ssd_dir: PathBuf, + /// Prefer `O_DIRECT` for the SSD tier (bypass the page cache). Falls back to + /// buffered if the backing filesystem doesn't support it (e.g. tmpfs). + pub direct: bool, } /// Production block cache backed by foyer's HybridCache. @@ -70,10 +75,21 @@ impl FoyerBlockCache { let ssd_dir = &config.ssd_dir; let ssd_bytes = config.ssd_bytes; let memory_bytes = config.memory_bytes; + // O_DIRECT only if requested AND the backing FS supports it (tmpfs etc. + // EINVAL), so the neighborly default degrades to buffered instead of + // failing to start. + let direct = config.direct && super::foyer_engine::o_direct_supported(ssd_dir); + if config.direct && !direct { + tracing::warn!( + dir = %ssd_dir.display(), + "clean cache: O_DIRECT unsupported on this filesystem, using buffered I/O" + ); + } let inner = super::foyer_engine::build_preferring_uring("glidefs-clean-cache", |engine| { async move { let device = FsDeviceBuilder::new(ssd_dir) .with_capacity(ssd_bytes) + .with_direct(direct) .build()?; let storage = HybridCacheBuilder::new() .with_name("glidefs-clean-cache") @@ -272,6 +288,7 @@ mod tests { memory_bytes, ssd_bytes, ssd_dir: dir.path().to_path_buf(), + direct: false, }) .await .expect("failed to open foyer cache") @@ -294,6 +311,27 @@ mod tests { assert!(cache.get(&make_hash(99)).await.is_none()); } + /// `direct: true` must never fail to open — it degrades to buffered I/O on + /// filesystems without O_DIRECT support (e.g. tmpfs, common in CI temp dirs). + /// This guards the neighborly default from crashing startup. 
+ #[tokio::test] + async fn test_foyer_direct_falls_back_gracefully() { + let dir = tempfile::tempdir().unwrap(); + let cache = FoyerBlockCache::open(FoyerCacheConfig { + memory_bytes: 4 * 1024 * 1024, + ssd_bytes: 16 * 1024 * 1024, + ssd_dir: dir.path().to_path_buf(), + direct: true, + }) + .await + .expect("direct=true must open (falling back to buffered if unsupported)"); + + let hash = make_hash(7); + let data = Bytes::from(vec![0xBB; 4096]); + cache.insert(hash, data.clone()); + assert_eq!(cache.get(&hash).await.expect("round-trips"), data); + } + #[tokio::test] async fn test_foyer_trait_object() { // Verify FoyerBlockCache works through Arc. 
diff --git a/glidefs/src/block/foyer_engine.rs b/glidefs/src/block/foyer_engine.rs
index c19f9a36..861c14a9 100644
--- a/glidefs/src/block/foyer_engine.rs
+++ b/glidefs/src/block/foyer_engine.rs
@@ -9,9 +9,51 @@
 //! automatic.
 
 use std::future::Future;
+use std::path::Path;
 
 use foyer::{HybridCache, IoEngineConfig, StorageKey, StorageValue};
 
+/// Whether the filesystem backing `dir` supports `O_DIRECT`.
+///
+/// O_DIRECT lets the cache bypass the OS page cache, so it stops double-caching
+/// data foyer already holds and stops evicting co-resident tenants' pages (see
+/// `benches/` measurements). But it `EINVAL`s on filesystems that don't support
+/// it (notably tmpfs, common for `/tmp` cache dirs in dev/CI), so we probe before
+/// committing and let callers fall back to buffered I/O. The probe opens and
+/// removes a tiny file with `O_DIRECT`; the result is used immediately, so TOCTOU
+/// is not a concern.
+///
+/// `dir` is created if missing, so a first start (cache directory not yet made)
+/// is not misreported as "unsupported". Any failure to create the directory or
+/// to open the probe file -- not only `EINVAL` -- yields `false`.
+pub fn o_direct_supported(dir: &Path) -> bool {
+    #[cfg(target_os = "linux")]
+    {
+        use std::os::unix::fs::OpenOptionsExt;
+        // Callers may probe before the device builder has created `dir`; without
+        // this the open below fails with ENOENT and we would report `false`.
+        if std::fs::create_dir_all(dir).is_err() {
+            return false;
+        }
+        let probe = dir.join(".glidefs_odirect_probe");
+        let ok = std::fs::OpenOptions::new()
+            .read(true)
+            .write(true)
+            .create(true)
+            .truncate(true)
+            .custom_flags(libc::O_DIRECT)
+            .open(&probe)
+            .is_ok();
+        let _ = std::fs::remove_file(&probe);
+        ok
+    }
+    #[cfg(not(target_os = "linux"))]
+    {
+        let _ = dir;
+        false
+    }
+}
+
 /// Build a foyer [`HybridCache`], preferring the io_uring I/O engine on Linux and
 /// degrading gracefully to foyer's default psync engine.
 ///
diff --git a/glidefs/src/block/pack_index_cache.rs b/glidefs/src/block/pack_index_cache.rs index f582c70a..8ab95649 100644 --- a/glidefs/src/block/pack_index_cache.rs +++ b/glidefs/src/block/pack_index_cache.rs @@ -79,19 +79,24 @@ pub struct PackIndexCache { impl PackIndexCache { /// Open the pack index cache with default tier sizes (64MB memory, 2GB SSD). + /// Open with default tier sizes and buffered I/O (for ephemeral/CLI use). pub async fn open(cache_dir: &Path) -> anyhow::Result { - Self::open_with_sizes(cache_dir, DEFAULT_MEMORY_BYTES, DEFAULT_SSD_BYTES).await + Self::open_with_sizes(cache_dir, DEFAULT_MEMORY_BYTES, DEFAULT_SSD_BYTES, false).await } - /// Open with custom tier sizes. + /// Open with custom tier sizes. `direct` prefers `O_DIRECT` (falls back to + /// buffered if the backing filesystem doesn't support it). pub async fn open_with_sizes( cache_dir: &Path, memory_bytes: usize, ssd_bytes: usize, + direct: bool, ) -> anyhow::Result { let dir = cache_dir.join("foyer_pack_index_v2"); std::fs::create_dir_all(&dir)?; + let direct = direct && crate::block::foyer_engine::o_direct_supported(&dir); + // Prefer the io_uring I/O engine on Linux (falls back to psync); the // builder closure is reconstructed per attempt because foyer's device and // storage builders are consuming. 
@@ -102,6 +107,7 @@ impl PackIndexCache { async move { let device = FsDeviceBuilder::new(dir) .with_capacity(ssd_bytes) + .with_direct(direct) .build()?; let storage = HybridCacheBuilder::new() .with_name("glidefs-pack-index-cache") @@ -303,7 +309,7 @@ mod tests { /// Small cache sizes for tests (foyer needs real directories). async fn open_test_cache(dir: &Path) -> PackIndexCache { - PackIndexCache::open_with_sizes(dir, 16 * 1024 * 1024, 64 * 1024 * 1024) + PackIndexCache::open_with_sizes(dir, 16 * 1024 * 1024, 64 * 1024 * 1024, false) .await .expect("failed to open test cache") } diff --git a/glidefs/src/block/router.rs b/glidefs/src/block/router.rs index 379fe339..1f76e7de 100644 --- a/glidefs/src/block/router.rs +++ b/glidefs/src/block/router.rs @@ -255,6 +255,9 @@ pub struct RouterConfig { pub block_size: usize, /// Shared block cache for decompressed block data pub clean_cache: Arc, + /// Pre-built pack-index cache (with the server's O_DIRECT/sizing policy). When + /// `None`, the router opens a buffered, default-sized one (CLI/test path). + pub pack_index_cache: Option>, /// Whether to fsync the WAL after each write batch pub wal_sync: bool, /// Max concurrent S3 pack uploads across all exports (0 = unlimited). 
@@ -621,11 +624,14 @@ impl ExportRouter { .as_ref() .map(|s| Arc::new(tokio::sync::Semaphore::new(s.available_permits()))); - let pack_index_cache = Arc::new( - PackIndexCache::open(&config.cache_dir) - .await - .map_err(|e| RouterError::Manifest(format!("pack index cache: {e}")))?, - ); + let pack_index_cache = match config.pack_index_cache { + Some(c) => c, + None => Arc::new( + PackIndexCache::open(&config.cache_dir) + .await + .map_err(|e| RouterError::Manifest(format!("pack index cache: {e}")))?, + ), + }; let manifest_cache: foyer::Cache> = foyer::Cache::builder(config.manifest_cache_bytes) @@ -2988,6 +2994,7 @@ impl ExportRouter { cache_dir, block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(256 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -3159,6 +3166,7 @@ mod tests { cache_dir: temp.path().to_path_buf(), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -3242,6 +3250,7 @@ mod tests { cache_dir: temp_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(256 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -4805,6 +4814,7 @@ mod tests { cache_dir: temp_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(256 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -4843,6 +4853,7 @@ mod tests { cache_dir: temp_dir2.path().to_path_buf(), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(256 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -4892,6 +4903,7 @@ mod tests { cache_dir: temp_dir.path().join("parent"), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(256 * 1024 * 1024)), + 
pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -4923,6 +4935,7 @@ mod tests { cache_dir: temp_dir.path().join("child"), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(256 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -5030,6 +5043,7 @@ mod tests { cache_dir: reader_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache: Arc::new(SimpleBlockCache::new(256 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, diff --git a/glidefs/src/cli/bless.rs b/glidefs/src/cli/bless.rs index 9aa1c36f..5a6cba95 100644 --- a/glidefs/src/cli/bless.rs +++ b/glidefs/src/cli/bless.rs @@ -293,6 +293,7 @@ pub async fn run_bless_oci( memory_bytes: 4 * 1024 * 1024, ssd_bytes: 16 * 1024 * 1024, ssd_dir: foyer_dir, + direct: false, // ephemeral CLI cache; buffered is fine }) .await .context("failed to open block cache")?, diff --git a/glidefs/src/cli/push.rs b/glidefs/src/cli/push.rs index 38de507f..e1eab996 100644 --- a/glidefs/src/cli/push.rs +++ b/glidefs/src/cli/push.rs @@ -163,6 +163,7 @@ async fn load_readonly_handler( memory_bytes: 64 * 1024 * 1024, ssd_bytes: 256 * 1024 * 1024, ssd_dir: foyer_dir, + direct: false, // ephemeral CLI cache; buffered is fine }) .await .context("failed to open block cache")?, diff --git a/glidefs/src/cli/server.rs b/glidefs/src/cli/server.rs index ad76c5bf..742c9402 100644 --- a/glidefs/src/cli/server.rs +++ b/glidefs/src/cli/server.rs @@ -95,16 +95,18 @@ pub async fn build_router_only(config_path: PathBuf) -> Result { .as_ref() .ok_or_else(|| anyhow::anyhow!("NBD server configuration is required"))?; - // Shared clean block cache — foyer HybridCache with memory + SSD tiers - let memory_bytes = - (settings.cache.memory_size_gb.unwrap_or(1.0) * 1024.0 * 1024.0 * 1024.0) as usize; - let ssd_bytes = - (settings.cache.ssd_cache_size_gb.unwrap_or(10.0) * 1024.0 * 1024.0 * 1024.0) as usize; + // 
Shared clean block cache — foyer HybridCache with memory + SSD tiers. + // O_DIRECT by default (bypass page cache so we don't double-cache or evict + // co-resident tenants' RAM); degrades to buffered on filesystems that lack it. + let memory_bytes = settings.cache.clean_cache_memory_bytes(); + let ssd_bytes = settings.cache.clean_cache_ssd_bytes(); + let direct = settings.cache.use_direct(); let foyer_dir = cache_dir.join("foyer"); info!( - "Opening clean cache: {}MB memory, {}GB SSD at {}", + "Opening clean cache: {}MB memory (L1), {}GB SSD, direct={} at {}", memory_bytes / (1024 * 1024), ssd_bytes / (1024 * 1024 * 1024), + direct, foyer_dir.display(), ); let clean_cache: Arc = Arc::new( @@ -112,11 +114,24 @@ pub async fn build_router_only(config_path: PathBuf) -> Result { memory_bytes, ssd_bytes, ssd_dir: foyer_dir, + direct, }) .await .context("Failed to open foyer clean cache")?, ); + // Pack-index cache (same O_DIRECT policy + configurable tiers). + let pack_index_cache = Arc::new( + crate::block::pack_index_cache::PackIndexCache::open_with_sizes( + &cache_dir, + settings.cache.pack_index_memory_bytes(), + settings.cache.pack_index_ssd_bytes(), + direct, + ) + .await + .context("Failed to open pack index cache")?, + ); + let db_path_for_prewarm = db_path.clone(); let router = Arc::new( ExportRouter::new(crate::block::router::RouterConfig { @@ -125,6 +140,7 @@ pub async fn build_router_only(config_path: PathBuf) -> Result { cache_dir, block_size: nbd_config.block_size(), clean_cache, + pack_index_cache: Some(pack_index_cache), wal_sync: nbd_config.wal_sync(), max_s3_uploads: nbd_config.max_s3_uploads(), max_s3_downloads: nbd_config.max_s3_downloads(), diff --git a/glidefs/src/config.rs b/glidefs/src/config.rs index ab2864a7..bb085d5e 100644 --- a/glidefs/src/config.rs +++ b/glidefs/src/config.rs @@ -58,11 +58,82 @@ pub struct CacheConfig { #[serde(deserialize_with = "deserialize_expandable_path")] pub dir: PathBuf, pub disk_size_gb: f64, + /// Clean-cache 
memory tier (L1) in GB. This is a **RAM budget**, not a + /// performance dial: with `direct`, L1 is pinned, non-reclaimable host RAM that + /// can't be sold to tenants (unlike the page cache, which is elastic and + /// reclaimable). The default ([`DEFAULT_L1_BYTES`]) is sized to hold the + /// deduplicated shared base (pre-fork/layered, content-addressed → one copy) + /// plus a few hot tenants' warm tails — independent of node RAM, since dedup + /// makes the working set roughly constant across node sizes. Raise it per-node + /// if you knowingly pack more hot tenants; watch the L1 hit-rate metric to + /// know if it's right. Blocks beyond L1 fall to the O_DIRECT SSD tier + /// (~220 µs) — still ~1000× faster than the S3 miss it replaces. #[serde(skip_serializing_if = "Option::is_none")] pub memory_size_gb: Option, /// SSD tier capacity for foyer clean cache in GB (default: 10GB). #[serde(skip_serializing_if = "Option::is_none")] pub ssd_cache_size_gb: Option, + /// Pack-index cache memory tier in GB (default: 64 MiB). + #[serde(skip_serializing_if = "Option::is_none")] + pub pack_index_memory_size_gb: Option, + /// Pack-index cache SSD tier in GB (default: 2 GiB). + #[serde(skip_serializing_if = "Option::is_none")] + pub pack_index_ssd_size_gb: Option, + /// Use `O_DIRECT` for the on-disk cache tiers (default: true). Bypasses the OS + /// page cache so the cache doesn't double-cache data or evict co-resident + /// tenants' pages. Automatically falls back to buffered I/O on filesystems + /// that don't support O_DIRECT (e.g. tmpfs). + #[serde(skip_serializing_if = "Option::is_none")] + pub direct: Option, +} + +const GIB: f64 = 1024.0 * 1024.0 * 1024.0; + +/// Default clean-cache L1 (memory tier) budget: a flat 2 GiB. +/// +/// Deliberately a **flat budget, not a fraction of RAM** — every byte of L1 is +/// pinned, non-reclaimable, non-chargeable host RAM, and a RAM-fraction would +/// grow that unsellable reservation as nodes get bigger for no benefit. 
The +/// working set L1 must hold is set by the *deduplicated* content (one copy of the +/// shared base) plus a few hot tenants' warm tails — roughly constant regardless +/// of node size. 2 GiB comfortably covers that common case; operators with more +/// hot tenants raise `memory_size_gb` and confirm via the L1 hit-rate metric. +pub const DEFAULT_L1_BYTES: usize = 2 * 1024 * 1024 * 1024; + +impl CacheConfig { + /// Clean-cache memory tier (L1) bytes: explicit override or the default budget. + pub fn clean_cache_memory_bytes(&self) -> usize { + match self.memory_size_gb { + Some(gb) => (gb * GIB) as usize, + None => DEFAULT_L1_BYTES, + } + } + + /// Clean-cache SSD tier bytes (default 10 GiB). + pub fn clean_cache_ssd_bytes(&self) -> usize { + (self.ssd_cache_size_gb.unwrap_or(10.0) * GIB) as usize + } + + /// Pack-index cache memory tier bytes (default 64 MiB). + pub fn pack_index_memory_bytes(&self) -> usize { + match self.pack_index_memory_size_gb { + Some(gb) => (gb * GIB) as usize, + None => 64 * 1024 * 1024, + } + } + + /// Pack-index cache SSD tier bytes (default 2 GiB). + pub fn pack_index_ssd_bytes(&self) -> usize { + match self.pack_index_ssd_size_gb { + Some(gb) => (gb * GIB) as usize, + None => 2 * 1024 * 1024 * 1024, + } + } + + /// Whether to prefer `O_DIRECT` for on-disk cache tiers (default true). 
+ pub fn use_direct(&self) -> bool { + self.direct.unwrap_or(true) + } } #[derive(Debug, Deserialize, Serialize, Clone)] @@ -583,6 +654,20 @@ impl Settings { ssd ); } + if let Some(mem) = self.cache.pack_index_memory_size_gb { + anyhow::ensure!( + mem.is_finite() && mem > 0.0, + "cache.pack_index_memory_size_gb must be a finite number > 0, got {}", + mem + ); + } + if let Some(ssd) = self.cache.pack_index_ssd_size_gb { + anyhow::ensure!( + ssd.is_finite() && ssd > 0.0, + "cache.pack_index_ssd_size_gb must be a finite number > 0, got {}", + ssd + ); + } // Storage timeout validation if let Some(t) = self.storage.connect_timeout_secs { @@ -714,6 +799,9 @@ impl Settings { disk_size_gb: 10.0, memory_size_gb: Some(1.0), ssd_cache_size_gb: None, + pack_index_memory_size_gb: None, + pack_index_ssd_size_gb: None, + direct: None, }, storage: StorageConfig { url: "s3://your-bucket/glidefs-data".to_string(), diff --git a/glidefs/tests/blktests.rs b/glidefs/tests/blktests.rs index db519027..faad9cd8 100644 --- a/glidefs/tests/blktests.rs +++ b/glidefs/tests/blktests.rs @@ -192,6 +192,7 @@ mod blktests { cache_dir: cache_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, diff --git a/glidefs/tests/docker_integration/bless_api.rs b/glidefs/tests/docker_integration/bless_api.rs index de8ee1ac..10eba2dd 100644 --- a/glidefs/tests/docker_integration/bless_api.rs +++ b/glidefs/tests/docker_integration/bless_api.rs @@ -230,6 +230,7 @@ async fn create_test_router( cache_dir: dir.path().to_path_buf(), block_size: 131_072, // 128 KB — matches bless default clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, diff --git a/glidefs/tests/docker_integration/mod.rs b/glidefs/tests/docker_integration/mod.rs index 79df8401..4ddf7997 100644 --- a/glidefs/tests/docker_integration/mod.rs +++ 
b/glidefs/tests/docker_integration/mod.rs @@ -496,6 +496,7 @@ impl TestServer { cache_dir: cache_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, @@ -589,6 +590,7 @@ impl TestServer { cache_dir: cache_dir.clone(), block_size: 128 * 1024, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, diff --git a/glidefs/tests/fio_bench.rs b/glidefs/tests/fio_bench.rs index c411a9a8..952837dd 100644 --- a/glidefs/tests/fio_bench.rs +++ b/glidefs/tests/fio_bench.rs @@ -51,6 +51,7 @@ mod fio_bench { cache_dir: cache_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, diff --git a/glidefs/tests/fio_verify.rs b/glidefs/tests/fio_verify.rs index a6dcb590..d06cad0a 100644 --- a/glidefs/tests/fio_verify.rs +++ b/glidefs/tests/fio_verify.rs @@ -69,6 +69,7 @@ mod fio_verify { cache_dir: cache_dir.path().to_path_buf(), block_size: 128 * 1024, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, @@ -336,6 +337,7 @@ mod fio_verify { cache_dir: cache_dir2.path().to_path_buf(), block_size: 128 * 1024, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, @@ -469,6 +471,7 @@ mod fio_verify { cache_dir: cache_dir2.path().to_path_buf(), block_size: 128 * 1024, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, @@ -578,6 +581,7 @@ mod fio_verify { cache_dir: cache_dir2.path().to_path_buf(), block_size: 128 * 1024, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, diff --git a/glidefs/tests/integration/chunk_prefetch.rs b/glidefs/tests/integration/chunk_prefetch.rs index 0fd0b582..b38159a7 100644 --- a/glidefs/tests/integration/chunk_prefetch.rs +++ 
b/glidefs/tests/integration/chunk_prefetch.rs @@ -27,6 +27,7 @@ async fn test_prefetch_chunk_metas_cold_start() { cache_dir: dir1.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -81,6 +82,7 @@ async fn test_prefetch_chunk_metas_cold_start() { cache_dir: dir2.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -146,6 +148,7 @@ async fn test_prefetch_empty_router() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, diff --git a/glidefs/tests/integration/data_safety.rs b/glidefs/tests/integration/data_safety.rs index 99786d61..78118aac 100644 --- a/glidefs/tests/integration/data_safety.rs +++ b/glidefs/tests/integration/data_safety.rs @@ -2191,6 +2191,7 @@ async fn test_cold_wake_stress_concurrent_writes() { cache_dir: cache_dir1.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, @@ -2258,6 +2259,7 @@ async fn test_cold_wake_stress_concurrent_writes() { cache_dir: cache_dir2.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: clean_cache2, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, @@ -2535,6 +2537,7 @@ async fn test_concurrent_sub_block_writes_same_not_present_block() { cache_dir: cache_dir1.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: clean_cache1, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, @@ -2590,6 +2593,7 @@ async fn test_concurrent_sub_block_writes_same_not_present_block() { cache_dir: cache_dir2.path().to_path_buf(), 
block_size: BLOCK_SIZE, clean_cache: clean_cache2, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 128, max_s3_downloads: 512, diff --git a/glidefs/tests/integration/flush_ordering.rs b/glidefs/tests/integration/flush_ordering.rs index 9632d851..3155b9e8 100644 --- a/glidefs/tests/integration/flush_ordering.rs +++ b/glidefs/tests/integration/flush_ordering.rs @@ -44,6 +44,7 @@ fn create_router_config(s3: Arc, dir: &TempDir) -> RouterConfig cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: true, max_s3_uploads: 0, max_s3_downloads: 0, diff --git a/glidefs/tests/integration/flush_safety.rs b/glidefs/tests/integration/flush_safety.rs index 5d3fedd6..2bb97a10 100644 --- a/glidefs/tests/integration/flush_safety.rs +++ b/glidefs/tests/integration/flush_safety.rs @@ -127,6 +127,7 @@ fn create_router_config(s3: Arc, dir: &TempDir) -> RouterConfig cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -998,6 +999,7 @@ async fn test_readahead_prefetches_next_pack_index() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1056,6 +1058,7 @@ async fn test_readahead_prefetches_next_pack_index() { cache_dir: dir2.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, diff --git a/glidefs/tests/integration/snapshots.rs b/glidefs/tests/integration/snapshots.rs index 4b78f11e..c17613d9 100644 --- a/glidefs/tests/integration/snapshots.rs +++ b/glidefs/tests/integration/snapshots.rs @@ -134,6 +134,7 @@ async fn 
test_fork_from_snapshot() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -316,6 +317,7 @@ async fn test_purge_export_deletes_snapshots() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -374,6 +376,7 @@ async fn test_api_list_and_delete_snapshots() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -491,6 +494,7 @@ async fn test_remove_without_purge_preserves_snapshots() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -548,6 +552,7 @@ async fn test_snapshot_tag_and_fork() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -617,6 +622,7 @@ async fn test_standalone_tag() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1108,6 +1114,7 @@ async fn test_fork_from_snapshot_zero_overwrite_sees_original() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1358,6 +1365,7 @@ async fn test_head_manifest_not_found() { cache_dir: 
dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1394,6 +1402,7 @@ async fn test_fork_parent_deleted_child_survives() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1477,6 +1486,7 @@ async fn test_fork_parent_deleted_child_survives() { cache_dir: dir2.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1560,6 +1570,7 @@ async fn test_fork_concurrent_parent_write_during_fork() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1670,6 +1681,7 @@ async fn test_fork_inherit_then_overwrite_all() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1743,6 +1755,7 @@ async fn test_fork_inherit_then_overwrite_all() { cache_dir: dir2.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1800,6 +1813,7 @@ async fn test_fork_from_snapshot_during_active_writes() { cache_dir: dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, @@ -1896,6 +1910,7 @@ async fn test_fork_both_children_from_same_parent() { cache_dir: 
dir.path().to_path_buf(), block_size: BLOCK_SIZE, clean_cache: Arc::new(SimpleBlockCache::new(64 * 1024 * 1024)), + pack_index_cache: None, wal_sync: false, max_s3_uploads: 0, max_s3_downloads: 0, diff --git a/glidefs/tests/zc_glidefs.rs b/glidefs/tests/zc_glidefs.rs index aff1eed0..a3f74abb 100644 --- a/glidefs/tests/zc_glidefs.rs +++ b/glidefs/tests/zc_glidefs.rs @@ -85,6 +85,7 @@ async fn setup_router_full( cache_dir: cache_dir.path().to_path_buf(), block_size: 4096, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 8, max_s3_downloads: 16, @@ -448,6 +449,7 @@ async fn zc_glidefs_multi_device_soak() { cache_dir: cache_dir.path().to_path_buf(), block_size: 4096, clean_cache, + pack_index_cache: None, wal_sync: false, max_s3_uploads: 8, max_s3_downloads: 16, diff --git a/oci-distribution/src/config.rs b/oci-distribution/src/config.rs index ddde7579..4eae679f 100644 --- a/oci-distribution/src/config.rs +++ b/oci-distribution/src/config.rs @@ -86,6 +86,7 @@ pub async fn load_readonly_handler( memory_bytes: 64 * 1024 * 1024, ssd_bytes: 256 * 1024 * 1024, ssd_dir: foyer_dir, + direct: false, // ephemeral registry cache; buffered is fine }) .await .context("failed to open block cache")?,