From 62dd8958d1b618b6a7834a9342599932bd4abd2c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 19 May 2026 09:32:18 -0700 Subject: [PATCH] Fix branch snapshot isolation Signed-off-by: Cong Wang --- README.md | 4 +- bench/README.md | 8 +- bench/branchfs_bench.py | 4 +- bench/plot_results.py | 5 -- src/branch.rs | 162 +++++++++++++++++++++++++++----------- src/fs_helpers.rs | 8 +- tests/test_branch_dirs.sh | 10 ++- tests/test_commit.sh | 2 + 8 files changed, 138 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 954d276..25092e2 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ BranchFS is a FUSE-based filesystem that enables speculative branching on top of | Feature | Description | |---------|-------------| -| Fast Branch Creation | O(1) branch creation with copy-on-write semantics | +| Snapshot Isolation | Branches capture their inherited parent view at creation time | | Commit to Parent | Changes merge into immediate parent branch (or base if parent is main) | | Atomic Abort | Instantly discards leaf branch, parent and siblings unaffected | | Atomic Commit | Merges leaf branch into parent atomically | @@ -16,7 +16,7 @@ BranchFS is a FUSE-based filesystem that enables speculative branching on top of ## Architecture -BranchFS is a FUSE-based filesystem that requires no root privileges. It implements file-level copy-on-write: when a file is modified on a branch, the entire file is lazily copied to the branch's delta storage, while unmodified files are resolved by walking up the branch chain to the base directory. Deletions are tracked via tombstone markers. On commit, changes from a leaf branch are merged into its immediate parent (or applied to the base directory if the parent is main); on abort, the leaf branch's delta storage is simply discarded. +BranchFS is a FUSE-based filesystem that requires no root privileges. 
It implements file-level copy-on-write over a frozen inherited view: when a branch is created, BranchFS snapshots the parent's visible tree into the branch's inherited storage. When a file is modified on a branch, the file is copied to the branch's delta storage, and lookups prefer branch deltas/tombstones over the inherited snapshot. Deletions are tracked via tombstone markers. On commit, changes from a leaf branch are merged into its immediate parent (or applied to the base directory if the parent is main); on abort, the leaf branch's delta storage and inherited snapshot are discarded. ### Why not overlayfs? diff --git a/bench/README.md b/bench/README.md index 8807bf0..b332246 100644 --- a/bench/README.md +++ b/bench/README.md @@ -31,7 +31,7 @@ python3 branchfs_bench.py python3 branchfs_bench.py --quick # Specific benchmark -python3 branchfs_bench.py --bench creation # Branch creation O(1) test +python3 branchfs_bench.py --bench creation # Snapshot creation latency python3 branchfs_bench.py --bench commit # Commit latency python3 branchfs_bench.py --bench abort # Abort latency python3 branchfs_bench.py --bench throughput # Read/write throughput @@ -43,13 +43,13 @@ python3 branchfs_bench.py --latex ../paper/ ## Benchmarks -### 1. Branch Creation Latency (O(1) Verification) +### 1. Branch Creation Latency Measures internal branch creation time with varying base directory sizes. -Validates that branch creation is O(1) - constant time regardless of base size. +This includes materializing the inherited snapshot used for branch isolation. - **Parameters**: Base sizes of 100, 1K, 10K files -- **Expected**: ~300 µs constant latency +- **Expected**: Scales with visible parent tree size ### 2. Commit Latency diff --git a/bench/branchfs_bench.py b/bench/branchfs_bench.py index 6ac4bca..33b4124 100755 --- a/bench/branchfs_bench.py +++ b/bench/branchfs_bench.py @@ -3,7 +3,7 @@ BranchFS Microbenchmark Suite Benchmarks: -1. 
Branch creation latency vs base directory size (O(1) verification) +1. Branch creation latency vs base directory size 2. Commit latency vs modification size 3. Abort latency 4. Read/write throughput @@ -101,7 +101,7 @@ def _create_base_files(self, base_dir: Path, num_files: int, file_size: int = 10 file_path.write_bytes(os.urandom(file_size)) # ========================================================================= - # Benchmark 1: Branch Creation Latency vs Base Size (O(1) verification) + # Benchmark 1: Branch Creation Latency vs Base Size # ========================================================================= def bench_branch_creation(self, base_sizes: list[int] = [100, 1000, 10000], iterations: int = 5): diff --git a/bench/plot_results.py b/bench/plot_results.py index 7eb4844..9205873 100755 --- a/bench/plot_results.py +++ b/bench/plot_results.py @@ -43,11 +43,6 @@ def plot_branch_creation(results: list[dict], output_dir: Path): ax.set_title('(a) Branch Creation', fontsize=11) ax.grid(True, alpha=0.3) - # Add horizontal reference line for O(1) - avg = sum(y) / len(y) - ax.axhline(y=avg, color='red', linestyle='--', alpha=0.5, label=f'Avg: {avg:.0f} μs') - ax.legend(fontsize=8) - plt.tight_layout() plt.savefig(output_dir / 'branch_creation.pdf', dpi=300, bbox_inches='tight') plt.savefig(output_dir / 'branch_creation.png', dpi=150, bbox_inches='tight') diff --git a/src/branch.rs b/src/branch.rs index 591d199..5ef6b42 100644 --- a/src/branch.rs +++ b/src/branch.rs @@ -101,6 +101,7 @@ pub struct Branch { pub name: String, pub parent: Option, pub files_dir: PathBuf, + pub inherited_dir: PathBuf, pub tombstones_file: PathBuf, tombstones: RwLock>, /// Number of stale (removed-from-memory-but-still-on-disk) tombstone entries. 
@@ -121,9 +122,11 @@ impl Branch { ) -> Result { let branch_dir = storage_path.join("branches").join(name); let files_dir = branch_dir.join("files"); + let inherited_dir = branch_dir.join("inherited"); let tombstones_file = branch_dir.join("tombstones"); fs::create_dir_all(&files_dir)?; + fs::create_dir_all(&inherited_dir)?; if !tombstones_file.exists() { File::create(&tombstones_file)?; } @@ -134,6 +137,7 @@ impl Branch { name: name.to_string(), parent: parent.map(|s| s.to_string()), files_dir, + inherited_dir, tombstones_file, tombstones: RwLock::new(tombstones), tombstone_stale: AtomicU64::new(0), @@ -208,6 +212,10 @@ impl Branch { self.files_dir.join(rel_path.trim_start_matches('/')) } + pub fn inherited_path(&self, rel_path: &str) -> PathBuf { + self.inherited_dir.join(rel_path.trim_start_matches('/')) + } + pub fn has_delta(&self, rel_path: &str) -> bool { self.delta_path(rel_path).symlink_metadata().is_ok() } @@ -348,6 +356,10 @@ impl BranchManager { let parent_version = parent_branch.commit_count.load(Ordering::SeqCst); let branch = Branch::new(name, Some(parent), &self.storage_path, parent_version)?; + if let Err(e) = self.snapshot_visible_tree(&branches, parent, &branch.inherited_dir) { + let _ = fs::remove_dir_all(self.storage_path.join("branches").join(name)); + return Err(e); + } branches.insert(name.to_string(), branch); let elapsed = start.elapsed(); @@ -510,34 +522,34 @@ impl BranchManager { } } - /// Collect all candidate file/directory names visible in a directory - /// by walking the full branch ancestor chain and base directory. - /// Used by readdir to enumerate all possible entries before filtering. 
- pub fn collect_dir_names(&self, branch_name: &str, rel_path: &str) -> Result<HashSet<String>> { - let branches = self.branches.read(); - let mut names = HashSet::new(); - - let mut current = branch_name; - loop { - let branch = branches - .get(current) - .ok_or_else(|| BranchError::NotFound(current.to_string()))?; + fn inherited_source_path(&self, branch: &Branch, rel_path: &str) -> PathBuf { + if branch.parent.is_some() { + branch.inherited_path(rel_path) + } else { + self.base_path.join(rel_path.trim_start_matches('/')) + } + } - let delta_dir = branch.files_dir.join(rel_path.trim_start_matches('/')); - if let Ok(dir) = fs::read_dir(&delta_dir) { - for entry in dir.flatten() { - names.insert(entry.file_name().to_string_lossy().to_string()); - } - } + fn collect_dir_names_locked( + &self, + branches: &HashMap<String, Branch>, + branch_name: &str, + rel_path: &str, + ) -> Result<HashSet<String>> { + let branch = branches + .get(branch_name) + .ok_or_else(|| BranchError::NotFound(branch_name.to_string()))?; + let mut names = HashSet::new(); - match &branch.parent { - Some(parent) => current = parent, - None => break, + let delta_dir = branch.files_dir.join(rel_path.trim_start_matches('/')); + if let Ok(dir) = fs::read_dir(&delta_dir) { + for entry in dir.flatten() { + names.insert(entry.file_name().to_string_lossy().to_string()); } } - let base_dir = self.base_path.join(rel_path.trim_start_matches('/')); - if let Ok(dir) = fs::read_dir(&base_dir) { + let inherited_dir = self.inherited_source_path(branch, rel_path); + if let Ok(dir) = fs::read_dir(&inherited_dir) { for entry in dir.flatten() { names.insert(entry.file_name().to_string_lossy().to_string()); } @@ -546,37 +558,97 @@ impl BranchManager { Ok(names) } - pub fn resolve_path(&self, branch_name: &str, rel_path: &str) -> Result<Option<PathBuf>> { - let branches = self.branches.read(); - - let mut current = branch_name; - loop { - let branch = branches - .get(current) - .ok_or_else(|| BranchError::NotFound(current.to_string()))?; - - if branch.is_deleted(rel_path) { -
return Ok(None); - } + fn resolve_path_locked( + &self, + branches: &HashMap<String, Branch>, + branch_name: &str, + rel_path: &str, + ) -> Result<Option<PathBuf>> { + let branch = branches + .get(branch_name) + .ok_or_else(|| BranchError::NotFound(branch_name.to_string()))?; - if branch.has_delta(rel_path) { - return Ok(Some(branch.delta_path(rel_path))); - } + if branch.is_deleted(rel_path) { + return Ok(None); + } - match &branch.parent { - Some(parent) => current = parent, - None => break, - } + if branch.has_delta(rel_path) { + return Ok(Some(branch.delta_path(rel_path))); } - let base = self.base_path.join(rel_path.trim_start_matches('/')); - if base.symlink_metadata().is_ok() { - Ok(Some(base)) + let inherited = self.inherited_source_path(branch, rel_path); + if inherited.symlink_metadata().is_ok() { + Ok(Some(inherited)) } else { Ok(None) } } + fn snapshot_visible_tree( + &self, + branches: &HashMap<String, Branch>, + source_branch: &str, + dst_root: &Path, + ) -> Result<()> { + if dst_root.exists() { + fs::remove_dir_all(dst_root)?; + } + fs::create_dir_all(dst_root)?; + self.snapshot_visible_dir(branches, source_branch, "/", dst_root) + } + + fn snapshot_visible_dir( + &self, + branches: &HashMap<String, Branch>, + source_branch: &str, + rel_path: &str, + dst_root: &Path, + ) -> Result<()> { + let mut names: Vec<String> = self + .collect_dir_names_locked(branches, source_branch, rel_path)? + .into_iter() + .collect(); + names.sort(); + + for name in names { + let child_rel = if rel_path == "/" { + format!("/{}", name) + } else { + format!("{}/{}", rel_path, name) + }; + + let Some(src) = self.resolve_path_locked(branches, source_branch, &child_rel)?
else { + continue; + }; + let meta = src.symlink_metadata()?; + let dst = dst_root.join(child_rel.trim_start_matches('/')); + + if meta.file_type().is_dir() { + fs::create_dir_all(&dst)?; + fs::set_permissions(&dst, meta.permissions())?; + self.snapshot_visible_dir(branches, source_branch, &child_rel, dst_root)?; + } else { + storage::copy_entry(&src, &dst)?; + } + } + + Ok(()) + } + + /// Collect all candidate file/directory names visible in a directory. + /// Branches resolve against their own deltas plus the frozen inherited + /// snapshot captured at fork time; main resolves against its delta plus + /// the live base directory. + pub fn collect_dir_names(&self, branch_name: &str, rel_path: &str) -> Result<HashSet<String>> { + let branches = self.branches.read(); + self.collect_dir_names_locked(&branches, branch_name, rel_path) + } + + pub fn resolve_path(&self, branch_name: &str, rel_path: &str) -> Result<Option<PathBuf>> { + let branches = self.branches.read(); + self.resolve_path_locked(&branches, branch_name, rel_path) + } + /// Returns true if no other branch has `parent == name`. fn is_leaf(name: &str, branches: &std::collections::HashMap<String, Branch>) -> bool { !branches.values().any(|b| b.parent.as_deref() == Some(name)) diff --git a/src/fs_helpers.rs b/src/fs_helpers.rs index 6007c62..8f5ddb7 100644 --- a/src/fs_helpers.rs +++ b/src/fs_helpers.rs @@ -146,9 +146,9 @@ impl BranchFs { /// Collect readdir entries for a directory resolved via a specific branch. /// - /// Walks the full ancestor chain (branch → parent → … → main → base) to - /// collect all candidate names, then resolves each via `resolve_path` to - /// respect tombstones and determine the correct file type. + /// Collects candidate names from the branch delta plus its frozen inherited + /// snapshot, then resolves each via `resolve_path` to respect tombstones and + /// determine the correct file type.
/// /// `inode_prefix` controls how child inode paths are formed: /// - `"/@branch"` for branch subtrees (produces `/@branch/child`) @@ -165,7 +165,7 @@ impl BranchFs { (ino, FileType::Directory, "..".to_string()), ]; - // Collect all candidate names from the full branch ancestor chain + base. + // Collect all candidate names from the branch delta + inherited view. let mut candidates: Vec<String> = match self.manager.collect_dir_names(branch, rel_path) { Ok(names) => names.into_iter().collect(), Err(_) => return entries, diff --git a/tests/test_branch_dirs.sh b/tests/test_branch_dirs.sh index a1d0394..e51f1a7 100755 --- a/tests/test_branch_dirs.sh +++ b/tests/test_branch_dirs.sh @@ -168,6 +168,7 @@ test_branch_dir_nested_child() { # Create parent, then child do_create "parent-br" "main" + echo "parent content" > "$TEST_MNT/@parent-br/parent_file.txt" do_create "child-br" "parent-br" # @child-br should appear as a top-level @branch dir (flat namespace) @@ -180,14 +181,17 @@ test_branch_dir_nested_child() { echo "child content" > "$TEST_MNT/@child-br/child_file.txt" assert_file_exists "$TEST_MNT/@child-br/child_file.txt" "child_file.txt via @child-br" - # Child branch should see parent's files (inheritance via resolve_path chain) - echo "parent content" > "$TEST_MNT/@parent-br/parent_file.txt" + # Child branch sees the parent's state as of the child fork. + assert_file_exists "$TEST_MNT/@child-br/parent_file.txt" "Child sees parent's fork-time file" + + # Later parent changes are isolated from the already-created child.
+ echo "late parent content" > "$TEST_MNT/@parent-br/late_parent_file.txt" # Switch root to main so we don't confuse things echo "switch:main" > "$TEST_MNT/.branchfs_ctl" sleep 0.3 - assert_file_exists "$TEST_MNT/@child-br/parent_file.txt" "Child sees parent's file" + assert_file_not_exists "$TEST_MNT/@child-br/late_parent_file.txt" "Child does not see parent's post-fork file" do_unmount } diff --git a/tests/test_commit.sh b/tests/test_commit.sh index eff5c70..6c3e2d2 100755 --- a/tests/test_commit.sh +++ b/tests/test_commit.sh @@ -145,6 +145,8 @@ test_commit_preserves_siblings() { assert_branch_not_exists "sibling_b" "sibling_b removed after commit" assert_branch_exists "sibling_a" "sibling_a preserved after sibling commit" + assert_file_exists "$TEST_BASE/sibling_b_file.txt" "sibling_b file committed to base" + assert_file_not_exists "$TEST_MNT/@sibling_a/sibling_b_file.txt" "sibling_a does not see sibling_b post-fork commit" do_unmount }