From 71bda1589cb96ec7d5e4468098a7dc5f8e7fe171 Mon Sep 17 00:00:00 2001
From: Naadir Jeewa <naadir@randomvariable.co.uk>
Date: Sun, 8 Mar 2026 16:35:11 +0000
Subject: [PATCH 1/6] refactor: remove deprecated GraphFormat, GraphSourceType,
 GraphDataset, RunConfiguration

These types were superseded by GraphSource, RunConfig, and RunMode in
core::config/backend but never removed. Since there is no published
release, drop them without a deprecation period.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/core/types.rs | 129 ++++++++++++++++++++++++----------------------
 1 file changed, 68 insertions(+), 61 deletions(-)
diff --git a/src/core/types.rs b/src/core/types.rs
index fd2222a..accb043 100644
--- a/src/core/types.rs
+++ b/src/core/types.rs
@@ -14,64 +14,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#[derive(Clone, Debug, PartialEq)]
-pub enum GraphFormat {
-    EdgeList,
-    CsrBinary,
-}
-
-#[derive(Clone, Debug, PartialEq)]
-pub enum GraphSourceType {
-    File,
-    Neo4jSnapshot,
-}
-
-#[derive(Clone, Debug, PartialEq)]
-pub struct GraphDataset {
-    pub dataset_id: String,
-    pub source_uri: String,
-    pub is_weighted: bool,
-    pub node_count: usize,
-    pub edge_count: usize,
-    pub checksum: String,
-    pub format: GraphFormat,
-    pub source_type: GraphSourceType,
-    pub source_snapshot_id: Option<String>,
-}
-
-#[derive(Clone, Debug, PartialEq)]
-pub enum RunMode {
-    Deterministic,
-    Throughput,
-}
-
-#[derive(Clone, Debug, PartialEq)]
-pub struct RunConfiguration {
-    pub config_id: String,
-    pub mode: RunMode,
-    pub acceleration_enabled: bool,
-    pub seed: Option<u64>,
-    pub max_iterations: usize,
-    pub quality_tolerance: f64,
-    pub pinned_profile_id: Option<String>,
-    pub graph_source: GraphSourceType,
-}
-
-impl Default for RunConfiguration {
-    fn default() -> Self {
-        Self {
-            config_id: "default".to_string(),
-            mode: RunMode::Deterministic,
-            acceleration_enabled: false,
-            seed: None,
-            max_iterations: 10,
-            quality_tolerance: 0.001,
-            pinned_profile_id: None,
-            graph_source: GraphSourceType::File,
-        }
-    }
-}
-
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub enum RunStatus {
     Running,
@@ -79,6 +22,7 @@ pub enum RunStatus {
     Failed,
 }
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub enum BackendType {
     PureRust,
@@ -87,6 +31,7 @@ pub enum BackendType {
     RocmAccel,
 }
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub struct RunExecution {
     pub run_id: String,
@@ -96,10 +41,11 @@ pub struct RunExecution {
     pub completed_at: Option<u64>,
     pub status: RunStatus,
     pub backend: BackendType,
-    pub graph_source_resolved: GraphSourceType,
+    pub graph_source_resolved: crate::core::backend::GraphSource,
     pub fallback_reason: Option<String>,
 }
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub struct PartitionResult {
     pub run_id: String,
@@ -113,6 +59,7 @@ pub struct PartitionResult {
     pub iteration_count: usize,
 }
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub struct ValidationReport {
     pub run_id: String,
@@ -123,6 +70,7 @@ pub struct ValidationReport {
     pub notes: Option<String>,
 }
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub struct RunOutcome {
     pub execution: RunExecution,
@@ -130,21 +78,80 @@ pub struct RunOutcome {
     pub validation: Option<ValidationReport>,
 }
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub struct GraphInput {
-    pub dataset_id: String,
+    pub dataset_id: Option<String>,
     pub node_count: usize,
     pub edges: Vec<(usize, usize, Option<f64>)>,
 }
 
 impl GraphInput {
+    /// Create a `GraphInput` from an edge list with optional weights. Sets
+    /// `node_count` to the largest endpoint index plus one (0 if `edges` is empty).
+    pub fn from_edges(edges: Vec<(usize, usize, Option<f64>)>) -> Self {
+        let node_count = edges
+            .iter()
+            .flat_map(|(u, v, _)| [*u, *v])
+            .max()
+            .map(|m| m + 1)
+            .unwrap_or(0);
+        Self {
+            dataset_id: None,
+            node_count,
+            edges,
+        }
+    }
+
+    /// Create a `GraphInput` from unweighted edges. Computes `node_count`
+    /// automatically from the maximum node index.
+    pub fn from_unweighted_edges(edges: Vec<(usize, usize)>) -> Self {
+        Self::from_edges(edges.into_iter().map(|(u, v)| (u, v, None)).collect())
+    }
+
+    /// Create an empty graph tagged with the given dataset identifier.
     pub fn empty(dataset_id: impl Into<String>) -> Self {
         Self {
-            dataset_id: dataset_id.into(),
+            dataset_id: Some(dataset_id.into()),
             node_count: 0,
             edges: Vec::new(),
         }
     }
+
+    /// Validate that all edge endpoints are within bounds.
+    pub fn validate(&self) -> Result<(), crate::core::error::HitLeidenError> {
+        for (i, (u, v, _)) in self.edges.iter().enumerate() {
+            if *u >= self.node_count || *v >= self.node_count {
+                return Err(crate::core::error::HitLeidenError::InvalidInput(format!(
+                    "edge {} ({}, {}) references node >= node_count {}",
+                    i, u, v, self.node_count
+                )));
+            }
+        }
+        Ok(())
+    }
+}
+
+impl RunOutcome {
+    /// Extract the partition result, returning an error if no partition was produced.
+    pub fn into_partition(self) -> Result<PartitionResult, crate::core::error::HitLeidenError> {
+        self.partition.ok_or_else(|| {
+            crate::core::error::HitLeidenError::InvalidInput("no partition produced".into())
+        })
+    }
+}
+
+impl Default for PartitionResult {
+    fn default() -> Self {
+        Self {
+            run_id: String::new(),
+            node_to_community: Vec::new(),
+            hierarchy_levels: Vec::new(),
+            community_count: 0,
+            quality_score: 0.0,
+            iteration_count: 0,
+        }
+    }
 }
 
 // --- Benchmark & Profiling types (feature: profiling) ---

From 3c859db83b0235da40009b8e7b0796c7946de532 Mon Sep 17 00:00:00 2001
From: Naadir Jeewa <naadir@randomvariable.co.uk>
Date: Sun, 8 Mar 2026 16:36:40 +0000
Subject: [PATCH 2/6] refactor: remove unused refinement_gamma parameter

The refinement_gamma parameter was accepted but unused (prefixed with _)
in refine_singleton_merge. Remove it from RunConfig, multilevel_leiden,
refine_singleton_merge, CLI, and all test call sites.

Incorporates the intent of PR #3 with additional missed call sites fixed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/cli/run.rs                   |  3 +-
 src/core/algorithm/hit_leiden.rs | 65 +++++++++++++-------------------
 src/core/config.rs               | 40 ++++++++++++++++----
 3 files changed, 60 insertions(+), 48 deletions(-)

diff --git a/src/cli/run.rs b/src/cli/run.rs
index f67ac15..873beda 100644
--- a/src/cli/run.rs
+++ b/src/cli/run.rs
@@ -30,13 +30,12 @@ pub fn run_from_cli(
 
     let config = RunConfig {
         mode,
-        graph_source: GraphSource::File, // Assuming file for now
+        graph_source: Some(GraphSource::File),
         acceleration: AccelerationTarget::PureRust,
         quality_tolerance: 0.001,
         max_iterations: 10,
         pinned_profile: None,
         resolution: 1.0,
-        refinement_gamma: 0.05,
     };
 
     crate::run(graph, &config)
diff --git a/src/core/algorithm/hit_leiden.rs b/src/core/algorithm/hit_leiden.rs
index 981f28a..db772a7 100644
--- a/src/core/algorithm/hit_leiden.rs
+++ b/src/core/algorithm/hit_leiden.rs
@@ -56,7 +56,6 @@ pub fn run(graph: &GraphInput, config: &RunConfig) -> Result<RunOutcome, HitLeid
         &mut partition_state,
         graph,
         config.resolution,
-        config.refinement_gamma,
         config.mode,
         config.max_iterations,
     );
@@ -65,9 +64,10 @@ pub fn run(graph: &GraphInput, config: &RunConfig) -> Result<RunOutcome, HitLeid
     // across runs with identical partition structure.
     canonicalize_community_ids_in_place(&mut partition_state.node_to_comm);
 
+    let ds_id = graph.dataset_id.as_deref().unwrap_or("");
     let execution = RunExecution {
-        run_id: format!("run:{}", graph.dataset_id),
-        dataset_id: graph.dataset_id.clone(),
+        run_id: format!("run:{}", ds_id),
+        dataset_id: ds_id.to_string(),
         config_id: "default".to_string(),
         started_at,
         completed_at: Some(
@@ -78,15 +78,7 @@ pub fn run(graph: &GraphInput, config: &RunConfig) -> Result<RunOutcome, HitLeid
         ),
         status: RunStatus::Succeeded,
         backend: BackendType::PureRust,
-        graph_source_resolved: match resolution_meta.source_resolved {
-            crate::core::backend::GraphSource::File => crate::core::types::GraphSourceType::File,
-            crate::core::backend::GraphSource::Neo4jSnapshot => {
-                crate::core::types::GraphSourceType::Neo4jSnapshot
-            }
-            crate::core::backend::GraphSource::LiveNeo4j => {
-                crate::core::types::GraphSourceType::Neo4jSnapshot
-            } // Fallback
-        },
+        graph_source_resolved: resolution_meta.source_resolved,
         fallback_reason: resolution_meta.fallback_reason,
     };
 
@@ -119,7 +111,7 @@ pub fn run(graph: &GraphInput, config: &RunConfig) -> Result<RunOutcome, HitLeid
 }
 
 /// Compute standard modularity: Q = (1/2m) * sum_ij [ A_ij - k_i*k_j/(2m) ] * delta(c_i, c_j)
-fn compute_modularity(graph: &GraphInput, node_to_community: &[usize]) -> f64 {
+pub(crate) fn compute_modularity(graph: &GraphInput, node_to_community: &[usize]) -> f64 {
     let n = graph.node_count;
 
     // Compute node degrees from edge list
@@ -165,11 +157,10 @@ fn compute_modularity(graph: &GraphInput, node_to_community: &[usize]) -> f64 {
 /// subcommunities (connected components), aggregate based on subcommunities, and repeat.
 /// The refinement step prevents mega-communities by ensuring the coarsened graph
 /// represents subcommunity-level structure, following the standard Leiden approach.
-fn multilevel_leiden(
+pub(crate) fn multilevel_leiden(
     state: &mut PartitionState,
     graph: &GraphInput,
     gamma: f64,
-    refinement_gamma: f64,
     mode: crate::core::config::RunMode,
     max_levels: usize,
 ) -> (usize, Vec<Vec<usize>>) {
@@ -197,12 +188,10 @@ fn multilevel_leiden(
 
     // Refinement: within each community, merge singletons into subcommunities.
     // Uses the SAME resolution as movement for the quality function.
-    // The refinement_gamma (0.05) is only for the connectivity criterion.
     let mut subcommunities = refine_singleton_merge(
         &state.supergraphs[0],
         &state.community_mapping_per_level[0],
         gamma,
-        refinement_gamma,
     );
 
     // The community assignment (for final output) comes from movement
@@ -279,7 +268,6 @@ fn multilevel_leiden(
             &state.supergraphs[0],
             &state.node_to_comm,
             gamma,
-            refinement_gamma,
         );
         let new_subcomm_count = count_unique(&subcommunities);
 
@@ -360,7 +348,7 @@ fn count_unique(v: &[usize]) -> usize {
 
 /// Deterministically rewrite community labels to contiguous IDs [0..k-1]
 /// by scanning nodes in index order and assigning first-seen labels.
-fn canonicalize_community_ids_in_place(node_to_community: &mut [usize]) {
+pub(crate) fn canonicalize_community_ids_in_place(node_to_community: &mut [usize]) {
     let mut remap: HashMap<usize, usize> = HashMap::new();
     let mut next_id = 0usize;
 
@@ -455,7 +443,6 @@ fn refine_singleton_merge(
     graph: &crate::core::graph::in_memory::InMemoryGraph,
     node_to_community: &[usize],
     gamma: f64,
-    _refinement_gamma: f64,
 ) -> Vec<usize> {
     let n = graph.node_count;
     if n == 0 {
@@ -1809,7 +1796,7 @@ mod tests {
     /// Helper: build a GraphInput from an edge list with unit weights.
     fn graph(node_count: usize, edges: &[(usize, usize)]) -> GraphInput {
         GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count,
             edges: edges.iter().map(|&(u, v)| (u, v, Some(1.0))).collect(),
         }
@@ -1818,7 +1805,7 @@ mod tests {
     /// Helper: build a GraphInput with explicit weights.
     fn weighted_graph(node_count: usize, edges: &[(usize, usize, f64)]) -> GraphInput {
         GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count,
             edges: edges.iter().map(|&(u, v, w)| (u, v, Some(w))).collect(),
         }
@@ -1905,7 +1892,7 @@ mod tests {
     #[test]
     fn test_should_skip_aggregation_when_no_delta_and_no_refinement() {
         let delta = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 4,
             edges: vec![],
         };
@@ -1916,7 +1903,7 @@ mod tests {
     #[test]
     fn test_should_not_skip_aggregation_when_delta_or_refinement_exists() {
         let delta_non_empty = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 4,
             edges: vec![(0, 1, Some(1.0))],
         };
@@ -1924,7 +1911,7 @@ mod tests {
         assert!(!should_skip_aggregation(&delta_non_empty, &refined_empty));
 
         let delta_empty = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 4,
             edges: vec![],
         };
@@ -2138,7 +2125,7 @@ mod tests {
         let inmem = InMemoryGraph::from(&g);
 
         let delta = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 3,
             edges: vec![],
         };
@@ -2243,7 +2230,7 @@ mod tests {
     fn test_modularity_empty_graph() {
         // No edges => Q = 0
         let g = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 3,
             edges: vec![],
         };
@@ -2373,7 +2360,7 @@ mod tests {
         );
         let mut state = PartitionState::identity(6);
 
-        let (iters, hierarchy) = multilevel_leiden(&mut state, &g, 1.0, 0.05, mode, 10);
+        let (iters, hierarchy) = multilevel_leiden(&mut state, &g, 1.0, mode, 10);
 
         assert!(iters > 0, "should take at least 1 iteration");
         assert!(
@@ -2400,13 +2387,13 @@ mod tests {
     dual_mode_test!(test_multilevel_leiden_single_node, |mode| {
         // Single node graph, no edges.
         let g = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 1,
             edges: vec![],
         };
         let mut state = PartitionState::identity(1);
 
-        let (iters, hierarchy) = multilevel_leiden(&mut state, &g, 1.0, 0.05, mode, 10);
+        let (iters, hierarchy) = multilevel_leiden(&mut state, &g, 1.0, mode, 10);
 
         assert_eq!(state.node_to_comm, vec![0]);
         assert!(iters >= 1, "should still complete at least 1 iteration");
@@ -2416,13 +2403,13 @@ mod tests {
     dual_mode_test!(test_multilevel_leiden_disconnected_components, |mode| {
         // 4 disconnected nodes: each should be its own community.
         let g = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 4,
             edges: vec![],
         };
         let mut state = PartitionState::identity(4);
 
-        let (_iters, _hierarchy) = multilevel_leiden(&mut state, &g, 1.0, 0.05, mode, 10);
+        let (_iters, _hierarchy) = multilevel_leiden(&mut state, &g, 1.0, mode, 10);
 
         let comm_count = count_unique(&state.node_to_comm);
         assert_eq!(
@@ -2437,7 +2424,7 @@ mod tests {
         let g = graph(6, &[(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5), (2, 3)]);
         let mut state = PartitionState::identity(6);
 
-        let (_iters, hierarchy) = multilevel_leiden(&mut state, &g, 1.0, 0.05, mode, 10);
+        let (_iters, hierarchy) = multilevel_leiden(&mut state, &g, 1.0, mode, 10);
 
         // Each level should have exactly node_count entries
         for (i, level) in hierarchy.iter().enumerate() {
@@ -2976,7 +2963,7 @@ mod tests {
         refined.set(2, true); // Only node 2 was refined
 
         let delta = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 3,
             edges: vec![],
         };
@@ -3124,7 +3111,7 @@ mod tests {
         );
         let mut state = PartitionState::identity(6);
         state.supergraphs.push(InMemoryGraph::from(&GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 6,
             edges: vec![],
         }));
@@ -3158,7 +3145,7 @@ mod tests {
         );
         let mut state = PartitionState::identity(6);
         state.supergraphs.push(InMemoryGraph::from(&GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 6,
             edges: vec![],
         }));
@@ -3190,7 +3177,7 @@ mod tests {
         );
         let mut state = PartitionState::identity(4);
         state.supergraphs.push(InMemoryGraph::from(&GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 4,
             edges: vec![],
         }));
@@ -3233,7 +3220,7 @@ mod tests {
         );
         let mut state = PartitionState::identity(8);
         state.supergraphs.push(InMemoryGraph::from(&GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 8,
             edges: vec![],
         }));
@@ -3343,7 +3330,7 @@ mod tests {
     fn test_connected_components_single_node() {
         // Single node with no edges: 1 component of size 1.
         let g = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 1,
             edges: vec![],
         };
diff --git a/src/core/config.rs b/src/core/config.rs
index b8b81fb..074fa55 100644
--- a/src/core/config.rs
+++ b/src/core/config.rs
@@ -16,16 +16,21 @@
 
 use crate::core::backend::{AccelerationTarget, GraphSource};
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum RunMode {
     Deterministic,
     Throughput,
 }
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub struct RunConfig {
     pub mode: RunMode,
-    pub graph_source: GraphSource,
+    /// Graph source hint. Only meaningful for CLI / orchestrator usage.
+    /// Library consumers calling `run()` or `run_simple()` with in-memory data
+    /// can leave this as `None`; the resolver then reports `GraphSource::File`.
+    pub graph_source: Option<GraphSource>,
     pub acceleration: AccelerationTarget,
     pub quality_tolerance: f64,
     pub max_iterations: usize,
@@ -34,28 +39,24 @@ pub struct RunConfig {
     /// Default 1.0 matches the HIT-Leiden paper (standard modularity).
     /// Used for both movement and refinement quality functions.
     pub resolution: f64,
-    /// Refinement connectivity criterion gamma. Controls which nodes participate
-    /// in refinement merging (must satisfy cut_size >= gamma * v_total * (S - v_total)).
-    /// Default 0.05. NOT used for quality function.
-    pub refinement_gamma: f64,
 }
 
 impl Default for RunConfig {
     fn default() -> Self {
         Self {
             mode: RunMode::Deterministic,
-            graph_source: GraphSource::File,
+            graph_source: None,
             acceleration: AccelerationTarget::PureRust,
             quality_tolerance: 0.001,
             max_iterations: 10,
             pinned_profile: None,
             resolution: 1.0,
-            refinement_gamma: 0.05,
         }
     }
 }
 
 impl RunConfig {
+    /// Validate the configuration.
     pub fn validate(&self) -> Result<(), String> {
         if self.max_iterations == 0 {
             return Err("max_iterations must be > 0".to_string());
@@ -65,4 +66,29 @@ impl RunConfig {
         }
         Ok(())
     }
+
+    /// Set the run mode.
+    pub fn with_mode(mut self, mode: RunMode) -> Self {
+        self.mode = mode;
+        self
+    }
+
+    /// Set the resolution parameter (gamma).
+    pub fn with_resolution(mut self, r: f64) -> Self {
+        self.resolution = r;
+        self
+    }
+
+    /// Set the maximum number of iterations.
+    pub fn with_max_iterations(mut self, n: usize) -> Self {
+        self.max_iterations = n;
+        self
+    }
+
+    /// Set the quality tolerance for convergence.
+    pub fn with_quality_tolerance(mut self, t: f64) -> Self {
+        self.quality_tolerance = t;
+        self
+    }
+
 }

From 7f29b992bf24882a56acef00e5aa3c84420d6a17 Mon Sep 17 00:00:00 2001
From: Naadir Jeewa <naadir@randomvariable.co.uk>
Date: Sun, 8 Mar 2026 16:36:51 +0000
Subject: [PATCH 3/6] feat: improve library usability with serde, builders, and
 run_simple

- Add conditional serde derives behind "serde" feature flag
- Make graph_source Optional in RunConfig (library users don't need it)
- Add builder methods: with_mode, with_resolution, with_max_iterations,
  with_quality_tolerance
- Add run_simple() entry point for edges-in, partition-out usage
- Re-export PartitionResult, PartitionState, CommunityRelation,
  CommunityState from crate root
- Gate CLI module behind "cli" feature

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 Cargo.toml                                    | 13 +++----
 benchmarks/criterion/hit_leiden_suite.rs      |  2 +-
 src/benchmark/dynamic_graph.rs                |  8 ++--
 src/benchmark/hit_leiden_incremental.rs       |  4 +-
 src/benchmark/runner.rs                       |  2 +-
 src/bin/profile_incremental.rs                |  2 +-
 src/bin/profile_run.rs                        |  2 +-
 src/core/backend.rs                           |  2 +
 src/core/error.rs                             |  2 +
 src/core/graph/in_memory.rs                   |  1 +
 src/core/graph/neo4j_snapshot.rs              |  2 +-
 src/core/mod.rs                               |  1 +
 src/core/partition/state.rs                   |  1 +
 src/core/runtime/resolver.rs                  |  2 +-
 src/lib.rs                                    | 38 ++++++++++++++++++-
 src/main.rs                                   |  9 +++--
 tests/contract/test_run_validate.rs           |  2 +-
 ...test_connected_graph_not_all_singletons.rs |  2 +-
 .../test_default_config_minimal_args.rs       |  6 +--
 .../test_deterministic_identity.rs            |  2 +-
 .../integration/test_neo4j_snapshot_parity.rs |  2 +-
 .../test_throughput_equivalence.rs            |  2 +-
 tests/property/test_partition_invariants.rs   |  2 +-
 23 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 7c1472f..51e04a7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,9 +21,7 @@ edition = "2021"
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-clap = { version = "4.5", features = [
-  "derive",
-] }
+clap = { version = "4.5", features = ["derive"], optional = true }
 ahash = "0.8"
 rayon = "1.10"
 smallvec = "1.13"
@@ -52,13 +50,13 @@ anyhow    = "1.0.102"
 lender    = "0.6.2"
 
 [features]
-default = [
-]
+default = []
+cli = ["dep:clap"]
+serde = ["dep:serde", "dep:serde_json"]
 profiling = [
+  "serde",
   "webgraph",
   "lender",
-  "serde",
-  "serde_json",
   "plotly",
   "inferno",
   "prost",
@@ -71,6 +69,7 @@ profiling = [
 [[bin]]
 name = "hit-leiden"
 path = "src/main.rs"
+required-features = ["cli"]
 
 [[bin]]
 name = "profile_run"
diff --git a/benchmarks/criterion/hit_leiden_suite.rs b/benchmarks/criterion/hit_leiden_suite.rs
index 006b39f..0274f74 100644
--- a/benchmarks/criterion/hit_leiden_suite.rs
+++ b/benchmarks/criterion/hit_leiden_suite.rs
@@ -53,7 +53,7 @@ fn load_uk_2007() -> GraphInput {
     );
 
     GraphInput {
-        dataset_id: "uk-2007-05@100000".to_string(),
+        dataset_id: Some("uk-2007-05@100000".to_string()),
         node_count: num_nodes,
         edges,
     }
diff --git a/src/benchmark/dynamic_graph.rs b/src/benchmark/dynamic_graph.rs
index e7dfd8c..09ebee5 100644
--- a/src/benchmark/dynamic_graph.rs
+++ b/src/benchmark/dynamic_graph.rs
@@ -59,7 +59,7 @@ impl DynamicGraphBuilder {
             cumulative_edges.extend_from_slice(chunk);
 
             batches.push(GraphInput {
-                dataset_id: format!("batch_{}", idx),
+                dataset_id: Some(format!("batch_{}", idx)),
                 node_count: self.node_count,
                 edges: cumulative_edges
                     .iter()
@@ -106,7 +106,7 @@ impl DynamicGraphBuilder {
             cumulative_edges.extend_from_slice(&shuffled[start..end]);
 
             update_batches.push(GraphInput {
-                dataset_id: format!("paper_batch_{}", round),
+                dataset_id: Some(format!("paper_batch_{}", round)),
                 node_count: self.node_count,
                 edges: cumulative_edges
                     .iter()
@@ -117,7 +117,7 @@ impl DynamicGraphBuilder {
 
         IncrementalSplit {
             initial_graph: GraphInput {
-                dataset_id: "paper_initial".to_string(),
+                dataset_id: Some("paper_initial".to_string()),
                 node_count: self.node_count,
                 edges: initial_edges
                     .iter()
@@ -138,7 +138,7 @@ mod tests {
     #[test]
     fn paper_split_uses_initial_ratio_and_fixed_rounds() {
         let graph = GraphInput {
-            dataset_id: "test".to_string(),
+            dataset_id: Some("test".to_string()),
             node_count: 100,
             edges: (0..100).map(|i| (i, (i + 1) % 100, None::<f64>)).collect(),
         };
diff --git a/src/benchmark/hit_leiden_incremental.rs b/src/benchmark/hit_leiden_incremental.rs
index cceda73..de0f4f1 100644
--- a/src/benchmark/hit_leiden_incremental.rs
+++ b/src/benchmark/hit_leiden_incremental.rs
@@ -225,7 +225,7 @@ pub fn run_incremental_with_config(
                 let _ = ig_disk_cache::save(
                     cache_dir,
                     &ig_disk_cache::CachedIgraphLeiden {
-                        dataset_id: batch_graph.dataset_id.clone(),
+                        dataset_id: batch_graph.dataset_id.clone().unwrap_or_default(),
                         node_count: batch_graph.node_count,
                         edge_count: batch_graph.edges.len(),
                         time_ms: ig.time_ms,
@@ -296,7 +296,7 @@ fn load_cached_igraph_result(
     }
     let cache_dir = config.cache_dir.as_ref()?;
     let first_batch = batches.first()?;
-    let cached = ig_disk_cache::load(cache_dir, &first_batch.dataset_id, first_batch.edges.len())?;
+    let cached = ig_disk_cache::load(cache_dir, first_batch.dataset_id.as_deref().unwrap_or(""), first_batch.edges.len())?;
     Some(IgResult {
         time_ms: cached.time_ms,
         modularity: cached.modularity,
diff --git a/src/benchmark/runner.rs b/src/benchmark/runner.rs
index 7405158..413e35f 100644
--- a/src/benchmark/runner.rs
+++ b/src/benchmark/runner.rs
@@ -119,7 +119,7 @@ pub mod benchmark_runner {
 
         let benchmark_run = BenchmarkRun {
             timestamp: timestamp.clone(),
-            dataset_id: config.graph.dataset_id.clone(),
+            dataset_id: config.graph.dataset_id.clone().unwrap_or_default(),
             timeout_seconds: config.timeout_seconds,
             truncated: outcome.truncated,
             batches: outcome.batches,
diff --git a/src/bin/profile_incremental.rs b/src/bin/profile_incremental.rs
index 40f73ee..2016072 100644
--- a/src/bin/profile_incremental.rs
+++ b/src/bin/profile_incremental.rs
@@ -58,7 +58,7 @@ fn load_graph() -> GraphInput {
         edges.len()
     );
     GraphInput {
-        dataset_id: "uk-2007-05@100000".to_string(),
+        dataset_id: Some("uk-2007-05@100000".to_string()),
         node_count: num_nodes,
         edges,
     }
diff --git a/src/bin/profile_run.rs b/src/bin/profile_run.rs
index 8b85245..fa2534d 100644
--- a/src/bin/profile_run.rs
+++ b/src/bin/profile_run.rs
@@ -54,7 +54,7 @@ fn load_graph() -> GraphInput {
         edges.len()
     );
     GraphInput {
-        dataset_id: "uk-2007-05@100000".to_string(),
+        dataset_id: Some("uk-2007-05@100000".to_string()),
         node_count: num_nodes,
         edges,
     }
diff --git a/src/core/backend.rs b/src/core/backend.rs
index 54cadb7..553f045 100644
--- a/src/core/backend.rs
+++ b/src/core/backend.rs
@@ -14,6 +14,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum GraphSource {
     File,
@@ -21,6 +22,7 @@ pub enum GraphSource {
     LiveNeo4j,
 }
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum AccelerationTarget {
     PureRust,
diff --git a/src/core/error.rs b/src/core/error.rs
index bb83257..1197f58 100644
--- a/src/core/error.rs
+++ b/src/core/error.rs
@@ -24,4 +24,6 @@ pub enum HitLeidenError {
     Backend(String),
     #[error("acceleration error: {0}")]
     Acceleration(String),
+    #[error("validation failed: {0}")]
+    ValidationFailed(String),
 }
diff --git a/src/core/graph/in_memory.rs b/src/core/graph/in_memory.rs
index 8d73ba0..d6e7e04 100644
--- a/src/core/graph/in_memory.rs
+++ b/src/core/graph/in_memory.rs
@@ -17,6 +17,7 @@
 use crate::core::types::GraphInput;
 use ahash::{HashMap, HashMapExt};
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub struct InMemoryGraph {
     pub node_count: usize,
diff --git a/src/core/graph/neo4j_snapshot.rs b/src/core/graph/neo4j_snapshot.rs
index 36b460d..d7585f9 100644
--- a/src/core/graph/neo4j_snapshot.rs
+++ b/src/core/graph/neo4j_snapshot.rs
@@ -28,7 +28,7 @@ pub fn project_from_neo4j(
     projection_config: &ProjectionConfig,
 ) -> Result<GraphInput, HitLeidenError> {
     Ok(GraphInput {
-        dataset_id: format!("neo4j:{}", projection_config.snapshot_id),
+        dataset_id: Some(format!("neo4j:{}", projection_config.snapshot_id)),
         node_count: 0,
         edges: Vec::new(),
     })
diff --git a/src/core/mod.rs b/src/core/mod.rs
index f4655ec..9b8c3ad 100644
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -16,6 +16,7 @@
 
 pub mod algorithm;
 pub mod backend;
+pub mod community_state;
 pub mod config;
 pub mod error;
 pub mod graph;
diff --git a/src/core/partition/state.rs b/src/core/partition/state.rs
index c4c4c33..182faf5 100644
--- a/src/core/partition/state.rs
+++ b/src/core/partition/state.rs
@@ -16,6 +16,7 @@
 
 use crate::core::graph::in_memory::InMemoryGraph;
 
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, PartialEq)]
 pub struct PartitionState {
     pub node_to_comm: Vec<usize>,
diff --git a/src/core/runtime/resolver.rs b/src/core/runtime/resolver.rs
index d749692..08837be 100644
--- a/src/core/runtime/resolver.rs
+++ b/src/core/runtime/resolver.rs
@@ -19,7 +19,7 @@ use crate::core::config::RunConfig;
 
 pub fn resolve(config: &RunConfig) -> ResolutionMetadata {
     ResolutionMetadata {
-        source_resolved: config.graph_source,
+        source_resolved: config.graph_source.unwrap_or(GraphSource::File),
         accel_resolved: config.acceleration,
         fallback_reason: None,
     }
diff --git a/src/lib.rs b/src/lib.rs
index 93fd45b..afca108 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,6 +15,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 pub mod benchmark;
+#[cfg(feature = "cli")]
 pub mod cli;
 pub mod core;
 
@@ -22,12 +23,47 @@ pub use core::backend::{AccelerationTarget, GraphSource};
 pub use core::config::{RunConfig, RunMode};
 pub use core::error::HitLeidenError;
 pub use core::report::{BenchmarkOutcome, ValidationOutcome};
-pub use core::types::{GraphInput, RunOutcome};
+pub use core::community_state::{CommunityRelation, CommunityState};
+pub use core::partition::state::PartitionState;
+pub use core::types::{GraphInput, PartitionResult, RunOutcome};
 
+/// Run community detection on the given graph with the specified configuration.
+///
+/// # Examples
+///
+/// ```no_run
+/// use hit_leiden::{GraphInput, RunConfig, RunMode};
+///
+/// let graph = GraphInput::from_unweighted_edges(vec![(0, 1), (1, 2), (2, 0)]);
+/// let config = RunConfig::default().with_mode(RunMode::Deterministic);
+/// let outcome = hit_leiden::run(&graph, &config).unwrap();
+/// let partition = outcome.into_partition().unwrap();
+/// println!("communities: {:?}", partition.node_to_community);
+/// ```
 pub fn run(graph: &GraphInput, config: &RunConfig) -> Result<RunOutcome, HitLeidenError> {
     core::algorithm::hit_leiden::run(graph, config)
 }
 
+/// Convenience wrapper: build a graph from `edges`, run with
+/// `RunConfig::default()`, and return the resulting partition.
+///
+/// # Errors
+///
+/// Propagates any error returned by [`run`] or by `into_partition`.
+///
+/// # Examples
+///
+/// ```no_run
+/// let edges = vec![(0, 1, None), (1, 2, None), (2, 0, None)];
+/// let partition = hit_leiden::run_simple(edges).unwrap();
+/// println!("communities: {:?}", partition.node_to_community);
+/// ```
+pub fn run_simple(
+    edges: Vec<(usize, usize, Option<f64>)>,
+) -> Result<core::types::PartitionResult, HitLeidenError> {
+    run(&GraphInput::from_edges(edges), &RunConfig::default())?.into_partition()
+}
+
 pub fn project_from_neo4j(
     source_config: &core::graph::neo4j_snapshot::Neo4jSourceConfig,
     projection_config: &core::graph::neo4j_mapping::ProjectionConfig,
diff --git a/src/main.rs b/src/main.rs
index b1ae7f2..4944ee5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -220,10 +220,11 @@ fn load_webgraph(
     );
 
     Ok(hit_leiden::core::types::GraphInput {
-        dataset_id: path
-            .file_stem()
-            .map(|s| s.to_string_lossy().to_string())
-            .unwrap_or_else(|| "unknown".to_string()),
+        dataset_id: Some(
+            path.file_stem()
+                .map(|s| s.to_string_lossy().to_string())
+                .unwrap_or_else(|| "unknown".to_string()),
+        ),
         node_count: num_nodes,
         edges,
     })
diff --git a/tests/contract/test_run_validate.rs b/tests/contract/test_run_validate.rs
index ca21a5e..95ba1d3 100644
--- a/tests/contract/test_run_validate.rs
+++ b/tests/contract/test_run_validate.rs
@@ -19,7 +19,7 @@ use hit_leiden::{run, validate, GraphInput, RunConfig};
 #[test]
 fn run_and_validate_contract() {
     let graph = GraphInput {
-        dataset_id: "d1".to_string(),
+        dataset_id: Some("d1".to_string()),
         node_count: 3,
         edges: vec![(0, 1, None), (1, 2, None)],
     };
diff --git a/tests/integration/test_connected_graph_not_all_singletons.rs b/tests/integration/test_connected_graph_not_all_singletons.rs
index ce44c2d..307a35e 100644
--- a/tests/integration/test_connected_graph_not_all_singletons.rs
+++ b/tests/integration/test_connected_graph_not_all_singletons.rs
@@ -21,7 +21,7 @@ use hit_leiden::{run, GraphInput, RunConfig};
 #[test]
 fn connected_graph_not_all_singletons() {
     let graph = GraphInput {
-        dataset_id: "connected-1".to_string(),
+        dataset_id: Some("connected-1".to_string()),
         node_count: 6,
         edges: vec![
             (0, 1, Some(1.0)),
diff --git a/tests/integration/test_default_config_minimal_args.rs b/tests/integration/test_default_config_minimal_args.rs
index e5cacfe..e0ed165 100644
--- a/tests/integration/test_default_config_minimal_args.rs
+++ b/tests/integration/test_default_config_minimal_args.rs
@@ -14,15 +14,15 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-use hit_leiden::{cli::run::run_default, GraphInput};
+use hit_leiden::{run, GraphInput, RunConfig};
 
 #[test]
 fn default_run_with_minimal_required_graph_source() {
     let graph = GraphInput {
-        dataset_id: "min".into(),
+        dataset_id: Some("min".to_string()),
         node_count: 1,
         edges: vec![],
     };
-    let out = run_default(&graph).expect("default run should succeed");
+    let out = run(&graph, &RunConfig::default()).expect("default run should succeed");
     assert_eq!(out.partition.unwrap().node_to_community.len(), 1);
 }
diff --git a/tests/integration/test_deterministic_identity.rs b/tests/integration/test_deterministic_identity.rs
index aa329ec..ee08ba4 100644
--- a/tests/integration/test_deterministic_identity.rs
+++ b/tests/integration/test_deterministic_identity.rs
@@ -19,7 +19,7 @@ use hit_leiden::{run, GraphInput, RunConfig};
 #[test]
 fn deterministic_replay_identity() {
     let graph = GraphInput {
-        dataset_id: "d2".to_string(),
+        dataset_id: Some("d2".to_string()),
         node_count: 4,
         edges: vec![(0, 1, None), (2, 3, None)],
     };
diff --git a/tests/integration/test_neo4j_snapshot_parity.rs b/tests/integration/test_neo4j_snapshot_parity.rs
index b920bb8..e61c134 100644
--- a/tests/integration/test_neo4j_snapshot_parity.rs
+++ b/tests/integration/test_neo4j_snapshot_parity.rs
@@ -29,5 +29,5 @@ fn neo4j_projection_parity_shape() {
         batched: true,
     };
     let graph = project_from_neo4j(&source, &proj).expect("projection");
-    assert!(graph.dataset_id.starts_with("neo4j:"));
+    assert!(graph.dataset_id.as_deref().unwrap_or("").starts_with("neo4j:"));
 }
diff --git a/tests/integration/test_throughput_equivalence.rs b/tests/integration/test_throughput_equivalence.rs
index c395473..edf2071 100644
--- a/tests/integration/test_throughput_equivalence.rs
+++ b/tests/integration/test_throughput_equivalence.rs
@@ -19,7 +19,7 @@ use hit_leiden::{core::config::RunMode, run, validate, GraphInput, RunConfig};
 #[test]
 fn throughput_equivalence_bounds() {
     let graph = GraphInput {
-        dataset_id: "d3".to_string(),
+        dataset_id: Some("d3".to_string()),
         node_count: 2,
         edges: vec![(0, 1, Some(1.0))],
     };
diff --git a/tests/property/test_partition_invariants.rs b/tests/property/test_partition_invariants.rs
index 6e01506..0e187af 100644
--- a/tests/property/test_partition_invariants.rs
+++ b/tests/property/test_partition_invariants.rs
@@ -21,7 +21,7 @@ proptest! {
     #[test]
     fn partition_len_matches_nodes(node_count in 0usize..50) {
         let graph = GraphInput {
-            dataset_id: "p1".to_string(),
+            dataset_id: Some("p1".to_string()),
             node_count,
             edges: vec![],
         };

From 3c727beaa579860cc7414d7a24fbb8ad17cf1990 Mon Sep 17 00:00:00 2001
From: Naadir Jeewa <naadir@randomvariable.co.uk>
Date: Sun, 8 Mar 2026 16:37:01 +0000
Subject: [PATCH 4/6] feat: add CommunityState for incremental updates with
 hierarchy

CommunityState wraps PartitionState to provide a persistent,
serializable community detection state that supports incremental
graph updates via delta edge lists.

Key features:
- initial() runs full Leiden and captures hierarchy levels
- update() applies delta edges incrementally, updating level 0
- hierarchy_levels() exposes the multi-level community structure
- community_tree() derives parent-child relationships via majority vote
- into_parts()/from_parts() for decomposed storage and restoration

Hierarchy support enables DRIFT search (top-down traversal of community
tree for progressive query refinement).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/core/community_state.rs | 379 ++++++++++++++++++++++++++++++++++++
 1 file changed, 379 insertions(+)
 create mode 100644 src/core/community_state.rs

diff --git a/src/core/community_state.rs b/src/core/community_state.rs
new file mode 100644
index 0000000..6cbe446
--- /dev/null
+++ b/src/core/community_state.rs
@@ -0,0 +1,379 @@
+// Copyright 2026 naadir jeewa
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::core::algorithm::hit_leiden::{
+    canonicalize_community_ids_in_place, compute_modularity,
+};
+use crate::core::config::RunConfig;
+use crate::core::error::HitLeidenError;
+use crate::core::partition::state::PartitionState;
+use crate::core::types::{GraphInput, PartitionResult};
+use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
+
+/// Persistent community detection state that can be serialized, stored,
+/// and later restored to apply incremental updates.
+///
+/// # Usage
+///
+/// ```no_run
+/// use hit_leiden::{CommunityState, GraphInput, RunConfig};
+///
+/// // Initial run
+/// let edges = GraphInput::from_unweighted_edges(vec![(0, 1), (1, 2), (2, 0)]);
+/// let (state, partition) = CommunityState::initial(&edges, &RunConfig::default())?;
+///
+/// // Persist state (requires `serde` feature)
+/// // let bytes = serde_json::to_vec(&state)?;
+///
+/// // Later: restore and apply delta
+/// // let state: CommunityState = serde_json::from_slice(&bytes)?;
+/// let delta = GraphInput::from_edges(vec![(2, 3, Some(1.0)), (3, 4, Some(1.0))]);
+/// let (updated_state, updated_partition) = state.update(&delta, &RunConfig::default())?;
+/// # Ok::<(), hit_leiden::HitLeidenError>(())
+/// ```
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[derive(Clone, Debug)]
+pub struct CommunityState {
+    /// Internal partition state (community mappings, supergraphs, hierarchy).
+    partition_state: PartitionState,
+    /// The current full graph as an edge list, needed to compute modularity
+    /// and to reconstruct `GraphInput` for the algorithm.
+    current_edges: Vec<(usize, usize, Option<f64>)>,
+    /// Current node count.
+    node_count: usize,
+    /// Hierarchy levels from community detection. Level 0 = finest (most
+    /// communities), last = coarsest (fewest). Each entry maps node index
+    /// to community ID at that level. Stored so incremental updates can
+    /// return meaningful hierarchy data without full recompute.
+    hierarchy_levels: Vec<Vec<usize>>,
+}
+
+/// A parent-child relationship between communities at successive hierarchy levels.
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CommunityRelation {
+    /// Community ID at the finer (child) level.
+    pub child_community: usize,
+    /// The finer level index.
+    pub child_level: usize,
+    /// Community ID at the coarser (parent) level.
+    pub parent_community: usize,
+    /// The coarser level index.
+    pub parent_level: usize,
+}
+
+impl CommunityState {
+    /// Run initial community detection on a graph and return the persistent
+    /// state alongside the partition result.
+    pub fn initial(
+        graph: &GraphInput,
+        config: &RunConfig,
+    ) -> Result<(Self, PartitionResult), HitLeidenError> {
+        config
+            .validate()
+            .map_err(|e| HitLeidenError::InvalidInput(e.to_string()))?;
+        graph.validate()?;
+
+        let mut partition_state = PartitionState::identity(graph.node_count);
+
+        let (iteration_count, hierarchy_levels) =
+            crate::core::algorithm::hit_leiden::multilevel_leiden(
+                &mut partition_state,
+                graph,
+                config.resolution,
+                config.mode,
+                config.max_iterations,
+            );
+
+        canonicalize_community_ids_in_place(&mut partition_state.node_to_comm);
+
+        let community_count = count_communities(&partition_state.node_to_comm);
+        let quality_score = compute_modularity(graph, &partition_state.node_to_comm);
+
+        let partition = PartitionResult {
+            run_id: String::new(),
+            node_to_community: partition_state.node_to_comm.clone(),
+            hierarchy_levels,
+            community_count,
+            quality_score,
+            iteration_count,
+        };
+
+        let state = Self {
+            partition_state,
+            current_edges: graph.edges.clone(),
+            node_count: graph.node_count,
+            hierarchy_levels: partition.hierarchy_levels.clone(),
+        };
+
+        Ok((state, partition))
+    }
+
+    /// Apply a delta graph to update the community structure incrementally.
+    ///
+    /// Delta edges use weight to encode changes:
+    /// - Positive weight (or `None` = 1.0): add or strengthen an edge
+    /// - Negative weight: remove or weaken an edge
+    ///
+    /// Consumes `self` and returns the updated state + new partition.
+    pub fn update(
+        mut self,
+        delta: &GraphInput,
+        config: &RunConfig,
+    ) -> Result<(Self, PartitionResult), HitLeidenError> {
+        config
+            .validate()
+            .map_err(|e| HitLeidenError::InvalidInput(e.to_string()))?;
+
+        // Expand node count if delta introduces new nodes
+        let new_node_count = self.node_count.max(delta.node_count);
+        if new_node_count > self.node_count {
+            self.expand_to(new_node_count);
+        }
+
+        // Run incremental HIT-Leiden
+        let iteration_count = crate::core::algorithm::hit_leiden::hit_leiden(
+            &mut self.partition_state,
+            delta,
+            config.resolution,
+            config.mode,
+        );
+
+        canonicalize_community_ids_in_place(&mut self.partition_state.node_to_comm);
+
+        // Update stored edges by applying the delta
+        self.apply_edge_delta(delta);
+
+        // Update the finest hierarchy level (level 0) with the new partition.
+        // Coarser levels are approximately stable for small incremental changes.
+        if !self.hierarchy_levels.is_empty() {
+            self.hierarchy_levels[0] = self.partition_state.node_to_comm.clone();
+            // Extend hierarchy levels if node count grew
+            for level in &mut self.hierarchy_levels {
+                if level.len() < self.node_count {
+                    let old_len = level.len();
+                    level.resize(self.node_count, 0);
+                    for i in old_len..self.node_count {
+                        level[i] = i;
+                    }
+                }
+            }
+        } else {
+            self.hierarchy_levels = vec![self.partition_state.node_to_comm.clone()];
+        }
+
+        let full_graph = GraphInput {
+            dataset_id: None,
+            node_count: self.node_count,
+            edges: self.current_edges.clone(),
+        };
+
+        let community_count = count_communities(&self.partition_state.node_to_comm);
+        let quality_score =
+            compute_modularity(&full_graph, &self.partition_state.node_to_comm);
+
+        let partition = PartitionResult {
+            run_id: String::new(),
+            node_to_community: self.partition_state.node_to_comm.clone(),
+            hierarchy_levels: self.hierarchy_levels.clone(),
+            community_count,
+            quality_score,
+            iteration_count,
+        };
+
+        Ok((self, partition))
+    }
+
+    /// Get the current node count.
+    pub fn node_count(&self) -> usize {
+        self.node_count
+    }
+
+    /// Get the current edge count.
+    pub fn edge_count(&self) -> usize {
+        self.current_edges.len()
+    }
+
+    /// Decompose the state into its constituent parts for storage.
+    ///
+    /// Returns `(partition_state, edges, node_count, hierarchy_levels)`, the
+    /// argument order of `from_parts`. Persist the `PartitionState` and the
+    /// hierarchy levels; edges and node count can typically be derived from
+    /// an external source (e.g. a database edge table).
+    pub fn into_parts(
+        self,
+    ) -> (
+        PartitionState,
+        Vec<(usize, usize, Option<f64>)>,
+        usize,
+        Vec<Vec<usize>>,
+    ) {
+        (
+            self.partition_state,
+            self.current_edges,
+            self.node_count,
+            self.hierarchy_levels,
+        )
+    }
+
+    /// Reconstruct a `CommunityState` from previously decomposed parts.
+    ///
+    /// Use this when restoring from persisted storage where the
+    /// `PartitionState` was stored separately from the graph edges.
+    pub fn from_parts(
+        partition_state: PartitionState,
+        edges: Vec<(usize, usize, Option<f64>)>,
+        node_count: usize,
+        hierarchy_levels: Vec<Vec<usize>>,
+    ) -> Self {
+        Self {
+            partition_state,
+            current_edges: edges,
+            node_count,
+            hierarchy_levels,
+        }
+    }
+
+    /// Borrow the internal partition state (for serialization without consuming).
+    pub fn partition_state(&self) -> &PartitionState {
+        &self.partition_state
+    }
+
+    /// Get the stored hierarchy levels.
+    ///
+    /// Level 0 = finest (most communities), last = coarsest (fewest).
+    /// Each entry maps node index → community ID at that level.
+    pub fn hierarchy_levels(&self) -> &[Vec<usize>] {
+        &self.hierarchy_levels
+    }
+
+    /// Derive parent-child relationships between communities at successive
+    /// hierarchy levels.
+    ///
+    /// For each community at level L, determines its parent at level L+1 by
+    /// majority vote: the parent is whichever community at L+1 contains the
+    /// most nodes from the child community at L. Ties go to the smallest
+    /// parent ID, and relations come back ordered by level, then child ID,
+    /// so the result does not depend on hash-map iteration order.
+    pub fn community_tree(&self) -> Vec<CommunityRelation> {
+        let mut relations = Vec::new();
+        for (level, pair) in self.hierarchy_levels.windows(2).enumerate() {
+            // Count, per child community, how many of its nodes fall into each
+            // parent community. `zip` stops at the shorter of the two levels.
+            let mut votes_by_child: HashMap<usize, HashMap<usize, usize>> = HashMap::new();
+            for (&child, &parent) in pair[0].iter().zip(&pair[1]) {
+                *votes_by_child.entry(child).or_default().entry(parent).or_default() += 1;
+            }
+
+            let level_start = relations.len();
+            for (&child, votes) in &votes_by_child {
+                // Highest count wins; `Reverse` lets the smallest ID win ties.
+                let winner = votes
+                    .iter()
+                    .max_by_key(|&(&parent, &count)| (count, std::cmp::Reverse(parent)));
+                if let Some((&parent, _)) = winner {
+                    relations.push(CommunityRelation {
+                        child_community: child,
+                        child_level: level,
+                        parent_community: parent,
+                        parent_level: level + 1,
+                    });
+                }
+            }
+
+            // Levels are visited in ascending order, so sorting only this
+            // level's tail by child ID keeps the whole vector ordered.
+            relations[level_start..].sort_unstable_by_key(|rel| rel.child_community);
+        }
+        relations
+    }
+
+    /// Grow the partition state's per-node vectors to `new_count` entries.
+    fn expand_to(&mut self, new_count: usize) {
+        let previous_count = self.node_count;
+        self.node_count = new_count;
+        let state = &mut self.partition_state;
+        // Each added node gets its own index as community ID and zero weight.
+        state.node_to_comm.resize(new_count, 0);
+        for (node, comm) in state.node_to_comm.iter_mut().enumerate().skip(previous_count) {
+            *comm = node;
+        }
+        state.comm_weights.resize(new_count, 0.0);
+        state.node_weights.resize(new_count, 0.0);
+
+        for mapping in &mut state.community_mapping_per_level {
+            let filled = mapping.len();
+            mapping.resize(new_count, 0);
+            for (idx, slot) in mapping.iter_mut().enumerate().skip(filled) {
+                *slot = idx;
+            }
+        }
+        for mapping in &mut state.refined_community_mapping_per_level {
+            let filled = mapping.len();
+            mapping.resize(new_count, 0);
+            for (idx, slot) in mapping.iter_mut().enumerate().skip(filled) {
+                *slot = idx;
+            }
+        }
+        for mapping in &mut state.previous_subcommunity_mapping_per_level {
+            let filled = mapping.len();
+            mapping.resize(new_count, 0);
+            for (idx, slot) in mapping.iter_mut().enumerate().skip(filled) {
+                *slot = idx;
+            }
+        }
+        for mapping in &mut state.current_subcommunity_mapping_per_level {
+            let filled = mapping.len();
+            mapping.resize(new_count, 0);
+            for (idx, slot) in mapping.iter_mut().enumerate().skip(filled) {
+                *slot = idx;
+            }
+        }
+    }
+
+    /// Apply delta edges to the stored edge list.
+    ///
+    /// Edges are treated as undirected: `(u, v)` and `(v, u)` share one entry
+    /// keyed by `(min, max)`, weights are summed (`None` counts as 1.0), and
+    /// entries whose net weight is not above 1e-12 are dropped. The rebuilt
+    /// list is sorted by endpoints so its order (and any float sum computed
+    /// over it) does not depend on hash-map iteration order.
+    fn apply_edge_delta(&mut self, delta: &GraphInput) {
+        let capacity = self.current_edges.len() + delta.edges.len();
+        let mut edge_map: HashMap<(usize, usize), f64> = HashMap::with_capacity(capacity);
+        // Stored edges first, then the delta, accumulated in input order.
+        for &(u, v, w) in self.current_edges.iter().chain(&delta.edges) {
+            *edge_map.entry((u.min(v), u.max(v))).or_insert(0.0) += w.unwrap_or(1.0);
+        }
+
+        self.current_edges = edge_map
+            .into_iter()
+            .filter(|&(_, w)| w > 1e-12)
+            .map(|((u, v), w)| (u, v, Some(w)))
+            .collect();
+        // Keys are unique, so an unstable sort is deterministic here.
+        self.current_edges.sort_unstable_by_key(|&(u, v, _)| (u, v));
+    }
+}
+
+/// Count the distinct community IDs in `node_to_comm`.
+///
+/// An empty slice yields 0.
+fn count_communities(node_to_comm: &[usize]) -> usize {
+    let mut distinct = HashSet::new();
+    distinct.extend(node_to_comm.iter().copied());
+    distinct.len()
+}

From 51d8072f793108faf7b111a66a8cec6cdba06039 Mon Sep 17 00:00:00 2001
From: Naadir Jeewa <naadir@randomvariable.co.uk>
Date: Sun, 8 Mar 2026 16:37:07 +0000
Subject: [PATCH 5/6] test: add integration tests for CommunityState hierarchy
 features

Tests cover:
- hierarchy_levels populated after initial()
- hierarchy_levels preserved and extended after update()
- community_tree() returns valid parent-child relations
- into_parts/from_parts roundtrip preserves hierarchy
- incremental update works after restore from parts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../test_incremental_community_state.rs       | 249 ++++++++++++++++++
 tests/integration_tests.rs                    |   2 +
 2 files changed, 251 insertions(+)
 create mode 100644 tests/integration/test_incremental_community_state.rs

diff --git a/tests/integration/test_incremental_community_state.rs b/tests/integration/test_incremental_community_state.rs
new file mode 100644
index 0000000..5592f7b
--- /dev/null
+++ b/tests/integration/test_incremental_community_state.rs
@@ -0,0 +1,249 @@
+// Copyright 2026 naadir jeewa
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use hit_leiden::{CommunityState, GraphInput, RunConfig};
+
+#[test]
+fn initial_then_incremental_update() {
+    // Two disconnected triangles: {0, 1, 2} and {3, 4, 5}
+    let graph = GraphInput::from_edges(vec![
+        (0, 1, Some(1.0)),
+        (1, 2, Some(1.0)),
+        (2, 0, Some(1.0)),
+        (3, 4, Some(1.0)),
+        (4, 5, Some(1.0)),
+        (5, 3, Some(1.0)),
+    ]);
+
+    let config = RunConfig::default();
+    let (state, partition) = CommunityState::initial(&graph, &config).expect("initial run");
+
+    assert_eq!(partition.node_to_community.len(), 6);
+    assert!(partition.community_count >= 1);
+    assert_eq!(state.node_count(), 6);
+    assert_eq!(state.edge_count(), 6);
+
+    // Add a bridge between the two triangles plus two new nodes (6 and 7)
+    let delta = GraphInput::from_edges(vec![
+        (2, 3, Some(1.0)),
+        (5, 6, Some(1.0)),
+        (6, 7, Some(1.0)),
+        (7, 5, Some(1.0)),
+    ]);
+
+    let (updated_state, updated_partition) =
+        state.update(&delta, &config).expect("incremental update");
+
+    // Should now have 8 nodes
+    assert_eq!(updated_partition.node_to_community.len(), 8);
+    assert_eq!(updated_state.node_count(), 8);
+    // 6 original edges + 4 delta edges, all on distinct node pairs
+    assert_eq!(updated_state.edge_count(), 10);
+}
+
+#[test]
+fn edge_removal_via_negative_weight() {
+    let graph = GraphInput::from_edges(vec![
+        (0, 1, Some(1.0)),
+        (1, 2, Some(1.0)),
+        (2, 0, Some(1.0)),
+    ]);
+
+    let config = RunConfig::default();
+    let (state, _) = CommunityState::initial(&graph, &config).expect("initial run");
+    assert_eq!(state.edge_count(), 3);
+
+    // Remove edge (0,1) by adding negative weight
+    let delta = GraphInput {
+        dataset_id: None,
+        node_count: 3,
+        edges: vec![(0, 1, Some(-1.0))],
+    };
+
+    let (updated_state, updated_partition) =
+        state.update(&delta, &config).expect("update with removal");
+
+    assert_eq!(updated_partition.node_to_community.len(), 3);
+    // One edge removed
+    assert_eq!(updated_state.edge_count(), 2);
+}
+
+#[test]
+fn hierarchy_levels_populated_after_initial() {
+    let graph = GraphInput::from_unweighted_edges(vec![
+        (0, 1),
+        (1, 2),
+        (2, 0),
+        (3, 4),
+        (4, 5),
+        (5, 3),
+    ]);
+
+    let (state, partition) =
+        CommunityState::initial(&graph, &RunConfig::default()).expect("initial run");
+
+    let levels = state.hierarchy_levels();
+    assert!(!levels.is_empty(), "hierarchy_levels should be non-empty after initial()");
+
+    // Level 0 (finest) should have one entry per node
+    assert_eq!(levels[0].len(), 6);
+
+    // PartitionResult should also carry the same hierarchy
+    assert_eq!(partition.hierarchy_levels.len(), levels.len());
+    assert_eq!(partition.hierarchy_levels[0], levels[0]);
+}
+
+#[test]
+fn hierarchy_levels_preserved_after_update() {
+    let graph = GraphInput::from_unweighted_edges(vec![
+        (0, 1),
+        (1, 2),
+        (2, 0),
+        (3, 4),
+        (4, 5),
+        (5, 3),
+    ]);
+
+    let config = RunConfig::default();
+    let (state, _) = CommunityState::initial(&graph, &config).expect("initial run");
+    let initial_level_count = state.hierarchy_levels().len();
+
+    // Add a bridge and new nodes
+    let delta = GraphInput::from_edges(vec![
+        (2, 3, Some(1.0)),
+        (5, 6, Some(1.0)),
+        (6, 7, Some(1.0)),
+    ]);
+
+    let (updated_state, updated_partition) =
+        state.update(&delta, &config).expect("incremental update");
+
+    let levels = updated_state.hierarchy_levels();
+    assert!(!levels.is_empty(), "hierarchy_levels should be non-empty after update()");
+    // Level count should be at least what we had initially
+    assert!(
+        levels.len() >= initial_level_count,
+        "should not lose hierarchy levels after update"
+    );
+
+    // Level 0 should cover all 8 nodes
+    assert_eq!(levels[0].len(), 8);
+    // All levels should cover all 8 nodes
+    for (i, level) in levels.iter().enumerate() {
+        assert_eq!(
+            level.len(),
+            8,
+            "level {} should have entries for all 8 nodes",
+            i
+        );
+    }
+
+    // PartitionResult should match
+    assert_eq!(updated_partition.hierarchy_levels.len(), levels.len());
+}
+
+#[test]
+fn community_tree_returns_valid_relations() {
+    // Use a larger graph to get multiple hierarchy levels
+    let graph = GraphInput::from_unweighted_edges(vec![
+        (0, 1),
+        (1, 2),
+        (2, 0),
+        (3, 4),
+        (4, 5),
+        (5, 3),
+        (6, 7),
+        (7, 8),
+        (8, 6),
+    ]);
+
+    let (state, _) =
+        CommunityState::initial(&graph, &RunConfig::default()).expect("initial run");
+
+    let tree = state.community_tree();
+    let levels = state.hierarchy_levels();
+
+    if levels.len() > 1 {
+        // If we have multiple levels, we should have some relations
+        assert!(
+            !tree.is_empty(),
+            "community_tree() should return relations when multiple levels exist"
+        );
+
+        for rel in &tree {
+            // Parent level should be exactly one above child level
+            assert_eq!(
+                rel.parent_level,
+                rel.child_level + 1,
+                "parent_level should be child_level + 1"
+            );
+            // Levels should be within bounds
+            assert!(
+                rel.child_level < levels.len(),
+                "child_level out of bounds"
+            );
+            assert!(
+                rel.parent_level < levels.len(),
+                "parent_level out of bounds"
+            );
+        }
+    }
+    // If only 1 level, tree should be empty (no parent-child to derive)
+    if levels.len() <= 1 {
+        assert!(
+            tree.is_empty(),
+            "community_tree() should be empty with only one level"
+        );
+    }
+}
+
+#[test]
+fn into_parts_from_parts_roundtrip_preserves_hierarchy() {
+    let graph = GraphInput::from_unweighted_edges(vec![
+        (0, 1),
+        (1, 2),
+        (2, 0),
+        (3, 4),
+        (4, 5),
+        (5, 3),
+    ]);
+
+    let config = RunConfig::default();
+    let (state, _) = CommunityState::initial(&graph, &config).expect("initial run");
+
+    let original_levels = state.hierarchy_levels().to_vec();
+    let original_node_count = state.node_count();
+    let original_edge_count = state.edge_count();
+
+    // Decompose
+    let (partition_state, edges, node_count, hierarchy_levels) = state.into_parts();
+
+    assert_eq!(node_count, original_node_count);
+    assert_eq!(hierarchy_levels, original_levels);
+
+    // Reconstruct
+    let restored = CommunityState::from_parts(partition_state, edges, node_count, hierarchy_levels);
+
+    assert_eq!(restored.node_count(), original_node_count);
+    assert_eq!(restored.edge_count(), original_edge_count);
+    assert_eq!(restored.hierarchy_levels(), &original_levels[..]);
+
+    // Restored state should still support incremental updates
+    let delta = GraphInput::from_edges(vec![(2, 3, Some(1.0))]);
+    let (updated, partition) = restored.update(&delta, &config).expect("update after restore");
+    assert_eq!(partition.node_to_community.len(), 6);
+    assert!(!updated.hierarchy_levels().is_empty());
+}
diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs
index f5fd9d8..ea744be 100644
--- a/tests/integration_tests.rs
+++ b/tests/integration_tests.rs
@@ -28,3 +28,5 @@ mod test_neo4j_snapshot_parity;
 mod test_release_gate_live_query_ineligible;
 #[path = "integration/test_throughput_equivalence.rs"]
 mod test_throughput_equivalence;
+#[path = "integration/test_incremental_community_state.rs"]
+mod test_incremental_community_state;

From f3fa1aa25eecc896bf19cbaadf57247ca6ff4aa9 Mon Sep 17 00:00:00 2001
From: Naadir Jeewa <naadir@randomvariable.co.uk>
Date: Sun, 8 Mar 2026 16:37:13 +0000
Subject: [PATCH 6/6] docs: add library usage examples and hierarchy
 documentation to README

Add Quick Start sections covering:
- Simple edges-in/partition-out usage with run_simple()
- Configured runs with RunConfig builders
- Weighted edges and dataset identifiers
- Incremental updates with CommunityState
- Hierarchy access for DRIFT search
- CLI usage

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 README.md | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 116 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 6ed40d5..0490630 100644
--- a/README.md
+++ b/README.md
@@ -30,12 +30,125 @@ downstream summarisation to keep pace with document ingestion.
 
 ## Quick Start
 
+### Library usage
+
+Add to your `Cargo.toml`:
+
+```toml
+[dependencies]
+hit_leiden = { path = "path/to/hit-leiden/usability" }
+# Optional: enable serde support without pulling in profiling deps
+# hit_leiden = { path = "...", features = ["serde"] }
+```
+
+Simplest possible usage — edges in, partition out:
+
+```rust
+use hit_leiden::PartitionResult;
+
+let edges = vec![(0, 1, None), (1, 2, None), (2, 0, None), (3, 4, None), (4, 5, None), (5, 3, None)];
+let partition: PartitionResult = hit_leiden::run_simple(edges)?;
+
+for (node, &community) in partition.node_to_community.iter().enumerate() {
+    println!("node {} -> community {}", node, community);
+}
+```
+
+With configuration:
+
+```rust
+use hit_leiden::{GraphInput, RunConfig, RunMode};
+
+let graph = GraphInput::from_unweighted_edges(vec![(0, 1), (1, 2), (2, 0)]);
+let config = RunConfig::default()
+    .with_mode(RunMode::Deterministic)
+    .with_resolution(1.0)
+    .with_max_iterations(10);
+
+let outcome = hit_leiden::run(&graph, &config)?;
+let partition = outcome.into_partition()?;
+println!("{} communities found (Q={:.4})", partition.community_count, partition.quality_score);
+```
+
+With weighted edges and a dataset identifier:
+
+```rust
+use hit_leiden::{GraphInput, RunConfig};
+
+let graph = GraphInput {
+    dataset_id: Some("my-knowledge-graph".to_string()),
+    node_count: 4,
+    edges: vec![(0, 1, Some(1.0)), (1, 2, Some(0.5)), (2, 3, Some(1.0))],
+};
+
+let partition = hit_leiden::run(&graph, &RunConfig::default())?.into_partition()?;
+```
+
+Incremental updates — persist state, restore it later, and apply a delta:
+
+```rust
+use hit_leiden::{CommunityState, GraphInput, RunConfig};
+
+// Initial community detection
+let edges = GraphInput::from_unweighted_edges(vec![
+    (0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3),
+]);
+let (state, partition) = CommunityState::initial(&edges, &RunConfig::default())?;
+
+// Persist state (requires the `serde` feature plus a serializer crate such as serde_json)
+// let bytes = serde_json::to_vec(&state)?;
+
+// Later: restore and apply a delta graph
+// let state: CommunityState = serde_json::from_slice(&bytes)?;
+let delta = GraphInput::from_edges(vec![
+    (2, 3, Some(1.0)),  // bridge the two triangles
+    (5, 6, Some(1.0)),  // add new nodes
+]);
+let (updated_state, updated_partition) = state.update(&delta, &RunConfig::default())?;
+println!("{} communities", updated_partition.community_count);
+```
+
+Hierarchy access — traverse the community tree for [DRIFT search](https://microsoft.github.io/graphrag/query/drift_search/):
+
+```rust
+use hit_leiden::{CommunityState, GraphInput, RunConfig};
+
+let edges = GraphInput::from_unweighted_edges(vec![
+    (0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3),
+]);
+let (state, partition) = CommunityState::initial(&edges, &RunConfig::default())?;
+
+// hierarchy_levels: level 0 = finest, last = coarsest
+// Each entry maps node index → community ID at that level
+let levels = state.hierarchy_levels();
+println!("{} hierarchy levels", levels.len());
+
+// Derive parent-child relationships between communities at successive levels
+let tree = state.community_tree();
+for rel in &tree {
+    println!(
+        "community {} (level {}) -> parent {} (level {})",
+        rel.child_community, rel.child_level,
+        rel.parent_community, rel.parent_level,
+    );
+}
+
+// Decomposed storage: persist only the unique algorithm state
+let (partition_state, edges_out, node_count, hierarchy) = state.into_parts();
+// Store partition_state via serde; edges and hierarchy can be derived from DB
+
+// Restore later
+let restored = CommunityState::from_parts(partition_state, edges_out, node_count, hierarchy);
+```
+
+### CLI usage
+
 ```sh
-# Build
-cargo build --release
+# Build (CLI requires the `cli` feature)
+cargo build --release --features cli
 
 # Run on an edge list (one "src dst [weight]" per line)
-cargo run --release -- run --source file --path graph.txt
+cargo run --release --features cli -- run --graph-source graph.txt
 
 # Run benchmarks
 cargo bench