Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ edition = "2021"
license = "MIT OR Apache-2.0"

[dependencies]
clap = { version = "4.5", features = [
"derive",
] }
clap = { version = "4.5", features = ["derive"], optional = true }
ahash = "0.8"
rayon = "1.10"
smallvec = "1.13"
Expand Down Expand Up @@ -52,13 +50,13 @@ anyhow = "1.0.102"
lender = "0.6.2"

[features]
default = [
]
default = []
cli = ["dep:clap"]
serde = ["dep:serde", "dep:serde_json"]
profiling = [
"serde",
"webgraph",
"lender",
"serde",
"serde_json",
"plotly",
"inferno",
"prost",
Expand All @@ -71,6 +69,7 @@ profiling = [
[[bin]]
name = "hit-leiden"
path = "src/main.rs"
required-features = ["cli"]

[[bin]]
name = "profile_run"
Expand Down
119 changes: 116 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,125 @@ downstream summarisation to keep pace with document ingestion.

## Quick Start

### Library usage

Add to your `Cargo.toml`:

```toml
[dependencies]
hit_leiden = { path = "path/to/hit-leiden/usability" }
# Optional: enable serde support without pulling in profiling deps
# hit_leiden = { path = "...", features = ["serde"] }
```

Simplest possible usage — edges in, partition out:

```rust
use hit_leiden::PartitionResult;

let edges = vec![(0, 1, None), (1, 2, None), (2, 0, None), (3, 4, None), (4, 5, None), (5, 3, None)];
let partition: PartitionResult = hit_leiden::run_simple(edges)?;

for (node, &community) in partition.node_to_community.iter().enumerate() {
println!("node {} -> community {}", node, community);
}
```

With configuration:

```rust
use hit_leiden::{GraphInput, RunConfig, RunMode};

let graph = GraphInput::from_unweighted_edges(vec![(0, 1), (1, 2), (2, 0)]);
let config = RunConfig::default()
.with_mode(RunMode::Deterministic)
.with_resolution(1.0)
.with_max_iterations(10);

let outcome = hit_leiden::run(&graph, &config)?;
let partition = outcome.into_partition()?;
println!("{} communities found (Q={:.4})", partition.community_count, partition.quality_score);
```

With weighted edges and a dataset identifier:

```rust
use hit_leiden::GraphInput;

let graph = GraphInput {
dataset_id: Some("my-knowledge-graph".to_string()),
node_count: 4,
edges: vec![(0, 1, Some(1.0)), (1, 2, Some(0.5)), (2, 3, Some(1.0))],
};

let partition = hit_leiden::run_simple(graph.edges.clone())?;
```

Incremental updates — persist state, restore it later, and apply a delta:

```rust
use hit_leiden::{CommunityState, GraphInput, RunConfig};

// Initial community detection
let edges = GraphInput::from_unweighted_edges(vec![
(0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3),
]);
let (state, partition) = CommunityState::initial(&edges, &RunConfig::default())?;

// Persist state (enable the `serde` feature)
// let bytes = serde_json::to_vec(&state)?;

// Later: restore and apply a delta graph
// let state: CommunityState = serde_json::from_slice(&bytes)?;
let delta = GraphInput::from_edges(vec![
(2, 3, Some(1.0)), // bridge the two triangles
(5, 6, Some(1.0)), // introduce a previously unseen node (6)
]);
let (updated_state, updated_partition) = state.update(&delta, &RunConfig::default())?;
println!("{} communities", updated_partition.community_count);
```

Hierarchy access — traverse the community tree for [DRIFT search](https://microsoft.github.io/graphrag/query/drift_search/):

```rust
use hit_leiden::{CommunityState, GraphInput, RunConfig};

let edges = GraphInput::from_unweighted_edges(vec![
(0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3),
]);
let (state, partition) = CommunityState::initial(&edges, &RunConfig::default())?;

// hierarchy_levels: level 0 = finest, last = coarsest
// Each entry maps node index → community ID at that level
let levels = state.hierarchy_levels();
println!("{} hierarchy levels", levels.len());

// Derive parent-child relationships between communities at successive levels
let tree = state.community_tree();
for rel in &tree {
println!(
"community {} (level {}) -> parent {} (level {})",
rel.child_community, rel.child_level,
rel.parent_community, rel.parent_level,
);
}

// Decomposed storage: persist only the unique algorithm state
let (partition_state, edges_out, node_count, hierarchy) = state.into_parts();
// Store partition_state via serde; edges and the hierarchy can be re-derived from your own datastore

// Restore later
let restored = CommunityState::from_parts(partition_state, edges_out, node_count, hierarchy);
```

### CLI usage

```sh
# Build
cargo build --release
# Build (CLI requires the `cli` feature)
cargo build --release --features cli

# Run on an edge list (one "src dst [weight]" per line)
cargo run --release -- run --source file --path graph.txt
cargo run --release --features cli -- run --graph-source graph.txt

# Run benchmarks
cargo bench
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/criterion/hit_leiden_suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ fn load_uk_2007() -> GraphInput {
);

GraphInput {
dataset_id: "uk-2007-05@100000".to_string(),
dataset_id: Some("uk-2007-05@100000".to_string()),
node_count: num_nodes,
edges,
}
Expand Down
8 changes: 4 additions & 4 deletions src/benchmark/dynamic_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl DynamicGraphBuilder {
cumulative_edges.extend_from_slice(chunk);

batches.push(GraphInput {
dataset_id: format!("batch_{}", idx),
dataset_id: Some(format!("batch_{}", idx)),
node_count: self.node_count,
edges: cumulative_edges
.iter()
Expand Down Expand Up @@ -106,7 +106,7 @@ impl DynamicGraphBuilder {
cumulative_edges.extend_from_slice(&shuffled[start..end]);

update_batches.push(GraphInput {
dataset_id: format!("paper_batch_{}", round),
dataset_id: Some(format!("paper_batch_{}", round)),
node_count: self.node_count,
edges: cumulative_edges
.iter()
Expand All @@ -117,7 +117,7 @@ impl DynamicGraphBuilder {

IncrementalSplit {
initial_graph: GraphInput {
dataset_id: "paper_initial".to_string(),
dataset_id: Some("paper_initial".to_string()),
node_count: self.node_count,
edges: initial_edges
.iter()
Expand All @@ -138,7 +138,7 @@ mod tests {
#[test]
fn paper_split_uses_initial_ratio_and_fixed_rounds() {
let graph = GraphInput {
dataset_id: "test".to_string(),
dataset_id: Some("test".to_string()),
node_count: 100,
edges: (0..100).map(|i| (i, (i + 1) % 100, None::<f64>)).collect(),
};
Expand Down
4 changes: 2 additions & 2 deletions src/benchmark/hit_leiden_incremental.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ pub fn run_incremental_with_config(
let _ = ig_disk_cache::save(
cache_dir,
&ig_disk_cache::CachedIgraphLeiden {
dataset_id: batch_graph.dataset_id.clone(),
dataset_id: batch_graph.dataset_id.clone().unwrap_or_default(),
node_count: batch_graph.node_count,
edge_count: batch_graph.edges.len(),
time_ms: ig.time_ms,
Expand Down Expand Up @@ -296,7 +296,7 @@ fn load_cached_igraph_result(
}
let cache_dir = config.cache_dir.as_ref()?;
let first_batch = batches.first()?;
let cached = ig_disk_cache::load(cache_dir, &first_batch.dataset_id, first_batch.edges.len())?;
let cached = ig_disk_cache::load(cache_dir, first_batch.dataset_id.as_deref().unwrap_or(""), first_batch.edges.len())?;
Some(IgResult {
time_ms: cached.time_ms,
modularity: cached.modularity,
Expand Down
2 changes: 1 addition & 1 deletion src/benchmark/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ pub mod benchmark_runner {

let benchmark_run = BenchmarkRun {
timestamp: timestamp.clone(),
dataset_id: config.graph.dataset_id.clone(),
dataset_id: config.graph.dataset_id.clone().unwrap_or_default(),
timeout_seconds: config.timeout_seconds,
truncated: outcome.truncated,
batches: outcome.batches,
Expand Down
2 changes: 1 addition & 1 deletion src/bin/profile_incremental.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ fn load_graph() -> GraphInput {
edges.len()
);
GraphInput {
dataset_id: "uk-2007-05@100000".to_string(),
dataset_id: Some("uk-2007-05@100000".to_string()),
node_count: num_nodes,
edges,
}
Expand Down
2 changes: 1 addition & 1 deletion src/bin/profile_run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ fn load_graph() -> GraphInput {
edges.len()
);
GraphInput {
dataset_id: "uk-2007-05@100000".to_string(),
dataset_id: Some("uk-2007-05@100000".to_string()),
node_count: num_nodes,
edges,
}
Expand Down
3 changes: 1 addition & 2 deletions src/cli/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,12 @@ pub fn run_from_cli(

let config = RunConfig {
mode,
graph_source: GraphSource::File, // Assuming file for now
graph_source: Some(GraphSource::File),
acceleration: AccelerationTarget::PureRust,
quality_tolerance: 0.001,
max_iterations: 10,
pinned_profile: None,
resolution: 1.0,
refinement_gamma: 0.05,
};

crate::run(graph, &config)
Expand Down
Loading