Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ edition = "2021"
license = "MIT OR Apache-2.0"

[dependencies]
clap = { version = "4.5", features = [
"derive",
] }
clap = { version = "4.5", features = ["derive"], optional = true }
ahash = "0.8"
rayon = "1.10"
smallvec = "1.13"
Expand Down Expand Up @@ -52,13 +50,13 @@ anyhow = "1.0.102"
lender = "0.6.2"

[features]
default = [
]
default = []
cli = ["dep:clap"]
serde = ["dep:serde", "dep:serde_json"]
profiling = [
"serde",
"webgraph",
"lender",
"serde",
"serde_json",
"plotly",
"inferno",
"prost",
Expand All @@ -71,6 +69,7 @@ profiling = [
[[bin]]
name = "hit-leiden"
path = "src/main.rs"
required-features = ["cli"]

[[bin]]
name = "profile_run"
Expand Down
119 changes: 116 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,125 @@ downstream summarisation to keep pace with document ingestion.

## Quick Start

### Library usage

Add to your `Cargo.toml`:

```toml
[dependencies]
hit_leiden = { path = "path/to/hit-leiden/usability" }
# Optional: enable serde support without pulling in profiling deps
# hit_leiden = { path = "...", features = ["serde"] }
```

Simplest possible usage — edges in, partition out:

```rust
use hit_leiden::PartitionResult;

let edges = vec![(0, 1, None), (1, 2, None), (2, 0, None), (3, 4, None), (4, 5, None), (5, 3, None)];
let partition: PartitionResult = hit_leiden::run_simple(edges)?;

for (node, &community) in partition.node_to_community.iter().enumerate() {
println!("node {} -> community {}", node, community);
}
```

With configuration:

```rust
use hit_leiden::{GraphInput, RunConfig, RunMode};

let graph = GraphInput::from_unweighted_edges(vec![(0, 1), (1, 2), (2, 0)]);
let config = RunConfig::default()
.with_mode(RunMode::Deterministic)
.with_resolution(1.0)
.with_max_iterations(10);

let outcome = hit_leiden::run(&graph, &config)?;
let partition = outcome.into_partition()?;
println!("{} communities found (Q={:.4})", partition.community_count, partition.quality_score);
```

With weighted edges and a dataset identifier:

```rust
use hit_leiden::GraphInput;

let graph = GraphInput {
dataset_id: Some("my-knowledge-graph".to_string()),
node_count: 4,
edges: vec![(0, 1, Some(1.0)), (1, 2, Some(0.5)), (2, 3, Some(1.0))],
};

let partition = hit_leiden::run_simple(graph.edges.clone())?;
```

Incremental updates — persist state, restore it later, and apply a delta:

```rust
use hit_leiden::{CommunityState, GraphInput, RunConfig};

// Initial community detection
let edges = GraphInput::from_unweighted_edges(vec![
(0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3),
]);
let (state, partition) = CommunityState::initial(&edges, &RunConfig::default())?;

// Persist state (enable the `serde` feature)
// let bytes = serde_json::to_vec(&state)?;

// Later: restore and apply a delta graph
// let state: CommunityState = serde_json::from_slice(&bytes)?;
let delta = GraphInput::from_edges(vec![
(2, 3, Some(1.0)), // bridge the two triangles
(5, 6, Some(1.0)), // introduce a previously unseen node (6)
]);
let (updated_state, updated_partition) = state.update(&delta, &RunConfig::default())?;
println!("{} communities", updated_partition.community_count);
```

Hierarchy access — traverse the community tree for [DRIFT search](https://microsoft.github.io/graphrag/query/drift_search/):

```rust
use hit_leiden::{CommunityState, GraphInput, RunConfig};

let edges = GraphInput::from_unweighted_edges(vec![
(0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3),
]);
let (state, partition) = CommunityState::initial(&edges, &RunConfig::default())?;

// hierarchy_levels: level 0 = finest, last = coarsest
// Each entry maps node index → community ID at that level
let levels = state.hierarchy_levels();
println!("{} hierarchy levels", levels.len());

// Derive parent-child relationships between communities at successive levels
let tree = state.community_tree();
for rel in &tree {
println!(
"community {} (level {}) -> parent {} (level {})",
rel.child_community, rel.child_level,
rel.parent_community, rel.parent_level,
);
}

// Decomposed storage: persist only the unique algorithm state
let (partition_state, edges_out, node_count, hierarchy) = state.into_parts();
// Store partition_state via serde; edges and the hierarchy can be re-derived from your own datastore

// Restore later
let restored = CommunityState::from_parts(partition_state, edges_out, node_count, hierarchy);
```

### CLI usage

```sh
# Build
cargo build --release
# Build (CLI requires the `cli` feature)
cargo build --release --features cli

# Run on an edge list (one "src dst [weight]" per line)
cargo run --release -- run --source file --path graph.txt
cargo run --release --features cli -- run --graph-source graph.txt

# Run benchmarks
cargo bench
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/criterion/hit_leiden_suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ fn load_uk_2007() -> GraphInput {
);

GraphInput {
dataset_id: "uk-2007-05@100000".to_string(),
dataset_id: Some("uk-2007-05@100000".to_string()),
node_count: num_nodes,
edges,
}
Expand Down
8 changes: 4 additions & 4 deletions src/benchmark/dynamic_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl DynamicGraphBuilder {
cumulative_edges.extend_from_slice(chunk);

batches.push(GraphInput {
dataset_id: format!("batch_{}", idx),
dataset_id: Some(format!("batch_{}", idx)),
node_count: self.node_count,
edges: cumulative_edges
.iter()
Expand Down Expand Up @@ -106,7 +106,7 @@ impl DynamicGraphBuilder {
cumulative_edges.extend_from_slice(&shuffled[start..end]);

update_batches.push(GraphInput {
dataset_id: format!("paper_batch_{}", round),
dataset_id: Some(format!("paper_batch_{}", round)),
node_count: self.node_count,
edges: cumulative_edges
.iter()
Expand All @@ -117,7 +117,7 @@ impl DynamicGraphBuilder {

IncrementalSplit {
initial_graph: GraphInput {
dataset_id: "paper_initial".to_string(),
dataset_id: Some("paper_initial".to_string()),
node_count: self.node_count,
edges: initial_edges
.iter()
Expand All @@ -138,7 +138,7 @@ mod tests {
#[test]
fn paper_split_uses_initial_ratio_and_fixed_rounds() {
let graph = GraphInput {
dataset_id: "test".to_string(),
dataset_id: Some("test".to_string()),
node_count: 100,
edges: (0..100).map(|i| (i, (i + 1) % 100, None::<f64>)).collect(),
};
Expand Down
4 changes: 2 additions & 2 deletions src/benchmark/hit_leiden_incremental.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ pub fn run_incremental_with_config(
let _ = ig_disk_cache::save(
cache_dir,
&ig_disk_cache::CachedIgraphLeiden {
dataset_id: batch_graph.dataset_id.clone(),
dataset_id: batch_graph.dataset_id.clone().unwrap_or_default(),
node_count: batch_graph.node_count,
edge_count: batch_graph.edges.len(),
time_ms: ig.time_ms,
Expand Down Expand Up @@ -296,7 +296,7 @@ fn load_cached_igraph_result(
}
let cache_dir = config.cache_dir.as_ref()?;
let first_batch = batches.first()?;
let cached = ig_disk_cache::load(cache_dir, &first_batch.dataset_id, first_batch.edges.len())?;
let cached = ig_disk_cache::load(cache_dir, first_batch.dataset_id.as_deref().unwrap_or(""), first_batch.edges.len())?;
Some(IgResult {
time_ms: cached.time_ms,
modularity: cached.modularity,
Expand Down
2 changes: 1 addition & 1 deletion src/benchmark/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ pub mod benchmark_runner {

let benchmark_run = BenchmarkRun {
timestamp: timestamp.clone(),
dataset_id: config.graph.dataset_id.clone(),
dataset_id: config.graph.dataset_id.clone().unwrap_or_default(),
timeout_seconds: config.timeout_seconds,
truncated: outcome.truncated,
batches: outcome.batches,
Expand Down
2 changes: 1 addition & 1 deletion src/bin/profile_incremental.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ fn load_graph() -> GraphInput {
edges.len()
);
GraphInput {
dataset_id: "uk-2007-05@100000".to_string(),
dataset_id: Some("uk-2007-05@100000".to_string()),
node_count: num_nodes,
edges,
}
Expand Down
2 changes: 1 addition & 1 deletion src/bin/profile_run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ fn load_graph() -> GraphInput {
edges.len()
);
GraphInput {
dataset_id: "uk-2007-05@100000".to_string(),
dataset_id: Some("uk-2007-05@100000".to_string()),
node_count: num_nodes,
edges,
}
Expand Down
3 changes: 1 addition & 2 deletions src/cli/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,12 @@ pub fn run_from_cli(

let config = RunConfig {
mode,
graph_source: GraphSource::File, // Assuming file for now
graph_source: Some(GraphSource::File),
acceleration: AccelerationTarget::PureRust,
quality_tolerance: 0.001,
max_iterations: 10,
pinned_profile: None,
resolution: 1.0,
refinement_gamma: 0.05,
};

crate::run(graph, &config)
Expand Down
Loading