From 7d1966c3787ba7d75eb80f73c1a18efb7d451250 Mon Sep 17 00:00:00 2001
From: sama <sama@node0.ljn.hrsama-pg0.utah.cloudlab.us>
Date: Thu, 23 Oct 2025 03:38:23 -0600
Subject: [PATCH 1/6] feat:support eigenvector centrality algorithm

---
 cpp_easygraph/CMakeLists.txt                  |  16 +-
 cpp_easygraph/cpp_easygraph.cpp               |   1 +
 .../functions/centrality/centrality.h         |   7 +
 .../functions/centrality/eigenvector.cpp      | 416 ++++++++++++++++++
 easygraph/functions/centrality/__init__.py    |   3 +-
 easygraph/functions/centrality/eigenvector.py | 155 +++++++
 .../centrality/tests/test_eigenvector.py      | 119 +++++
 7 files changed, 715 insertions(+), 2 deletions(-)
 create mode 100644 cpp_easygraph/functions/centrality/eigenvector.cpp
 create mode 100644 easygraph/functions/centrality/eigenvector.py
 create mode 100644 easygraph/functions/centrality/tests/test_eigenvector.py
diff --git a/cpp_easygraph/CMakeLists.txt b/cpp_easygraph/CMakeLists.txt
index 2a43c776..f272b808 100644
--- a/cpp_easygraph/CMakeLists.txt
+++ b/cpp_easygraph/CMakeLists.txt
@@ -41,4 +41,18 @@ endif()
 set_target_properties(cpp_easygraph  PROPERTIES
     LINK_SEARCH_START_STATIC ON
     LINK_SEARCH_END_STATIC ON
-)
\ No newline at end of file
+)
+
+find_package(Eigen3 QUIET)
+if(Eigen3_FOUND)
+    message(STATUS "Found Eigen3: ${EIGEN3_INCLUDE_DIR}")
+    include_directories(${EIGEN3_INCLUDE_DIR})
+    add_definitions(-DEIGEN_MAJOR_VERSION=${EIGEN3_VERSION_MAJOR})
+endif()
+
+# 启用OpenMP支持
+find_package(OpenMP)
+if(OpenMP_CXX_FOUND)
+    message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif()
\ No newline at end of file
diff --git a/cpp_easygraph/cpp_easygraph.cpp b/cpp_easygraph/cpp_easygraph.cpp
index f5570d6e..487c0b1b 100644
--- a/cpp_easygraph/cpp_easygraph.cpp
+++ b/cpp_easygraph/cpp_easygraph.cpp
@@ -83,6 +83,7 @@ PYBIND11_MODULE(cpp_easygraph, m) {
     m.def("cpp_constraint", &constraint, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none());
     m.def("cpp_effective_size", &effective_size, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none());
     m.def("cpp_efficiency", &efficiency, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none());
+    m.def("cpp_eigenvector_centrality", &cpp_eigenvector_centrality, py::arg("G"), py::arg("max_iter") = 100, py::arg("tol") = 1.0e-6, py::arg("nstart") = py::none(), py::arg("weight") = "weight");
     m.def("cpp_hierarchy", &hierarchy, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none());
     m.def("cpp_pagerank", &_pagerank, py::arg("G"), py::arg("alpha") = 0.85, py::arg("max_iterator") = 500, py::arg("threshold") = 1e-6);
     m.def("cpp_dijkstra_multisource", &_dijkstra_multisource, py::arg("G"), py::arg("sources"), py::arg("weight") = "weight", py::arg("target") = py::none());    
diff --git a/cpp_easygraph/functions/centrality/centrality.h b/cpp_easygraph/functions/centrality/centrality.h
index 7040618a..121c9eb0 100644
--- a/cpp_easygraph/functions/centrality/centrality.h
+++ b/cpp_easygraph/functions/centrality/centrality.h
@@ -12,4 +12,11 @@ py::object cpp_katz_centrality(
     py::object py_max_iter,
     py::object py_tol,
     py::object py_normalized
+);
+py::object cpp_eigenvector_centrality(
+    py::object G,
+    py::object py_max_iter,
+    py::object py_tol,
+    py::object py_nstart,
+    py::object py_weight
 );
\ No newline at end of file
diff --git a/cpp_easygraph/functions/centrality/eigenvector.cpp b/cpp_easygraph/functions/centrality/eigenvector.cpp
new file mode 100644
index 00000000..aa434621
--- /dev/null
+++ b/cpp_easygraph/functions/centrality/eigenvector.cpp
@@ -0,0 +1,416 @@
+#include <cmath>
+#include <vector>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/numpy.h>
+#include <unordered_map>
+#include <algorithm>
+#include <tuple>
+#include "centrality.h"
+#include "../../classes/graph.h"
+
+#ifdef EIGEN_MAJOR_VERSION
+#include <Eigen/Sparse>
+#include <Eigen/Eigenvalues>
+#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
+#define HAVE_EIGEN
+#endif
+#endif
+
+namespace py = pybind11;
+
+class CSRMatrix {
+public:
+    std::vector<int> indptr;
+    std::vector<int> indices;
+    std::vector<double> data;
+    int rows, cols;
+    
+    CSRMatrix(int r, int c) : rows(r), cols(c) {
+        indptr.resize(r + 1, 0);
+    }
+    
+    void reserve(size_t nnz) {
+        indices.reserve(nnz);
+        data.reserve(nnz);
+    }
+    
+    std::vector<double> multiply(const std::vector<double>& vec) const {
+        std::vector<double> result(rows, 0.0);
+        
+        #pragma omp parallel for if(rows > 1000)
+        for (int i = 0; i < rows; i++) {
+            for (int j = indptr[i]; j < indptr[i + 1]; j++) {
+                result[i] += data[j] * vec[indices[j]];
+            }
+        }
+        
+        return result;
+    }
+
+    void multiply_inplace(const std::vector<double>& vec, std::vector<double>& result) const {
+        std::fill(result.begin(), result.end(), 0.0);
+        
+        #pragma omp parallel for if(rows > 1000)
+        for (int i = 0; i < rows; i++) {
+            for (int j = indptr[i]; j < indptr[i + 1]; j++) {
+                result[i] += data[j] * vec[indices[j]];
+            }
+        }
+    }
+
+#ifdef HAVE_EIGEN
+    Eigen::SparseMatrix<double> to_eigen() const {
+        Eigen::SparseMatrix<double> eigen_mat(rows, cols);
+        eigen_mat.reserve(Eigen::VectorXi::Constant(cols, 5));
+        
+        for (int i = 0; i < rows; i++) {
+            for (int j = indptr[i]; j < indptr[i + 1]; j++) {
+                eigen_mat.insert(i, indices[j]) = data[j];
+            }
+        }
+        eigen_mat.makeCompressed();
+        return eigen_mat;
+    }
+#endif
+};
+
+CSRMatrix build_transpose_matrix(
+    const Graph& graph,
+    const std::vector<node_t>& nodes,
+    const std::string& weight_key
+) {
+    int n = nodes.size();
+    std::unordered_map<node_t, int> node_to_idx;
+    for (size_t i = 0; i < nodes.size(); i++) {
+        node_to_idx[nodes[i]] = i;
+    }
+    
+    size_t nnz = 0;
+    for (node_t node_id : nodes) {
+        const auto& adj_it = graph.adj.find(node_id);
+        if (adj_it != graph.adj.end()) {
+            nnz += adj_it->second.size();
+        }
+    }
+    
+    std::vector<std::tuple<int, int, double>> edges;
+    edges.reserve(nnz);
+    
+    for (node_t src_id : nodes) {
+        int src_idx = node_to_idx[src_id];
+        const auto& adj_it = graph.adj.find(src_id);
+        if (adj_it != graph.adj.end()) {
+            for (const auto& adj_pair : adj_it->second) {
+                node_t dst_id = adj_pair.first;
+                auto dst_it = node_to_idx.find(dst_id);
+                if (dst_it != node_to_idx.end()) {
+                    int dst_idx = dst_it->second;
+                    
+                    double w = 1.0;
+                    if (!weight_key.empty()) {
+                        auto w_it = adj_pair.second.find(weight_key);
+                        if (w_it != adj_pair.second.end()) {
+                            w = w_it->second;
+                        }
+                    }
+                    
+                    edges.push_back(std::make_tuple(dst_idx, src_idx, w));
+                }
+            }
+        }
+    }
+    
+    std::sort(edges.begin(), edges.end(), 
+              [](const std::tuple<int, int, double>& a, const std::tuple<int, int, double>& b) {
+                  if (std::get<0>(a) != std::get<0>(b)) {
+                      return std::get<0>(a) < std::get<0>(b);
+                  }
+                  return std::get<1>(a) < std::get<1>(b);
+              });
+    
+    CSRMatrix matrix(n, n);
+    matrix.reserve(edges.size());
+    
+    for (const auto& edge : edges) {
+        int row = std::get<0>(edge);
+        matrix.indptr[row + 1]++;
+    }
+    
+    for (int i = 0; i < n; i++) {
+        matrix.indptr[i + 1] += matrix.indptr[i];
+    }
+    
+    for (const auto& edge : edges) {
+        int row = std::get<0>(edge);
+        int col = std::get<1>(edge);
+        double val = std::get<2>(edge);
+        
+        matrix.indices.push_back(col);
+        matrix.data.push_back(val);
+    }
+    
+    std::vector<bool> has_connections(n, false);
+    
+    for (size_t i = 0; i < n; i++) {
+        if (matrix.indptr[i + 1] > matrix.indptr[i]) {
+            has_connections[i] = true;
+        }
+    }
+    
+    for (int i = 0; i < n; i++) {
+        if (!has_connections[i]) {
+            matrix.indices.push_back(i);
+            matrix.data.push_back(1.0e-4);
+            
+            for (int j = i + 1; j <= n; j++) {
+                matrix.indptr[j]++;
+            }
+        }
+    }
+    return matrix;
+}
+
+std::vector<double> power_iteration(
+    const CSRMatrix& A,
+    int max_iter,
+    double tol,
+    std::vector<double>& x
+) {
+    int n = A.rows;
+    std::vector<double> x_next(n, 0.0);
+    
+    for (int iter = 0; iter < max_iter; iter++) {
+        A.multiply_inplace(x, x_next);
+        
+        double norm = 0.0;
+        #pragma omp parallel for reduction(+:norm) if(n > 10000)
+        for (int i = 0; i < n; i++) {
+            norm += x_next[i] * x_next[i];
+        }
+        norm = std::sqrt(norm);
+        
+        if (norm < 1e-12) {
+            return x;
+        }
+        
+        #pragma omp parallel for if(n > 10000)
+        for (int i = 0; i < n; i++) {
+            x_next[i] /= norm;
+        }
+        
+        double diff = 0.0;
+        #pragma omp parallel for reduction(+:diff) if(n > 10000)
+        for (int i = 0; i < n; i++) {
+            diff += std::fabs(x_next[i] - x[i]);
+        }
+        
+        if (diff < n * tol) {
+            std::swap(x, x_next);
+            break;
+        }
+        
+        std::swap(x, x_next);
+    }
+    
+    return x;
+}
+
+#ifdef HAVE_EIGEN
+std::vector<double> compute_eigenvector_eigen(
+    const CSRMatrix& A, 
+    int max_iter,
+    double tol
+) {
+    Eigen::SparseMatrix<double> eigen_matrix = A.to_eigen();
+    
+    int n = A.rows;
+    std::vector<double> result(n);
+    
+    try {
+        Eigen::EigenSolver<Eigen::MatrixXd> solver;
+        
+        if (n < 1000) {
+            Eigen::MatrixXd dense_matrix(eigen_matrix);
+            solver.compute(dense_matrix);
+            
+            int max_idx = 0;
+            double max_val = solver.eigenvalues()[0].real();
+            for (int i = 1; i < n; i++) {
+                if (solver.eigenvalues()[i].real() > max_val) {
+                    max_val = solver.eigenvalues()[i].real();
+                    max_idx = i;
+                }
+            }
+            
+            Eigen::VectorXd eigen_vec = solver.eigenvectors().col(max_idx).real();
+            for (int i = 0; i < n; i++) {
+                result[i] = eigen_vec(i);
+            }
+        } else {
+            throw std::runtime_error("Matrix too large for dense solver");
+        }
+    }
+    catch (const std::exception& e) {
+        std::vector<double> x(n, 1.0/n);
+        return power_iteration(A, max_iter, tol, x);
+    }
+    
+    double sum = 0.0;
+    for (double val : result) {
+        sum += val;
+    }
+    
+    if (sum < 0.0) {
+        for (double& val : result) {
+            val = -val;
+        }
+    }
+    
+    double norm = 0.0;
+    for (double val : result) {
+        norm += val * val;
+    }
+    norm = std::sqrt(norm);
+    
+    for (double& val : result) {
+        val /= norm;
+    }
+    
+    return result;
+}
+#endif
+
+py::object cpp_eigenvector_centrality(
+    py::object G,
+    py::object py_max_iter,
+    py::object py_tol,
+    py::object py_nstart,
+    py::object py_weight
+) {
+    try {
+        Graph& graph = G.cast<Graph&>();
+        int max_iter = py_max_iter.cast<int>();
+        double tol = py_tol.cast<double>();
+        std::string weight_key = "";
+        if (!py_weight.is_none()) {
+            weight_key = py_weight.cast<std::string>();
+        }
+
+        if (graph.node.size() == 0) {
+            return py::dict();
+        }
+
+        std::vector<node_t> nodes;
+        for (auto& node_pair : graph.node) {
+            nodes.push_back(node_pair.first);
+        }
+        int n = nodes.size();
+        
+        CSRMatrix A_transpose = build_transpose_matrix(graph, nodes, weight_key);
+        
+        std::vector<bool> isolated_nodes(n, true);
+        for (int i = 0; i < n; i++) {
+            // A node is isolated if it has zero edges (no entries in its row)
+            if (A_transpose.indptr[i + 1] == A_transpose.indptr[i]) {
+                isolated_nodes[i] = true;
+            } else {
+                isolated_nodes[i] = false;
+            }
+        }
+
+        std::vector<double> centrality;
+        
+        if (py_nstart.is_none()) {
+            bool fast_solver_success = false;
+            
+#ifdef HAVE_EIGEN
+            try {
+                centrality = compute_eigenvector_eigen(A_transpose, max_iter, tol);
+                fast_solver_success = true;
+            } catch (const std::exception&) {
+                fast_solver_success = false;
+            }
+#endif
+            
+            if (!fast_solver_success) {
+                std::vector<double> x(n, 1.0/n);
+                centrality = power_iteration(A_transpose, max_iter, tol, x);
+            }
+        } else {
+            py::dict nstart = py_nstart.cast<py::dict>();
+            std::vector<double> x(n, 0.0);
+            
+            for (size_t i = 0; i < nodes.size(); i++) {
+                py::object node_obj = graph.id_to_node[py::cast(nodes[i])];
+                if (nstart.contains(node_obj)) {
+                    x[i] = nstart[node_obj].cast<double>();
+                } else {
+                    x[i] = 1.0;
+                }
+            }
+            
+            bool all_zeros = true;
+            for (double val : x) {
+                if (std::abs(val) > 1e-10) {
+                    all_zeros = false;
+                    break;
+                }
+            }
+            
+            if (all_zeros) {
+                throw std::runtime_error("initial vector cannot have all zero values");
+            }
+            
+            double sum_abs = 0.0;
+            for (double val : x) {
+                sum_abs += std::fabs(val);
+            }
+            
+            for (double& val : x) {
+                val /= sum_abs;
+            }
+            
+            centrality = power_iteration(A_transpose, max_iter, tol, x);
+        }
+        
+        double sum = 0.0;
+        for (double val : centrality) {
+            sum += val;
+        }
+        
+        if (sum < 0.0) {
+            for (double& val : centrality) {
+                val = -val;
+            }
+        }
+        
+        for (int i = 0; i < n; i++) {
+            if (isolated_nodes[i]) {
+                centrality[i] = 0.0;
+            }
+        }
+
+        double norm = 0.0;
+        for (double val : centrality) {
+            norm += val * val;
+        }
+        norm = std::sqrt(norm);
+        
+        if (norm > 0.0) {
+            for (double& val : centrality) {
+                val /= norm;
+            }
+        }
+        
+        py::dict result;
+        for (size_t i = 0; i < nodes.size(); i++) {
+            py::object node_obj = graph.id_to_node[py::cast(nodes[i])];
+            result[node_obj] = centrality[i];
+        }
+        
+        return result;
+    } catch (const std::exception& e) {
+        throw std::runtime_error(e.what());
+    }
+}
\ No newline at end of file
diff --git a/easygraph/functions/centrality/__init__.py b/easygraph/functions/centrality/__init__.py
index f37e55fe..bd2fec50 100644
--- a/easygraph/functions/centrality/__init__.py
+++ b/easygraph/functions/centrality/__init__.py
@@ -5,4 +5,5 @@
 from .flowbetweenness import *
 from .laplacian import *
 from .pagerank import *
-from .katz_centrality import *
\ No newline at end of file
+from .katz_centrality import *
+from .eigenvector import *
\ No newline at end of file
diff --git a/easygraph/functions/centrality/eigenvector.py b/easygraph/functions/centrality/eigenvector.py
new file mode 100644
index 00000000..2d7506d0
--- /dev/null
+++ b/easygraph/functions/centrality/eigenvector.py
@@ -0,0 +1,155 @@
+import math
+import easygraph as eg
+from easygraph.utils import *
+from easygraph.utils.decorators import *
+from scipy import sparse
+from scipy.sparse import linalg
+import numpy as np
+from collections import defaultdict
+
+__all__ = ["eigenvector_centrality"]
+
+@not_implemented_for("multigraph")
+@hybrid("cpp_eigenvector_centrality")
+def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, weight=None):
+    """Calculate eigenvector centrality for nodes in the graph
+    
+    Eigenvector centrality is based on the idea that a node's importance
+    depends on the importance of its neighboring nodes.
+    Specifically, a node's centrality is proportional to the sum of 
+    centrality values of its neighbors.
+    
+    Parameters
+    ----------
+    G : graph object
+        An undirected or directed graph
+    
+    max_iter : int, optional (default=100)
+        Maximum number of iterations for the power method
+    
+    tol : float, optional (default=1.0e-6)
+        Convergence threshold; algorithm terminates when the difference
+        between centrality values in consecutive iterations is less than this value
+    
+    nstart : dictionary, optional (default=None)
+        Dictionary mapping nodes to initial centrality values
+        If None, the ARPACK solver is used to directly compute the eigenvector
+    
+    weight : string or None, optional (default=None)
+        Name of the edge attribute to be used as edge weight
+        If None, all edges are considered to have weight 1
+    
+    Returns
+    -------
+    centrality : dictionary
+        Dictionary mapping nodes to their eigenvector centrality values
+    
+    Raises
+    ------
+    EasyGraphPointlessConcept
+        When input is an empty graph
+    
+    EasyGraphError
+        When the algorithm fails to converge within the specified maximum iterations
+    
+    Notes
+    -----
+    This algorithm uses the power iteration method to find the principal eigenvector.
+    When nstart is not provided, the ARPACK solver is used for efficiency.
+    The returned centrality values are normalized.
+    """
+    
+    if len(G) == 0:
+        raise eg.EasyGraphPointlessConcept(
+            "cannot compute centrality for the null graph"
+        )
+
+    if len(G) == 1:
+        raise eg.EasyGraphPointlessConcept(
+            "cannot compute eigenvector centrality for a single node graph"
+        )
+    
+    
+    # Build node list and mapping
+    nodelist = list(G.nodes)
+    n = len(nodelist)
+    node_map = {node: i for i, node in enumerate(nodelist)}
+    
+    # Build weighted adjacency matrix
+    row, col, data = [], [], []
+    for u in nodelist:
+        u_idx = node_map[u]
+        for v, attrs in G[u].items():
+            if v in node_map:
+                v_idx = node_map[v]
+                w = attrs.get(weight, 1.0) if weight else 1.0
+                # Build transpose matrix for centrality calculation
+                row.append(v_idx)
+                col.append(u_idx)
+                data.append(float(w))
+    
+    # Create CSR format sparse matrix
+    A = sparse.csr_matrix((data, (row, col)), shape=(n, n))
+
+    # Detect and handle isolated nodes
+    row_sums = np.array(A.sum(axis=1)).flatten()
+    col_sums = np.array(A.sum(axis=0)).flatten()
+    isolated_nodes = np.where((row_sums == 0) & (col_sums == 0))[0]
+    
+    has_isolated = len(isolated_nodes) > 0
+    isolated_indices = []
+    
+    # Add small self-loops to isolated nodes for stability
+    if has_isolated:
+        # Store isolated node indices
+        isolated_indices = isolated_nodes.tolist()
+        
+        # Add small self-loop weights to isolated nodes
+        for idx in isolated_indices:
+            A[idx, idx] = 1.0e-4  # Small enough to not affect results, but maintains numerical stability
+    if nstart is not None:
+        # Use custom initial vector for power iteration
+        v = np.array([nstart.get(n, 1.0) for n in nodelist], dtype=float)
+        v = v / np.sum(np.abs(v))
+        
+        # Power iteration method to compute principal eigenvector
+        v_last = np.zeros_like(v)
+        for _ in range(max_iter):
+            np.copyto(v_last, v)
+            v = A @ v_last  # Sparse matrix multiplication
+            
+            norm = np.linalg.norm(v)
+            if norm < 1e-10:
+                v = v_last.copy()
+                break
+            v = v / norm  # Normalization
+            
+            # Check convergence
+            if np.linalg.norm(v - v_last) < tol:
+                break
+        else:
+            raise eg.EasyGraphError(f"Eigenvector calculation did not converge in {max_iter} iterations")
+        
+        centrality = v
+    else:
+        # Use ARPACK solver to directly compute the principal eigenvector
+        eigenvalues, eigenvectors = linalg.eigs(A, k=1, which='LR', 
+                                              maxiter=max_iter, tol=tol)
+        centrality = np.real(eigenvectors[:,0])
+    
+    # Ensure positive results and normalize
+    if centrality.sum() < 0:
+        centrality = -centrality
+    
+    centrality = centrality / np.linalg.norm(centrality)
+     # Set centrality of isolated nodes to zero
+    if has_isolated:
+        for idx in isolated_indices:
+            centrality[idx] = 0.0
+        # Renormalize if needed
+        if np.sum(centrality) > 0:
+            centrality = centrality / np.linalg.norm(centrality)
+    
+    # Return dictionary of node centrality values
+    return {nodelist[i]: float(centrality[i]) for i in range(n)}
+
diff --git a/easygraph/functions/centrality/tests/test_eigenvector.py b/easygraph/functions/centrality/tests/test_eigenvector.py
new file mode 100644
index 00000000..1617b857
--- /dev/null
+++ b/easygraph/functions/centrality/tests/test_eigenvector.py
@@ -0,0 +1,119 @@
+import unittest
+import numpy as np
+import easygraph as eg
+from easygraph.functions.centrality import eigenvector_centrality
+from easygraph.utils.exception import EasyGraphPointlessConcept, EasyGraphNotImplemented
+
+
+class Test_eigenvector(unittest.TestCase):
+    def setUp(self):
+        self.edges = [
+            (1, 4),
+            (2, 4),
+            ("String", "Bool"),
+            (4, 1),
+            (0, 4),
+            (4, 256),
+            ((None, None), (None, None)),
+        ]
+
+        self.test_graphs = []
+        self.test_graphs.append(eg.classes.DiGraph(self.edges))
+
+        self.simple_graph = eg.Graph()
+        self.simple_graph.add_edges_from([(0, 1), (1, 2), (2, 3)])
+
+        self.directed_graph = eg.DiGraph()
+        self.directed_graph.add_edges_from([(0, 1), (1, 2), (2, 3)])
+
+        self.weighted_graph = eg.Graph()
+        self.weighted_graph.add_edges_from([(0, 1), (1, 2), (2, 3)])
+
+        for u, v, data in self.weighted_graph.edges:
+            data["weight"] = 2.0
+
+        self.disconnected_graph = eg.Graph()
+        self.disconnected_graph.add_edges_from([(0, 1), (2, 3)])
+
+        self.isolated_node_graph = eg.Graph()
+        self.isolated_node_graph.add_edges_from([(0, 1), (1, 2)])
+        self.isolated_node_graph.add_node(3)  
+
+        self.single_node_graph = eg.Graph()
+        self.single_node_graph.add_node(42)
+
+        self.mixed_nodes_graph = eg.Graph()
+        self.mixed_nodes_graph.add_edges_from([(1, 2), ("X", "Y"), ((1, 2), (3, 4))])
+
+    def test_eigenvector(self):
+
+        for G in self.test_graphs:
+            result = eigenvector_centrality(G)
+            self.assertIsInstance(result, dict)
+            self.assertEqual(len(result), len(G))
+
+            for v in result.values():
+                self.assertIsInstance(v, float)
+
+    def test_simple_graph(self):
+        result = eigenvector_centrality(self.simple_graph)
+        self.assertEqual(len(result), len(self.simple_graph))
+
+        for v in result.values():
+            self.assertIsInstance(v, float)
+
+    def test_directed_graph(self):
+        result = eigenvector_centrality(self.directed_graph)
+        self.assertEqual(len(result), len(self.directed_graph))
+
+    def test_weighted_graph(self):
+        result = eigenvector_centrality(self.weighted_graph, weight="weight")
+        self.assertEqual(len(result), len(self.weighted_graph))
+
+    def test_disconnected_graph(self):
+
+        result = eigenvector_centrality(self.disconnected_graph)
+        self.assertEqual(len(result), len(self.disconnected_graph))
+
+    def test_isolated_nodes(self):
+
+        result = eigenvector_centrality(self.isolated_node_graph)
+        self.assertEqual(len(result), len(self.isolated_node_graph))
+        self.assertTrue(3 in result)
+        self.assertGreaterEqual(result[3], 0)
+
+    def test_single_node_graph(self):
+        with self.assertRaises(EasyGraphPointlessConcept):
+            eigenvector_centrality(self.single_node_graph)
+
+    def test_mixed_node_types(self):
+        result = eigenvector_centrality(self.mixed_nodes_graph)
+        self.assertEqual(len(result), len(self.mixed_nodes_graph))
+
+    def test_max_iter_parameter(self):
+        result = eigenvector_centrality(self.simple_graph, max_iter=50)
+        self.assertEqual(len(result), len(self.simple_graph))
+
+    def test_tol_parameter(self):
+        result = eigenvector_centrality(self.simple_graph, tol=1.0e-3)
+        self.assertEqual(len(result), len(self.simple_graph))
+
+    def test_nstart_parameter(self):
+        nstart = {node: 1.0 for node in self.simple_graph}
+        result = eigenvector_centrality(self.simple_graph, nstart=nstart)
+        self.assertEqual(len(result), len(self.simple_graph))
+
+    def test_multigraph_raises(self):
+        G = eg.MultiGraph()
+        G.add_edges_from([(0, 1), (0, 1)])
+        with self.assertRaises(EasyGraphNotImplemented):  
+            eigenvector_centrality(G)
+
+    def test_empty_graph_raises(self):
+        G = eg.Graph()
+        with self.assertRaises(EasyGraphPointlessConcept):
+            eigenvector_centrality(G)
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

From b5850d8b56b0fcd98ccd27e3e7d18f4995b2b199 Mon Sep 17 00:00:00 2001
From: Nianle <24302010062@m.fudan.edu.cn>
Date: Fri, 5 Dec 2025 04:40:18 -0700
Subject: [PATCH 2/6] Optimize Eigenvector Centrality

---
 cpp_easygraph/CMakeLists.txt                  |  37 +-
 .../functions/centrality/eigenvector.cpp      | 784 ++++++++++++------
 2 files changed, 561 insertions(+), 260 deletions(-)

diff --git a/cpp_easygraph/CMakeLists.txt b/cpp_easygraph/CMakeLists.txt
index f272b808..2e85607f 100644
--- a/cpp_easygraph/CMakeLists.txt
+++ b/cpp_easygraph/CMakeLists.txt
@@ -13,6 +13,25 @@ add_subdirectory(pybind11)
 
 option(EASYGRAPH_ENABLE_GPU "EASYGRAPH_ENABLE_GPU" OFF)
 
+# 启用OpenMP支持
+find_package(OpenMP)
+if(OpenMP_CXX_FOUND)
+    message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}")
+    message(STATUS "OpenMP flags: ${OpenMP_CXX_FLAGS}")
+else()
+    message(WARNING "OpenMP not found. Parallel optimization will be disabled.")
+endif()
+
+# 查找ARPACK库（兼容无CMake配置的系统）
+find_library(ARPACK_LIB NAMES arpack arpack_double PATHS ENV LD_LIBRARY_PATH)
+if(NOT ARPACK_LIB)
+    message(FATAL_ERROR "ARPACK library not found")
+else()
+    message(STATUS "Found ARPACK library: ${ARPACK_LIB}")
+endif()
+
+find_package(LAPACK REQUIRED)
+
 if (EASYGRAPH_ENABLE_GPU)
 
     pybind11_add_module(cpp_easygraph
@@ -38,11 +57,6 @@ else()
 
 endif()
 
-set_target_properties(cpp_easygraph  PROPERTIES
-    LINK_SEARCH_START_STATIC ON
-    LINK_SEARCH_END_STATIC ON
-)
-
 find_package(Eigen3 QUIET)
 if(Eigen3_FOUND)
     message(STATUS "Found Eigen3: ${EIGEN3_INCLUDE_DIR}")
@@ -50,9 +64,12 @@ if(Eigen3_FOUND)
     add_definitions(-DEIGEN_MAJOR_VERSION=${EIGEN3_VERSION_MAJOR})
 endif()
 
-# 启用OpenMP支持
-find_package(OpenMP)
+# 将OpenMP链接到target
 if(OpenMP_CXX_FOUND)
-    message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-endif()
\ No newline at end of file
+    target_link_libraries(cpp_easygraph PRIVATE OpenMP::OpenMP_CXX)
+    target_compile_options(cpp_easygraph PRIVATE ${OpenMP_CXX_FLAGS})
+    message(STATUS "OpenMP enabled for cpp_easygraph")
+endif()
+
+# 链接ARPACK和LAPACK
+target_link_libraries(cpp_easygraph PRIVATE ${ARPACK_LIB} LAPACK::LAPACK)
\ No newline at end of file
diff --git a/cpp_easygraph/functions/centrality/eigenvector.cpp b/cpp_easygraph/functions/centrality/eigenvector.cpp
index aa434621..6d9eb669 100644
--- a/cpp_easygraph/functions/centrality/eigenvector.cpp
+++ b/cpp_easygraph/functions/centrality/eigenvector.cpp
@@ -1,286 +1,482 @@
-#include <cmath>
 #include <vector>
+#include <cmath>
+#include <algorithm>
+#include <iostream>
+#include <chrono>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <pybind11/numpy.h>
 #include <unordered_map>
-#include <algorithm>
 #include <tuple>
 #include "centrality.h"
 #include "../../classes/graph.h"
 
-#ifdef EIGEN_MAJOR_VERSION
+#ifdef HAVE_EIGEN
 #include <Eigen/Sparse>
 #include <Eigen/Eigenvalues>
-#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
-#define HAVE_EIGEN
-#endif
+    #ifdef HAVE_EIGEN_SPARSE_SOLVER
+    #include <Eigen/SparseCore>
+    #endif
 #endif
 
 namespace py = pybind11;
 
+extern "C" {
+    void dsaupd_(int*, char*, int*, char*, int*, double*, double*, int*, 
+                 double*, int*, int*, int*, double*, double*, int*, int*);
+    void dseupd_(int*, char*, int*, double*, double*, int*, double*, 
+                 char*, int*, char*, int*, double*, double*, int*, 
+                 double*, int*, int*, int*, double*, double*, int*, int*);
+    void dnaupd_(int*, char*, int*, char*, int*, double*, double*, int*, 
+                 double*, int*, int*, int*, double*, double*, int*, int*);
+    void dneupd_(int* rvec, char* howmny, int* select,
+                 double* dr, double* di, double* z, int* ldz,
+                 double* sigmar, double* sigmai, double* workev,
+                 char* bmat, int* n, char* which, int* nev,
+                 double* tol, double* resid, int* ncv, double* v, int* ldv,
+                 int* iparam, int* ipntr, double* workd, double* workl,
+                 int* lworkl, int* info);
+}
+
+// CSRMatrix 稀疏矩阵结构体
 class CSRMatrix {
 public:
     std::vector<int> indptr;
     std::vector<int> indices;
     std::vector<double> data;
-    int rows, cols;
-    
+    int rows;
+    int cols;
+
+    CSRMatrix() : rows(0), cols(0) {}
     CSRMatrix(int r, int c) : rows(r), cols(c) {
-        indptr.resize(r + 1, 0);
+        indptr.assign(r + 1, 0);
     }
-    
+
     void reserve(size_t nnz) {
         indices.reserve(nnz);
         data.reserve(nnz);
     }
-    
-    std::vector<double> multiply(const std::vector<double>& vec) const {
-        std::vector<double> result(rows, 0.0);
-        
-        #pragma omp parallel for if(rows > 1000)
-        for (int i = 0; i < rows; i++) {
-            for (int j = indptr[i]; j < indptr[i + 1]; j++) {
-                result[i] += data[j] * vec[indices[j]];
+
+    void multiply_inplace(const std::vector<double>& vec, std::vector<double>& result) const {
+        result.assign(rows, 0.0);
+        #pragma omp parallel for schedule(static,64) if(rows > 1000)
+        for (int i = 0; i < rows; ++i) {
+            double sum = 0.0;
+            for (int j = indptr[i]; j < indptr[i + 1]; ++j) {
+                sum += data[j] * vec[indices[j]];
             }
+            result[i] = sum;
         }
-        
-        return result;
     }
 
-    void multiply_inplace(const std::vector<double>& vec, std::vector<double>& result) const {
-        std::fill(result.begin(), result.end(), 0.0);
-        
-        #pragma omp parallel for if(rows > 1000)
-        for (int i = 0; i < rows; i++) {
-            for (int j = indptr[i]; j < indptr[i + 1]; j++) {
-                result[i] += data[j] * vec[indices[j]];
-            }
+    double estimate_norm() const {
+        double s = 0.0;
+        #pragma omp parallel for reduction(+:s) if(data.size() > 1000)
+        for (size_t i = 0; i < data.size(); ++i) s += data[i] * data[i];
+        return std::sqrt(s);
+    }
+
+    double rayleigh_quotient(const std::vector<double>& x, const std::vector<double>& Ax) const {
+        double num = 0.0, den = 0.0;
+        #pragma omp parallel for reduction(+:num,den) if(rows > 1000)
+        for (int i = 0; i < rows; ++i) {
+            num += Ax[i] * x[i];
+            den += x[i] * x[i];
         }
+        return den > 0.0 ? num / den : 0.0;
     }
 
 #ifdef HAVE_EIGEN
     Eigen::SparseMatrix<double> to_eigen() const {
-        Eigen::SparseMatrix<double> eigen_mat(rows, cols);
-        eigen_mat.reserve(Eigen::VectorXi::Constant(cols, 5));
-        
-        for (int i = 0; i < rows; i++) {
-            for (int j = indptr[i]; j < indptr[i + 1]; j++) {
-                eigen_mat.insert(i, indices[j]) = data[j];
+        Eigen::SparseMatrix<double> M(rows, cols);
+        std::vector<Eigen::Triplet<double>> trip;
+        trip.reserve(data.size());
+        for (int i = 0; i < rows; ++i) {
+            for (int j = indptr[i]; j < indptr[i+1]; ++j) {
+                trip.emplace_back(i, indices[j], data[j]);
             }
         }
-        eigen_mat.makeCompressed();
-        return eigen_mat;
+        M.setFromTriplets(trip.begin(), trip.end());
+        M.makeCompressed();
+        return M;
     }
 #endif
 };
 
-CSRMatrix build_transpose_matrix(
-    const Graph& graph,
-    const std::vector<node_t>& nodes,
-    const std::string& weight_key
-) {
-    int n = nodes.size();
-    std::unordered_map<node_t, int> node_to_idx;
-    for (size_t i = 0; i < nodes.size(); i++) {
-        node_to_idx[nodes[i]] = i;
-    }
-    
-    size_t nnz = 0;
-    for (node_t node_id : nodes) {
-        const auto& adj_it = graph.adj.find(node_id);
-        if (adj_it != graph.adj.end()) {
-            nnz += adj_it->second.size();
+// 前向声明
+std::vector<double> power_iteration_chebyshev(
+    const CSRMatrix& A,
+    int max_iter,
+    double tol,
+    std::vector<double>& x
+);
+double vector_norm(const std::vector<double>& x);
+void normalize_vector(std::vector<double>& x, double norm);
+double vector_diff_norm(const std::vector<double>& x1, const std::vector<double>& x2);
+
+
+
+// 构建转置稀疏矩阵
+CSRMatrix build_transpose_matrix(Graph& graph, const std::vector<node_t>& nodes, const std::string& weight_key) {
+    try {
+        std::shared_ptr<CSRGraph> csr_ptr;
+        if (weight_key.empty()) {
+            csr_ptr = graph.gen_CSR();
+        } else {
+            csr_ptr = graph.gen_CSR(weight_key);
         }
-    }
-    
-    std::vector<std::tuple<int, int, double>> edges;
-    edges.reserve(nnz);
-    
-    for (node_t src_id : nodes) {
-        int src_idx = node_to_idx[src_id];
-        const auto& adj_it = graph.adj.find(src_id);
-        if (adj_it != graph.adj.end()) {
-            for (const auto& adj_pair : adj_it->second) {
-                node_t dst_id = adj_pair.first;
-                auto dst_it = node_to_idx.find(dst_id);
-                if (dst_it != node_to_idx.end()) {
-                    int dst_idx = dst_it->second;
-                    
-                    double w = 1.0;
-                    if (!weight_key.empty()) {
-                        auto w_it = adj_pair.second.find(weight_key);
-                        if (w_it != adj_pair.second.end()) {
-                            w = w_it->second;
-                        }
-                    }
-                    
-                    edges.push_back(std::make_tuple(dst_idx, src_idx, w));
+
+        if (!csr_ptr) {
+            std::cerr << "[build_transpose_matrix] Error: gen_CSR returned nullptr" << std::endl;
+            const int n = static_cast<int>(nodes.size());
+            CSRMatrix A(n, n);
+            A.indptr.assign(n + 1, 0);
+            return A;
+        }
+
+        const int n = static_cast<int>(nodes.size());
+        std::vector<int> src_indptr;
+        std::vector<int> src_indices;
+        std::vector<double> src_data;
+        int rows = 0, cols = 0;
+
+        // 只用V/E/unweighted_W/W_map
+        if (!csr_ptr->V.empty() && !csr_ptr->E.empty()) {
+            rows = static_cast<int>(csr_ptr->V.size()) - 1;
+            cols = rows;
+            src_indptr = csr_ptr->V;
+            src_indices = csr_ptr->E;
+            if (weight_key.empty()) {
+                src_data = csr_ptr->unweighted_W.empty() ? 
+                          std::vector<double>(csr_ptr->E.size(), 1.0) : 
+                          csr_ptr->unweighted_W;
+            } else {
+                auto it = csr_ptr->W_map.find(weight_key);
+                if (it != csr_ptr->W_map.end() && it->second) {
+                    src_data = *(it->second);
+                } else {
+                    src_data = std::vector<double>(csr_ptr->E.size(), 1.0);
                 }
             }
+        } else {
+            std::cerr << "[build_transpose_matrix] Warning: CSR data is empty (V size=" 
+                      << csr_ptr->V.size() << ")" << std::endl;
+            CSRMatrix A(n, n);
+            A.indptr.assign(n + 1, 0);
+            return A;
         }
-    }
-    
-    std::sort(edges.begin(), edges.end(), 
-              [](const std::tuple<int, int, double>& a, const std::tuple<int, int, double>& b) {
-                  if (std::get<0>(a) != std::get<0>(b)) {
-                      return std::get<0>(a) < std::get<0>(b);
-                  }
-                  return std::get<1>(a) < std::get<1>(b);
-              });
-    
-    CSRMatrix matrix(n, n);
-    matrix.reserve(edges.size());
-    
-    for (const auto& edge : edges) {
-        int row = std::get<0>(edge);
-        matrix.indptr[row + 1]++;
-    }
-    
-    for (int i = 0; i < n; i++) {
-        matrix.indptr[i + 1] += matrix.indptr[i];
-    }
-    
-    for (const auto& edge : edges) {
-        int row = std::get<0>(edge);
-        int col = std::get<1>(edge);
-        double val = std::get<2>(edge);
-        
-        matrix.indices.push_back(col);
-        matrix.data.push_back(val);
-    }
-    
-    std::vector<bool> has_connections(n, false);
-    
-    for (size_t i = 0; i < n; i++) {
-        if (matrix.indptr[i + 1] > matrix.indptr[i]) {
-            has_connections[i] = true;
+
+        if (src_indptr.size() < 2 || src_indices.empty()) {
+            std::cerr << "[build_transpose_matrix] Warning: Invalid CSR structure" << std::endl;
+            CSRMatrix A(n, n);
+            A.indptr.assign(n + 1, 0);
+            return A;
         }
-    }
-    
-    for (int i = 0; i < n; i++) {
-        if (!has_connections[i]) {
-            matrix.indices.push_back(i);
-            matrix.data.push_back(1.0e-4);
-            
-            for (int j = i + 1; j <= n; j++) {
-                matrix.indptr[j]++;
+
+        // 转置
+        CSRMatrix At(cols, rows);
+        At.indptr.assign(cols + 1, 0);
+        size_t nnz = src_indices.size();
+
+        for (size_t idx = 0; idx < nnz; ++idx) {
+            int col = src_indices[idx];
+            if (col >= 0 && col < cols) {
+                At.indptr[col + 1]++;
+            }
+        }
+        for (int i = 0; i < cols; ++i) {
+            At.indptr[i + 1] += At.indptr[i];
+        }
+
+        At.indices.assign(nnz, 0);
+        At.data.assign(nnz, 0.0);
+
+        std::vector<int> cur_pos(At.indptr.begin(), At.indptr.end());
+        
+        for (int r = 0; r < rows; ++r) {
+            int start = src_indptr[r];
+            int end = (r + 1 < static_cast<int>(src_indptr.size())) ? src_indptr[r + 1] : static_cast<int>(nnz);
+            for (int p = start; p < end; ++p) {
+                if (p >= static_cast<int>(src_indices.size())) break;
+                int c = src_indices[p];
+                if (c < 0 || c >= cols) continue;
+                int dest = cur_pos[c]++;
+                At.indices[dest] = r;
+                At.data[dest] = (p < static_cast<int>(src_data.size())) ? src_data[p] : 1.0;
             }
         }
+
+        return At;
+    } catch (const std::exception& e) {
+        std::cerr << "[build_transpose_matrix] Exception: " << e.what() << std::endl;
+        const int n = static_cast<int>(nodes.size());
+        CSRMatrix A(n, n);
+        A.indptr.assign(n + 1, 0);
+        return A;
     }
-    return matrix;
 }
 
-std::vector<double> power_iteration(
+// ARPACK: 有向图
+std::vector<double> compute_eigenvector_arpack_directed(
     const CSRMatrix& A,
     int max_iter,
-    double tol,
-    std::vector<double>& x
+    double tol
 ) {
-    int n = A.rows;
-    std::vector<double> x_next(n, 0.0);
-    
-    for (int iter = 0; iter < max_iter; iter++) {
-        A.multiply_inplace(x, x_next);
-        
-        double norm = 0.0;
-        #pragma omp parallel for reduction(+:norm) if(n > 10000)
-        for (int i = 0; i < n; i++) {
-            norm += x_next[i] * x_next[i];
-        }
-        norm = std::sqrt(norm);
-        
-        if (norm < 1e-12) {
-            return x;
-        }
-        
-        #pragma omp parallel for if(n > 10000)
+    const int n_const = A.rows;
+    std::vector<double> result(n_const, 0.0);
+    int n = n_const;
+    int nev = 1;
+    int ncv = (n > 30) ? 30 : n;
+    if (ncv < nev + 2) {
+        ncv = std::min(nev + 2, n);
+    }
+    int ido = 0;
+    char bmat[2] = "I";
+    char which[3] = "LR";
+    std::vector<double> resid(n);
+    std::vector<double> v(static_cast<size_t>(n) * ncv);
+    std::vector<double> workd(3 * n);
+    int lworkl = 3 * ncv * ncv + 6 * ncv;
+    std::vector<double> workl(lworkl);
+
+    #pragma omp parallel for schedule(static) if(n > 1000)
+    for (int i = 0; i < n; i++) {
+        int degree = A.indptr[i + 1] - A.indptr[i];
+        resid[i] = degree > 0 ? 
+                   static_cast<double>(degree) + 1e-4 * (rand() / static_cast<double>(RAND_MAX)) :
+                   1.0;
+    }
+    double norm = 0.0;
+    for (int i = 0; i < n; i++) {
+        norm += resid[i] * resid[i];
+    }
+    norm = std::sqrt(norm);
+    if (norm > 1e-12) {
         for (int i = 0; i < n; i++) {
-            x_next[i] /= norm;
+            resid[i] /= norm;
         }
-        
-        double diff = 0.0;
-        #pragma omp parallel for reduction(+:diff) if(n > 10000)
-        for (int i = 0; i < n; i++) {
-            diff += std::fabs(x_next[i] - x[i]);
+    }
+    int iparam[11] = {0};
+    iparam[0] = 1;
+    iparam[2] = max_iter;
+    iparam[3] = 1;
+    iparam[6] = 1;
+    int ipntr[14];
+    int ldv = n;
+    int info = 0;
+    int iter_count = 0;
+    auto start_time = std::chrono::high_resolution_clock::now();
+    try {
+        while (true) {
+            dnaupd_(&ido, bmat, &n, which, &nev, &tol, resid.data(), &ncv,
+                    v.data(), &ldv, iparam, ipntr, workd.data(), workl.data(),
+                    &lworkl, &info);
+            if (ido == -1 || ido == 1) {
+                int x_idx = ipntr[0] - 1;
+                int y_idx = ipntr[1] - 1;
+                std::vector<double> x_vec(n), y_vec(n);
+                std::copy(workd.begin() + x_idx, workd.begin() + x_idx + n, x_vec.begin());
+                A.multiply_inplace(x_vec, y_vec);
+                std::copy(y_vec.begin(), y_vec.end(), workd.begin() + y_idx);
+                iter_count++;
+            } else if (ido == 99) {
+                break;
+            } else {
+                throw std::runtime_error("ARPACK unexpected ido: " + std::to_string(ido));
+            }
+            if (iter_count > max_iter * 10) {
+                throw std::runtime_error("ARPACK exceeded iteration limit");
+            }
         }
-        
-        if (diff < n * tol) {
-            std::swap(x, x_next);
-            break;
+    } catch (const std::exception& e) {
+        std::cerr << "[ARPACK-Directed] Error: " << e.what() << std::endl;
+        throw;
+    }
+    if (info < 0) {
+        std::string error_msg = "ARPACK dnaupd_ error: " + std::to_string(info);
+        switch(info) {
+            case -5: error_msg += " (LWORKL too small, need at least " + std::to_string(3*ncv*ncv + 6*ncv) + ")"; break;
         }
-        
-        std::swap(x, x_next);
+        throw std::runtime_error(error_msg);
     }
-    
-    return x;
+    int rvec = 1;
+    char howmny[2] = "A";
+    std::vector<int> select(ncv);
+    std::vector<double> dr(nev + 1);
+    std::vector<double> di(nev + 1);
+    std::vector<double> z(n * (nev + 1));
+    std::vector<double> workev(3 * ncv);
+    int ldz = n;
+    double sigmar = 0.0, sigmai = 0.0;
+    dneupd_(&rvec, howmny, select.data(), dr.data(), di.data(), z.data(), &ldz,
+            &sigmar, &sigmai, workev.data(), bmat, &n, which, &nev, &tol,
+            resid.data(), &ncv, v.data(), &ldv, iparam, ipntr, workd.data(),
+            workl.data(), &lworkl, &info);
+    if (info != 0) {
+        throw std::runtime_error("ARPACK dneupd_ error: " + std::to_string(info));
+    }
+    std::copy(z.begin(), z.begin() + n, result.begin());
+    return result;
 }
 
-#ifdef HAVE_EIGEN
-std::vector<double> compute_eigenvector_eigen(
-    const CSRMatrix& A, 
+// ARPACK: 无向图
+std::vector<double> compute_eigenvector_arpack(
+    const CSRMatrix& A,
     int max_iter,
     double tol
 ) {
-    Eigen::SparseMatrix<double> eigen_matrix = A.to_eigen();
-    
-    int n = A.rows;
-    std::vector<double> result(n);
-    
+    const int n_const = A.rows;
+    std::vector<double> result(n_const, 0.0);
+    int n = n_const;
+    int nev = 1;
+    int ncv = (n > 30) ? 30 : n;
+    if (ncv < nev + 2) {
+        ncv = std::min(nev + 2, n);
+    }
+    int ido = 0;
+    char bmat[2] = "I";
+    char which[3] = "LR";
+    std::vector<double> resid(n);
+    std::vector<double> v(static_cast<size_t>(n) * ncv);
+    std::vector<double> workd(3 * n);
+    int lworkl = ncv * (ncv + 8);
+    std::vector<double> workl(lworkl);
+
+    #pragma omp parallel for schedule(static) if(n > 1000)
+    for (int i = 0; i < n; i++) {
+        int degree = A.indptr[i + 1] - A.indptr[i];
+        resid[i] = degree > 0 ? 
+                   static_cast<double>(degree) + 1e-4 * (rand() / static_cast<double>(RAND_MAX)) :
+                   1.0;
+    }
+    double norm = 0.0;
+    for (int i = 0; i < n; i++) {
+        norm += resid[i] * resid[i];
+    }
+    norm = std::sqrt(norm);
+    if (norm > 1e-12) {
+        for (int i = 0; i < n; i++) {
+            resid[i] /= norm;
+        }
+    }
+    int iparam[11] = {0};
+    iparam[0] = 1;
+    iparam[2] = max_iter;
+    iparam[3] = 1;
+    iparam[6] = 1;
+    int ipntr[11];
+    int ldv = n;
+    int info = 0;
+    int iter_count = 0;
+    auto start_time = std::chrono::high_resolution_clock::now();
     try {
-        Eigen::EigenSolver<Eigen::MatrixXd> solver;
-        
-        if (n < 1000) {
-            Eigen::MatrixXd dense_matrix(eigen_matrix);
-            solver.compute(dense_matrix);
-            
-            int max_idx = 0;
-            double max_val = solver.eigenvalues()[0].real();
-            for (int i = 1; i < n; i++) {
-                if (solver.eigenvalues()[i].real() > max_val) {
-                    max_val = solver.eigenvalues()[i].real();
-                    max_idx = i;
-                }
+        while (true) {
+            dsaupd_(&ido, bmat, &n, which, &nev, &tol, resid.data(), &ncv,
+                    v.data(), &ldv, iparam, ipntr, workd.data(), workl.data(),
+                    &lworkl, &info);
+            if (ido == -1 || ido == 1) {
+                int x_idx = ipntr[0] - 1;
+                int y_idx = ipntr[1] - 1;
+                std::vector<double> x_vec(n), y_vec(n);
+                std::copy(workd.begin() + x_idx, workd.begin() + x_idx + n, x_vec.begin());
+                A.multiply_inplace(x_vec, y_vec);
+                std::copy(y_vec.begin(), y_vec.end(), workd.begin() + y_idx);
+                iter_count++;
+            } else if (ido == 99) {
+                break;
+            } else {
+                throw std::runtime_error("ARPACK unexpected ido: " + std::to_string(ido));
             }
-            
-            Eigen::VectorXd eigen_vec = solver.eigenvectors().col(max_idx).real();
-            for (int i = 0; i < n; i++) {
-                result[i] = eigen_vec(i);
+            if (iter_count > max_iter * 10) {
+                throw std::runtime_error("ARPACK exceeded iteration limit");
             }
-        } else {
-            throw std::runtime_error("Matrix too large for dense solver");
         }
+    } catch (const std::exception& e) {
+        std::cerr << "[ARPACK] Error: " << e.what() << std::endl;
+        throw;
     }
-    catch (const std::exception& e) {
-        std::vector<double> x(n, 1.0/n);
-        return power_iteration(A, max_iter, tol, x);
+    if (info < 0) {
+        std::string error_msg = "ARPACK dsaupd_ error: " + std::to_string(info);
+        switch(info) {
+            case -1: error_msg += " (N must be positive)"; break;
+            case -2: error_msg += " (NEV must be positive)"; break;
+            case -3: error_msg += " (NCV-NEV >= 2 and NCV <= N not satisfied)"; break;
+            case -5: error_msg += " (LWORKL too small, need at least " + std::to_string(ncv * (ncv + 8)) + ")"; break;
+            case -8: error_msg += " (Error in LAPACK)"; break;
+            case -9: error_msg += " (Starting vector is zero)"; break;
+            case -14: error_msg += " (Did not find any eigenvalues)"; break;
+        }
+        throw std::runtime_error(error_msg);
     }
-    
-    double sum = 0.0;
-    for (double val : result) {
-        sum += val;
+    int rvec = 1;
+    char howmny[2] = "A";
+    std::vector<int> select(ncv);
+    std::vector<double> d(nev);
+    std::vector<double> z(n * nev);
+    int ldz = n;
+    double sigma = 0.0;
+    dseupd_(&rvec, howmny, select.data(), d.data(), z.data(), &ldz, &sigma,
+            bmat, &n, which, &nev, &tol, resid.data(), &ncv, v.data(), &ldv,
+            iparam, ipntr, workd.data(), workl.data(), &lworkl, &info);
+    if (info != 0) {
+        throw std::runtime_error("ARPACK dseupd_ error: " + std::to_string(info));
     }
-    
-    if (sum < 0.0) {
-        for (double& val : result) {
-            val = -val;
+    std::copy(z.begin(), z.begin() + n, result.begin());
+    return result;
+}
+
+#if defined(HAVE_EIGEN) && defined(HAVE_EIGEN_SPARSE_SOLVER)
+std::vector<double> compute_eigenvector_eigen_sparse(
+    const CSRMatrix& A,
+    int max_iter,
+    double tol
+) {
+    const int n = A.rows;
+    Eigen::SparseMatrix<double> eigen_mat = A.to_eigen();
+    Eigen::VectorXd x(n);
+    #pragma omp parallel for schedule(static) if(n > 1000)
+    for (int i = 0; i < n; i++) {
+        int degree = A.indptr[i + 1] - A.indptr[i];
+        double second_order = 0.0;
+        for (int j = A.indptr[i]; j < A.indptr[i + 1]; j++) {
+            int nei = A.indices[j];
+            int nei_degree = A.indptr[nei + 1] - A.indptr[nei];
+            second_order += nei_degree;
         }
+        x(i) = degree > 0 ? 
+               static_cast<double>(degree) + 0.1 * second_order / (degree + 1) :
+               1.0;
     }
-    
-    double norm = 0.0;
-    for (double val : result) {
-        norm += val * val;
+    x.normalize();
+    double prev_eigenvalue = 0.0;
+    const double rel_tol = tol * std::max(1.0, A.estimate_norm());
+    for (int iter = 0; iter < max_iter; iter++) {
+        Eigen::VectorXd x_new = eigen_mat * x;
+        double norm = x_new.norm();
+        if (norm < 1e-12) break;
+        x_new /= norm;
+        double eigenvalue = x_new.dot(eigen_mat * x_new) / x_new.squaredNorm();
+        double diff = (x_new - x).norm();
+        bool converged_vector = (diff < rel_tol);
+        bool converged_eigenvalue = (iter > 0) && 
+                                   (std::abs(eigenvalue - prev_eigenvalue) < tol * std::abs(eigenvalue));
+        if (converged_vector || converged_eigenvalue) {
+            x = x_new;
+            break;
+        }
+        x = x_new;
+        prev_eigenvalue = eigenvalue;
     }
-    norm = std::sqrt(norm);
-    
-    for (double& val : result) {
-        val /= norm;
+    std::vector<double> result(n);
+    #pragma omp parallel for schedule(static) if(n > 1000)
+    for (int i = 0; i < n; i++) {
+        result[i] = x(i);
     }
-    
     return result;
 }
 #endif
 
+// 主入口
 py::object cpp_eigenvector_centrality(
     py::object G,
     py::object py_max_iter,
@@ -302,45 +498,68 @@ py::object cpp_eigenvector_centrality(
         }
 
         std::vector<node_t> nodes;
+        nodes.reserve(graph.node.size());
         for (auto& node_pair : graph.node) {
             nodes.push_back(node_pair.first);
         }
-        int n = nodes.size();
+        const int n = nodes.size();
         
         CSRMatrix A_transpose = build_transpose_matrix(graph, nodes, weight_key);
         
-        std::vector<bool> isolated_nodes(n, true);
+        std::vector<bool> isolated_nodes(n, false);
+        #pragma omp parallel for schedule(static) if(n > 1000)
         for (int i = 0; i < n; i++) {
-            // A node is isolated if it has zero edges (no entries in its row)
-            if (A_transpose.indptr[i + 1] == A_transpose.indptr[i]) {
-                isolated_nodes[i] = true;
-            } else {
-                isolated_nodes[i] = false;
-            }
+            isolated_nodes[i] = (A_transpose.indptr[i + 1] == A_transpose.indptr[i]);
         }
 
         std::vector<double> centrality;
         
         if (py_nstart.is_none()) {
             bool fast_solver_success = false;
-            
-#ifdef HAVE_EIGEN
+#ifdef HAVE_ARPACK
             try {
-                centrality = compute_eigenvector_eigen(A_transpose, max_iter, tol);
+                if (Graph_is_directed(G)) {
+                    centrality = compute_eigenvector_arpack_directed(A_transpose, max_iter, tol);
+                } else {
+                    centrality = compute_eigenvector_arpack(A_transpose, max_iter, tol);
+                }
                 fast_solver_success = true;
-            } catch (const std::exception&) {
+            } catch (const std::exception& e) {
                 fast_solver_success = false;
             }
 #endif
-            
+#if defined(HAVE_EIGEN) && defined(HAVE_EIGEN_SPARSE_SOLVER)
+            if (!fast_solver_success && n >= 100 && n < 500000) {
+                try {
+                    centrality = compute_eigenvector_eigen_sparse(A_transpose, max_iter, tol);
+                    fast_solver_success = true;
+                } catch (const std::exception&) {
+                    fast_solver_success = false;
+                }
+            }
+#endif
             if (!fast_solver_success) {
-                std::vector<double> x(n, 1.0/n);
-                centrality = power_iteration(A_transpose, max_iter, tol, x);
+                std::vector<double> x(n, 0.0);
+                #pragma omp parallel for schedule(static) if(n > 1000)
+                for (int i = 0; i < n; i++) {
+                    if (!isolated_nodes[i]) {
+                        const int degree = A_transpose.indptr[i + 1] - A_transpose.indptr[i];
+                        double second_order = 0.0;
+                        for (int j = A_transpose.indptr[i]; j < A_transpose.indptr[i + 1]; j++) {
+                            int nei = A_transpose.indices[j];
+                            int nei_degree = A_transpose.indptr[nei + 1] - A_transpose.indptr[nei];
+                            second_order += nei_degree;
+                        }
+                        x[i] = static_cast<double>(degree) + 
+                               0.1 * second_order / (degree + 1) +
+                               1e-4 * (rand() / static_cast<double>(RAND_MAX));
+                    }
+                }
+                centrality = power_iteration_chebyshev(A_transpose, max_iter, tol, x);
             }
         } else {
             py::dict nstart = py_nstart.cast<py::dict>();
             std::vector<double> x(n, 0.0);
-            
             for (size_t i = 0; i < nodes.size(); i++) {
                 py::object node_obj = graph.id_to_node[py::cast(nodes[i])];
                 if (nstart.contains(node_obj)) {
@@ -349,7 +568,6 @@ py::object cpp_eigenvector_centrality(
                     x[i] = 1.0;
                 }
             }
-            
             bool all_zeros = true;
             for (double val : x) {
                 if (std::abs(val) > 1e-10) {
@@ -357,60 +575,126 @@ py::object cpp_eigenvector_centrality(
                     break;
                 }
             }
-            
             if (all_zeros) {
                 throw std::runtime_error("initial vector cannot have all zero values");
             }
-            
             double sum_abs = 0.0;
-            for (double val : x) {
-                sum_abs += std::fabs(val);
+            #pragma omp parallel for reduction(+:sum_abs) schedule(static) if(n > 1000)
+            for (int i = 0; i < n; i++) {
+                sum_abs += std::fabs(x[i]);
             }
-            
-            for (double& val : x) {
-                val /= sum_abs;
+            const double inv_sum = 1.0 / sum_abs;
+            #pragma omp parallel for schedule(static) if(n > 1000)
+            for (int i = 0; i < n; i++) {
+                x[i] *= inv_sum;
             }
-            
-            centrality = power_iteration(A_transpose, max_iter, tol, x);
+            centrality = power_iteration_chebyshev(A_transpose, max_iter, tol, x);
         }
-        
         double sum = 0.0;
-        for (double val : centrality) {
-            sum += val;
+        #pragma omp parallel for reduction(+:sum) schedule(static) if(n > 1000)
+        for (int i = 0; i < n; i++) {
+            sum += centrality[i];
         }
-        
         if (sum < 0.0) {
-            for (double& val : centrality) {
-                val = -val;
+            #pragma omp parallel for schedule(static) if(n > 1000)
+            for (int i = 0; i < n; i++) {
+                centrality[i] = -centrality[i];
             }
         }
-        
+        #pragma omp parallel for schedule(static) if(n > 1000)
         for (int i = 0; i < n; i++) {
             if (isolated_nodes[i]) {
                 centrality[i] = 0.0;
             }
         }
-
-        double norm = 0.0;
-        for (double val : centrality) {
-            norm += val * val;
+        double norm = vector_norm(centrality);
+        if (norm > 1e-12) {
+            normalize_vector(centrality, norm);
         }
-        norm = std::sqrt(norm);
-        
-        if (norm > 0.0) {
-            for (double& val : centrality) {
-                val /= norm;
-            }
-        }
-        
         py::dict result;
         for (size_t i = 0; i < nodes.size(); i++) {
             py::object node_obj = graph.id_to_node[py::cast(nodes[i])];
             result[node_obj] = centrality[i];
         }
-        
         return result;
     } catch (const std::exception& e) {
         throw std::runtime_error(e.what());
     }
+}
+
+// 幂迭代
+std::vector<double> power_iteration_chebyshev(
+    const CSRMatrix& A,
+    int max_iter,
+    double tol,
+    std::vector<double>& x
+) {
+    const int n = A.rows;
+    std::vector<double> x_old(n, 0.0);
+    std::vector<double> x_new(n, 0.0);
+    double norm0 = 0.0;
+    for (int i = 0; i < n; ++i) norm0 += x[i] * x[i];
+    norm0 = std::sqrt(norm0);
+    if (norm0 < 1e-12) {
+        for (int i = 0; i < n; ++i) x_old[i] = 1.0 / std::sqrt(static_cast<double>(n));
+    } else {
+        for (int i = 0; i < n; ++i) x_old[i] = x[i] / norm0;
+    }
+    double lambda_old = 0.0, lambda = 0.0;
+    for (int iter = 0; iter < max_iter; ++iter) {
+        A.multiply_inplace(x_old, x_new);
+        double norm = 0.0;
+        for (int i = 0; i < n; ++i) norm += x_new[i] * x_new[i];
+        norm = std::sqrt(norm);
+        if (norm < 1e-12) break;
+        double inv_norm = 1.0 / norm;
+        for (int i = 0; i < n; ++i) x_new[i] *= inv_norm;
+        std::vector<double> Ax(n, 0.0);
+        A.multiply_inplace(x_new, Ax);
+        double num = 0.0, den = 0.0;
+        for (int i = 0; i < n; ++i) {
+            num += x_new[i] * Ax[i];
+            den += x_new[i] * x_new[i];
+        }
+        lambda = (den > 0.0) ? (num / den) : 0.0;
+        double diff = 0.0;
+        for (int i = 0; i < n; ++i) {
+            double d = x_new[i] - x_old[i];
+            diff += d * d;
+        }
+        diff = std::sqrt(diff);
+        if ((iter > 0 && std::abs(lambda - lambda_old) < tol * std::abs(lambda)) || (diff < tol)) {
+            for (int i = 0; i < n; ++i) x[i] = x_new[i];
+            return x;
+        }
+        lambda_old = lambda;
+        x_old.swap(x_new);
+    }
+    for (int i = 0; i < n; ++i) x[i] = x_old[i];
+    return x;
+}
+
+double vector_norm(const std::vector<double>& x) {
+    double s = 0.0;
+    #pragma omp parallel for reduction(+:s) if(x.size() > 1000)
+    for (size_t i = 0; i < x.size(); ++i) s += x[i] * x[i];
+    return std::sqrt(s);
+}
+
+void normalize_vector(std::vector<double>& x, double norm) {
+    const double inv_norm = 1.0 / norm;
+    #pragma omp parallel for schedule(static) if(x.size() > 1000)
+    for (size_t i = 0; i < x.size(); ++i) {
+        x[i] *= inv_norm;
+    }
+}
+
+double vector_diff_norm(const std::vector<double>& x1, const std::vector<double>& x2) {
+    double diff = 0.0;
+    #pragma omp parallel for reduction(+:diff) if(x1.size() > 1000)
+    for (size_t i = 0; i < x1.size(); ++i) {
+        double d = x1[i] - x2[i];
+        diff += d * d;
+    }
+    return std::sqrt(diff);
 }
\ No newline at end of file

From 1b6a0d90a1f5c6cb955adf95f5b67a137320fc88 Mon Sep 17 00:00:00 2001
From: Nianle <24302010062@m.fudan.edu.cn>
Date: Fri, 5 Dec 2025 06:48:12 -0700
Subject: [PATCH 3/6] Optimize Closeness Centrality

---
 .../functions/centrality/closeness.cpp        | 294 ++++++++++++++++--
 1 file changed, 262 insertions(+), 32 deletions(-)

diff --git a/cpp_easygraph/functions/centrality/closeness.cpp b/cpp_easygraph/functions/centrality/closeness.cpp
index 9a2dc967..89b6d2ab 100644
--- a/cpp_easygraph/functions/centrality/closeness.cpp
+++ b/cpp_easygraph/functions/centrality/closeness.cpp
@@ -7,44 +7,186 @@
 #include "../../classes/graph.h"
 #include "../../common/utils.h"
 #include "../../classes/linkgraph.h"
-#include "../../classes/segment_tree.cpp"
 
-double closeness_dijkstra(const Graph_L& G_l, const int &S, int cutoff, Segment_tree_zkw& segment_tree_zkw){
-    const int dis_inf = 0x3f3f3f3f;
+#include <queue>
+#include <vector>
+#include <limits>
+#include <cmath>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+// 堆节点：使用负值 + 最大堆实现最小堆
+typedef std::pair<float, int> HeapNode;
+
+// 优化的邻接表缓存结构 - 按需构建，类似igraph的lazy_inclist
+struct FastAdjCache {
+    std::vector<int*> neighbor_ptrs;
+    std::vector<int> neighbor_counts;
+    std::vector<float*> weight_ptrs;
+    std::vector<std::vector<int>> neighbor_storage;
+    std::vector<std::vector<float>> weight_storage;
+    
+    void init(int N) {
+        neighbor_ptrs.resize(N + 1, nullptr);
+        neighbor_counts.resize(N + 1, 0);
+        neighbor_storage.resize(N + 1);
+    }
+    
+    void init_with_weights(int N) {
+        init(N);
+        weight_ptrs.resize(N + 1, nullptr);
+        weight_storage.resize(N + 1);
+    }
+    
+    inline void build_if_needed(int u, const std::vector<int>& head, 
+                                const std::vector<LinkEdge>& edges, bool store_weights) {
+        if (neighbor_ptrs[u] != nullptr) return;
+        
+        std::vector<int>& neis = neighbor_storage[u];
+        for (int p = head[u]; p != -1; p = edges[p].next) {
+            neis.push_back(edges[p].to);
+            if (store_weights) {
+                weight_storage[u].push_back(edges[p].w);
+            }
+        }
+        
+        neighbor_counts[u] = neis.size();
+        neighbor_ptrs[u] = neis.data();
+        if (store_weights) {
+            weight_ptrs[u] = weight_storage[u].data();
+        }
+    }
+    
+    inline int* get_neighbors_ptr(int u) const { return neighbor_ptrs[u]; }
+    inline int get_neighbor_count(int u) const { return neighbor_counts[u]; }
+    inline float* get_weights_ptr(int u) const { return weight_ptrs[u]; }
+};
+
+// BFS实现 - 直接使用原始邻接表
+double closeness_bfs_direct(const Graph_L& G_l, const int &S, int cutoff,
+                            std::vector<int>& already_counted,
+                            std::vector<int>& queue_storage,
+                            int timestamp) {
     int N = G_l.n;
-    segment_tree_zkw.init(N);
-    std::vector<float> dis(N+1, INT_MAX);
     const std::vector<LinkEdge>& E = G_l.edges;
     const std::vector<int>& head = G_l.head;
-    int number_connected = 0;
+    
+    int nodes_reached = 0;
     long long sum_dis = 0;
-    dis[S] = 0; 
-    segment_tree_zkw.change(S, 0);
-    while(segment_tree_zkw.t[1] != dis_inf) {
-        int u = segment_tree_zkw.num[1];
-        if(u == 0) break;
-        segment_tree_zkw.change(u, dis_inf);
-        if (cutoff >= 0 && dis[u] > cutoff){
+    
+    queue_storage.clear();
+    
+    int queue_front = 0;
+    already_counted[S] = timestamp;
+    queue_storage.push_back(S);
+    queue_storage.push_back(0);
+    
+    while (queue_front < static_cast<int>(queue_storage.size())) {
+        int u = queue_storage[queue_front++];
+        int actdist = queue_storage[queue_front++];
+        
+        if (cutoff >= 0 && actdist > cutoff) {
             continue;
-        } 
-        number_connected += 1;
-        sum_dis += dis[u];
-        for(register int p = head[u]; p != -1; p = E[p].next) {
+        }
+        
+        sum_dis += actdist;
+        nodes_reached++;
+        
+        for (int p = head[u]; p != -1; p = E[p].next) {
             int v = E[p].to;
-            if(cutoff >= 0 && (dis[u] + E[p].w) > cutoff){
+            
+            if (already_counted[v] == timestamp) {
                 continue;
             }
-            if (dis[v] > dis[u] + E[p].w) {
-                dis[v] = dis[u] + E[p].w;
-                segment_tree_zkw.change(v, dis[v]);  
-            }
+            
+            already_counted[v] = timestamp;
+            queue_storage.push_back(v);
+            queue_storage.push_back(actdist + 1);
         }
     }
-    if (number_connected == 1)
+    
+    if (nodes_reached == 1)
         return 0.0;
     else
-        return 1.0 * (number_connected - 1) * (number_connected - 1) / ((N - 1) * sum_dis);
+        return 1.0 * (nodes_reached - 1) * (nodes_reached - 1) / ((N - 1) * sum_dis);
+}
+
+// 检查图是否为无权图
+inline bool is_unweighted_graph(const Graph_L& G_l) {
+    const std::vector<LinkEdge>& E = G_l.edges;
+    for (const auto& edge : E) {
+        if (std::abs(edge.w - 1.0f) > 1e-9) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Dijkstra实现 - 使用按需构建的邻接表缓存
+double closeness_dijkstra_cached(const Graph_L& G_l, const int &S, int cutoff,
+                                 std::vector<float>& dist,
+                                 std::vector<int>& which,
+                                 FastAdjCache& cache,
+                                 int timestamp) {
+    int N = G_l.n;
+    const std::vector<LinkEdge>& E = G_l.edges;
+    const std::vector<int>& head = G_l.head;
+    
+    int nodes_reached = 0;
+    double sum_dis = 0.0;
+    
+    std::priority_queue<HeapNode> heap;
     
+    dist[S] = 1.0f;
+    which[S] = timestamp;
+    heap.push({-1.0f, S});
+    
+    while (!heap.empty()) {
+        HeapNode top = heap.top();
+        heap.pop();
+        float mindist = -top.first;
+        int minnei = top.second;
+        
+        if (mindist > dist[minnei]) {
+            continue;
+        }
+        
+        float actual_dist = mindist - 1.0f;
+        if (cutoff >= 0 && actual_dist > cutoff) {
+            continue;
+        }
+        
+        sum_dis += actual_dist;
+        nodes_reached++;
+        
+        cache.build_if_needed(minnei, head, E, true);
+        
+        int* neis = cache.get_neighbors_ptr(minnei);
+        float* ws = cache.get_weights_ptr(minnei);
+        int nlen = cache.get_neighbor_count(minnei);
+        
+        for (int j = 0; j < nlen; j++) {
+            int to = neis[j];
+            float altdist = mindist + ws[j];
+            float curdist = dist[to];
+            
+            if (which[to] != timestamp) {
+                which[to] = timestamp;
+                dist[to] = altdist;
+                heap.push({-altdist, to});
+            } else if (curdist == 0.0f || altdist < curdist) {
+                dist[to] = altdist;
+                heap.push({-altdist, to});
+            }
+        }
+    }
+    
+    if (nodes_reached == 1)
+        return 0.0;
+    else
+        return 1.0 * (nodes_reached - 1) * (nodes_reached - 1) / ((N - 1) * sum_dis);
 }
 
 static py::object invoke_cpp_closeness_centrality(py::object G, py::object weight, 
@@ -58,27 +200,115 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
     if (!cutoff.is_none()){
         cutoff_ = cutoff.cast<int>();
     }
-    Segment_tree_zkw segment_tree_zkw(N);
+    
+    // 自动选择算法
+    bool use_bfs = (weight.is_none() || is_unweighted_graph(G_l));
+    
     std::vector<double> CC;
+    
     if(!sources.is_none()){
         py::list sources_list = py::list(sources);
         int sources_list_len = py::len(sources_list);
-        for(register int i = 0; i < sources_list_len; i++){
+        CC.resize(sources_list_len);
+        
+        // 收集所有源节点ID
+        std::vector<node_t> source_ids(sources_list_len);
+        for(int i = 0; i < sources_list_len; i++){
             if(G_.node_to_id.attr("get")(sources_list[i],py::none()).is_none()){
                 printf("The node should exist in the graph!");
                 return py::none();
             }
-            node_t source_id = G_.node_to_id.attr("get")(sources_list[i]).cast<node_t>();
-            float res = closeness_dijkstra(G_l, source_id, cutoff_,segment_tree_zkw);
-            CC.push_back(res);
+            source_ids[i] = G_.node_to_id.attr("get")(sources_list[i]).cast<node_t>();
+        }
+        
+        // OpenMP并行计算（参考eigenvector的并行策略）
+        // 只在源节点数量较多时启用并行，避免小任务的并行开销
+        // omp parallel if(sources_list_len > 100)
+        {
+            // 每个线程独立的数据结构（避免竞争）
+            std::vector<int> already_counted(N + 1, 0);
+            std::vector<int> queue_storage;
+            queue_storage.reserve(N * 2);
+            
+            std::vector<float> dist(N + 1, 0.0f);
+            std::vector<int> which(N + 1, 0);
+            
+            FastAdjCache cache;
+            if (!use_bfs) {
+                cache.init_with_weights(N);
+            }
+            
+            // 为每个线程分配唯一的时间戳起始值
+            #ifdef _OPENMP
+            int thread_id = omp_get_thread_num();
+            int num_threads = omp_get_num_threads();
+            int timestamp = thread_id * sources_list_len;
+            #else
+            int timestamp = 0;
+            #endif
+            
+            // 并行循环：每个线程处理不同的源节点
+            // omp for schedule(dynamic, 1)
+            for(int i = 0; i < sources_list_len; i++){
+                timestamp++;
+                double res;
+                if (use_bfs) {
+                    res = closeness_bfs_direct(G_l, source_ids[i], cutoff_, 
+                                              already_counted, queue_storage, timestamp);
+                } else {
+                    res = closeness_dijkstra_cached(G_l, source_ids[i], cutoff_, 
+                                                   dist, which, cache, timestamp);
+                }
+                CC[i] = res;
+            }
         }
     }
     else{
-        for(register int i = 1; i <= N; i++){
-            float res = closeness_dijkstra(G_l, i, cutoff_,segment_tree_zkw);
-            CC.push_back(res);
+        CC.resize(N);
+        
+        // OpenMP并行计算所有节点
+        // 只在节点数量较多时启用并行
+        // omp parallel if(N > 100)
+        {
+            // 每个线程独立的数据结构
+            std::vector<int> already_counted(N + 1, 0);
+            std::vector<int> queue_storage;
+            queue_storage.reserve(N * 2);
+            
+            std::vector<float> dist(N + 1, 0.0f);
+            std::vector<int> which(N + 1, 0);
+            
+            FastAdjCache cache;
+            if (!use_bfs) {
+                cache.init_with_weights(N);
+            }
+            
+            // 为每个线程分配唯一的时间戳起始值
+            #ifdef _OPENMP
+            int thread_id = omp_get_thread_num();
+            int num_threads = omp_get_num_threads();
+            int timestamp = thread_id * N;
+            #else
+            int timestamp = 0;
+            #endif
+            
+            // 并行循环：dynamic调度适合负载不均衡的情况
+            // omp for schedule(dynamic, 10)
+            for(int i = 1; i <= N; i++){
+                timestamp++;
+                double res;
+                if (use_bfs) {
+                    res = closeness_bfs_direct(G_l, i, cutoff_, 
+                                              already_counted, queue_storage, timestamp);
+                } else {
+                    res = closeness_dijkstra_cached(G_l, i, cutoff_, 
+                                                   dist, which, cache, timestamp);
+                }
+                CC[i - 1] = res;
+            }
         }
     }
+    
     py::array::ShapeContainer ret_shape{(int)CC.size()};
     py::array_t<double> ret(ret_shape, CC.data());
 

From 106b32243fa1fad693c8d2baa1d93dd9a816bffd Mon Sep 17 00:00:00 2001
From: Nianle <24302010062@m.fudan.edu.cn>
Date: Fri, 5 Dec 2025 18:08:57 -0700
Subject: [PATCH 4/6] Enable OpenMP

---
 .../functions/centrality/closeness.cpp        | 42 +++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/cpp_easygraph/functions/centrality/closeness.cpp b/cpp_easygraph/functions/centrality/closeness.cpp
index 89b6d2ab..d06f3ec9 100644
--- a/cpp_easygraph/functions/centrality/closeness.cpp
+++ b/cpp_easygraph/functions/centrality/closeness.cpp
@@ -17,10 +17,10 @@
 #include <omp.h>
 #endif
 
-// 堆节点：使用负值 + 最大堆实现最小堆
+// Heap node: use negative value + max heap to implement min heap
 typedef std::pair<float, int> HeapNode;
 
-// 优化的邻接表缓存结构 - 按需构建，类似igraph的lazy_inclist
+// Optimized adjacency list cache
 struct FastAdjCache {
     std::vector<int*> neighbor_ptrs;
     std::vector<int> neighbor_counts;
@@ -64,7 +64,7 @@ struct FastAdjCache {
     inline float* get_weights_ptr(int u) const { return weight_ptrs[u]; }
 };
 
-// BFS实现 - 直接使用原始邻接表
+// BFS implementation - directly use raw adjacency list
 double closeness_bfs_direct(const Graph_L& G_l, const int &S, int cutoff,
                             std::vector<int>& already_counted,
                             std::vector<int>& queue_storage,
@@ -113,7 +113,7 @@ double closeness_bfs_direct(const Graph_L& G_l, const int &S, int cutoff,
         return 1.0 * (nodes_reached - 1) * (nodes_reached - 1) / ((N - 1) * sum_dis);
 }
 
-// 检查图是否为无权图
+// Check if the graph is unweighted
 inline bool is_unweighted_graph(const Graph_L& G_l) {
     const std::vector<LinkEdge>& E = G_l.edges;
     for (const auto& edge : E) {
@@ -124,7 +124,7 @@ inline bool is_unweighted_graph(const Graph_L& G_l) {
     return true;
 }
 
-// Dijkstra实现 - 使用按需构建的邻接表缓存
+// Dijkstra implementation - use on-demand adjacency cache
 double closeness_dijkstra_cached(const Graph_L& G_l, const int &S, int cutoff,
                                  std::vector<float>& dist,
                                  std::vector<int>& which,
@@ -201,7 +201,7 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
         cutoff_ = cutoff.cast<int>();
     }
     
-    // 自动选择算法
+    // Auto algorithm selection
     bool use_bfs = (weight.is_none() || is_unweighted_graph(G_l));
     
     std::vector<double> CC;
@@ -211,7 +211,7 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
         int sources_list_len = py::len(sources_list);
         CC.resize(sources_list_len);
         
-        // 收集所有源节点ID
+        // Collect all source node IDs
         std::vector<node_t> source_ids(sources_list_len);
         for(int i = 0; i < sources_list_len; i++){
             if(G_.node_to_id.attr("get")(sources_list[i],py::none()).is_none()){
@@ -221,11 +221,11 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
             source_ids[i] = G_.node_to_id.attr("get")(sources_list[i]).cast<node_t>();
         }
         
-        // OpenMP并行计算（参考eigenvector的并行策略）
-        // 只在源节点数量较多时启用并行，避免小任务的并行开销
-        // omp parallel if(sources_list_len > 100)
+        // OpenMP parallel computation
+        // Only enable parallelism when sources are many to avoid overhead
+        #pragma omp parallel if(sources_list_len > 100)
         {
-            // 每个线程独立的数据结构（避免竞争）
+            // Per-thread data structures (avoid race conditions)
             std::vector<int> already_counted(N + 1, 0);
             std::vector<int> queue_storage;
             queue_storage.reserve(N * 2);
@@ -238,7 +238,7 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
                 cache.init_with_weights(N);
             }
             
-            // 为每个线程分配唯一的时间戳起始值
+            // Assign unique timestamp start for each thread
             #ifdef _OPENMP
             int thread_id = omp_get_thread_num();
             int num_threads = omp_get_num_threads();
@@ -247,8 +247,8 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
             int timestamp = 0;
             #endif
             
-            // 并行循环：每个线程处理不同的源节点
-            // omp for schedule(dynamic, 1)
+            // Parallel loop: each thread handles different source node
+            #pragma omp for schedule(dynamic, 1)
             for(int i = 0; i < sources_list_len; i++){
                 timestamp++;
                 double res;
@@ -266,11 +266,11 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
     else{
         CC.resize(N);
         
-        // OpenMP并行计算所有节点
-        // 只在节点数量较多时启用并行
-        // omp parallel if(N > 100)
+        // OpenMP parallel computation for all nodes
+        // Only enable parallelism when node count is large
+        #pragma omp parallel if(N > 100)
         {
-            // 每个线程独立的数据结构
+            // Per-thread data structures
             std::vector<int> already_counted(N + 1, 0);
             std::vector<int> queue_storage;
             queue_storage.reserve(N * 2);
@@ -283,7 +283,7 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
                 cache.init_with_weights(N);
             }
             
-            // 为每个线程分配唯一的时间戳起始值
+            // Assign unique timestamp start for each thread
             #ifdef _OPENMP
             int thread_id = omp_get_thread_num();
             int num_threads = omp_get_num_threads();
@@ -292,8 +292,8 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh
             int timestamp = 0;
             #endif
             
-            // 并行循环：dynamic调度适合负载不均衡的情况
-            // omp for schedule(dynamic, 10)
+            // Parallel loop: dynamic scheduling for load balancing
+            #pragma omp for schedule(dynamic, 10)
             for(int i = 1; i <= N; i++){
                 timestamp++;
                 double res;

From ff6c2809fca716bb4c59317d2d6838afb0618226 Mon Sep 17 00:00:00 2001
From: Nianle <24302010062@m.fudan.edu.cn>
Date: Fri, 5 Dec 2025 18:58:15 -0700
Subject: [PATCH 5/6] Update cpp_easygraph/CMakeLists.txt: optional deps and
 safe target linking

---
 cpp_easygraph/CMakeLists.txt | 82 ++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 45 deletions(-)

diff --git a/cpp_easygraph/CMakeLists.txt b/cpp_easygraph/CMakeLists.txt
index 2e85607f..673c70fe 100644
--- a/cpp_easygraph/CMakeLists.txt
+++ b/cpp_easygraph/CMakeLists.txt
@@ -13,63 +13,55 @@ add_subdirectory(pybind11)
 
 option(EASYGRAPH_ENABLE_GPU "EASYGRAPH_ENABLE_GPU" OFF)
 
-# 启用OpenMP支持
+# Find OpenMP (optional)
 find_package(OpenMP)
 if(OpenMP_CXX_FOUND)
     message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}")
-    message(STATUS "OpenMP flags: ${OpenMP_CXX_FLAGS}")
 else()
-    message(WARNING "OpenMP not found. Parallel optimization will be disabled.")
+    message(STATUS "OpenMP not found; continuing without OpenMP")
 endif()
 
-# 查找ARPACK库（兼容无CMake配置的系统）
-find_library(ARPACK_LIB NAMES arpack arpack_double PATHS ENV LD_LIBRARY_PATH)
-if(NOT ARPACK_LIB)
-    message(FATAL_ERROR "ARPACK library not found")
-else()
+# Find ARPACK (optional; do not fail if absent)
+find_library(ARPACK_LIB NAMES arpack arpack_double)
+if(ARPACK_LIB)
     message(STATUS "Found ARPACK library: ${ARPACK_LIB}")
+    add_compile_definitions(HAVE_ARPACK=1)
+else()
+    message(WARNING "ARPACK not found; ARPACK-based solvers will be disabled (fallback used).")
+    add_compile_definitions(HAVE_ARPACK=0)
 endif()
 
 find_package(LAPACK REQUIRED)
 
-if (EASYGRAPH_ENABLE_GPU)
-
-    pybind11_add_module(cpp_easygraph
-        ${SOURCES}
-        $<TARGET_OBJECTS:gpu_easygraph>
-    )
-
-    set_property(TARGET cpp_easygraph PROPERTY CUDA_ARCHITECTURES all)
-
-    target_compile_definitions(cpp_easygraph 
-        PRIVATE EASYGRAPH_ENABLE_GPU
-    )
-
-    target_link_libraries(cpp_easygraph
-        PRIVATE cudart_static
-    )
-
-else()
-
-    pybind11_add_module(cpp_easygraph
-        ${SOURCES}
-    )
-
-endif()
-
+# Find Eigen (optional). Change to REQUIRED if Eigen must be enforced.
 find_package(Eigen3 QUIET)
 if(Eigen3_FOUND)
-    message(STATUS "Found Eigen3: ${EIGEN3_INCLUDE_DIR}")
-    include_directories(${EIGEN3_INCLUDE_DIR})
-    add_definitions(-DEIGEN_MAJOR_VERSION=${EIGEN3_VERSION_MAJOR})
-endif()
-
-# 将OpenMP链接到target
-if(OpenMP_CXX_FOUND)
-    target_link_libraries(cpp_easygraph PRIVATE OpenMP::OpenMP_CXX)
-    target_compile_options(cpp_easygraph PRIVATE ${OpenMP_CXX_FLAGS})
-    message(STATUS "OpenMP enabled for cpp_easygraph")
+    message(STATUS "Found Eigen3: ${Eigen3_INCLUDE_DIRS}")
+    add_compile_definitions(HAVE_EIGEN=1)
+else()
+    message(STATUS "Eigen3 not found; eigen-accelerated code will be disabled")
+    add_compile_definitions(HAVE_EIGEN=0)
 endif()
 
-# 链接ARPACK和LAPACK
-target_link_libraries(cpp_easygraph PRIVATE ${ARPACK_LIB} LAPACK::LAPACK)
\ No newline at end of file
+# Link/include to cpp_easygraph only if the target exists.
+# This avoids errors when the target is created elsewhere or later.
+if(TARGET cpp_easygraph)
+    if(Eigen3_FOUND)
+        target_include_directories(cpp_easygraph PRIVATE ${Eigen3_INCLUDE_DIRS})
+        if(TARGET Eigen3::Eigen)
+            target_link_libraries(cpp_easygraph PRIVATE Eigen3::Eigen)
+        endif()
+    endif()
+
+    if(OpenMP_CXX_FOUND)
+        target_link_libraries(cpp_easygraph PRIVATE OpenMP::OpenMP_CXX)
+    endif()
+
+    if(ARPACK_LIB)
+        target_link_libraries(cpp_easygraph PRIVATE ${ARPACK_LIB} LAPACK::LAPACK)
+    else()
+        target_link_libraries(cpp_easygraph PRIVATE LAPACK::LAPACK)
+    endif()
+else()
+    message(STATUS "cpp_easygraph target not defined yet; optional libs will be linked after target creation.")
+endif()
\ No newline at end of file

From 6537360aad36f6661cd5fe3ab0aae72607078d93 Mon Sep 17 00:00:00 2001
From: NianLe <24302010062@m.fudan.edu.cn>
Date: Sat, 6 Dec 2025 11:53:58 +0800
Subject: [PATCH 6/6] Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 cpp_easygraph/cpp_easygraph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp_easygraph/cpp_easygraph.cpp b/cpp_easygraph/cpp_easygraph.cpp
index 487c0b1b..1733d7e0 100644
--- a/cpp_easygraph/cpp_easygraph.cpp
+++ b/cpp_easygraph/cpp_easygraph.cpp
@@ -83,7 +83,7 @@ PYBIND11_MODULE(cpp_easygraph, m) {
     m.def("cpp_constraint", &constraint, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none());
     m.def("cpp_effective_size", &effective_size, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none());
     m.def("cpp_efficiency", &efficiency, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none());
-    m.def("cpp_eigenvector_centrality", &cpp_eigenvector_centrality, py::arg("G"), py::arg("max_iter") = 100, py::arg("tol") = 1.0e-6, py::arg("nstart") = py::none(), py::arg("weight") = "weight");
+    m.def("cpp_eigenvector_centrality", &cpp_eigenvector_centrality, py::arg("G"), py::arg("max_iter") = 100, py::arg("tol") = 1.0e-6, py::arg("nstart") = py::none(), py::arg("weight") = py::none());
     m.def("cpp_hierarchy", &hierarchy, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none());
     m.def("cpp_pagerank", &_pagerank, py::arg("G"), py::arg("alpha") = 0.85, py::arg("max_iterator") = 500, py::arg("threshold") = 1e-6);
     m.def("cpp_dijkstra_multisource", &_dijkstra_multisource, py::arg("G"), py::arg("sources"), py::arg("weight") = "weight", py::arg("target") = py::none());