From 7d1966c3787ba7d75eb80f73c1a18efb7d451250 Mon Sep 17 00:00:00 2001 From: sama Date: Thu, 23 Oct 2025 03:38:23 -0600 Subject: [PATCH 1/6] feat:support eigenvector centrality algorithm --- cpp_easygraph/CMakeLists.txt | 16 +- cpp_easygraph/cpp_easygraph.cpp | 1 + .../functions/centrality/centrality.h | 7 + .../functions/centrality/eigenvector.cpp | 416 ++++++++++++++++++ easygraph/functions/centrality/__init__.py | 3 +- easygraph/functions/centrality/eigenvector.py | 155 +++++++ .../centrality/tests/test_eigenvector.py | 119 +++++ 7 files changed, 715 insertions(+), 2 deletions(-) create mode 100644 cpp_easygraph/functions/centrality/eigenvector.cpp create mode 100644 easygraph/functions/centrality/eigenvector.py create mode 100644 easygraph/functions/centrality/tests/test_eigenvector.py diff --git a/cpp_easygraph/CMakeLists.txt b/cpp_easygraph/CMakeLists.txt index 2a43c776..f272b808 100644 --- a/cpp_easygraph/CMakeLists.txt +++ b/cpp_easygraph/CMakeLists.txt @@ -41,4 +41,18 @@ endif() set_target_properties(cpp_easygraph PROPERTIES LINK_SEARCH_START_STATIC ON LINK_SEARCH_END_STATIC ON -) \ No newline at end of file +) + +find_package(Eigen3 QUIET) +if(Eigen3_FOUND) + message(STATUS "Found Eigen3: ${EIGEN3_INCLUDE_DIR}") + include_directories(${EIGEN3_INCLUDE_DIR}) + add_definitions(-DEIGEN_MAJOR_VERSION=${EIGEN3_VERSION_MAJOR}) +endif() + +# 启用OpenMP支持 +find_package(OpenMP) +if(OpenMP_CXX_FOUND) + message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() \ No newline at end of file diff --git a/cpp_easygraph/cpp_easygraph.cpp b/cpp_easygraph/cpp_easygraph.cpp index f5570d6e..487c0b1b 100644 --- a/cpp_easygraph/cpp_easygraph.cpp +++ b/cpp_easygraph/cpp_easygraph.cpp @@ -83,6 +83,7 @@ PYBIND11_MODULE(cpp_easygraph, m) { m.def("cpp_constraint", &constraint, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_effective_size", &effective_size, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_efficiency", &efficiency, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); + m.def("cpp_eigenvector_centrality", &cpp_eigenvector_centrality, py::arg("G"), py::arg("max_iter") = 100, py::arg("tol") = 1.0e-6, py::arg("nstart") = py::none(), py::arg("weight") = "weight"); m.def("cpp_hierarchy", &hierarchy, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_pagerank", &_pagerank, py::arg("G"), py::arg("alpha") = 0.85, py::arg("max_iterator") = 500, py::arg("threshold") = 1e-6); m.def("cpp_dijkstra_multisource", &_dijkstra_multisource, py::arg("G"), py::arg("sources"), py::arg("weight") = "weight", py::arg("target") = py::none()); diff --git a/cpp_easygraph/functions/centrality/centrality.h b/cpp_easygraph/functions/centrality/centrality.h index 7040618a..121c9eb0 100644 --- a/cpp_easygraph/functions/centrality/centrality.h +++ b/cpp_easygraph/functions/centrality/centrality.h @@ -12,4 +12,11 @@ py::object cpp_katz_centrality( py::object py_max_iter, py::object py_tol, py::object py_normalized +); +py::object cpp_eigenvector_centrality( + py::object G, + py::object py_max_iter, + py::object py_tol, + py::object py_nstart, + py::object py_weight ); \ No newline at end of file diff --git a/cpp_easygraph/functions/centrality/eigenvector.cpp b/cpp_easygraph/functions/centrality/eigenvector.cpp new file mode 100644 index 00000000..aa434621 --- /dev/null +++ b/cpp_easygraph/functions/centrality/eigenvector.cpp @@ -0,0 +1,416 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "centrality.h" +#include "../../classes/graph.h" + +#ifdef EIGEN_MAJOR_VERSION +#include +#include +#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 +#define HAVE_EIGEN +#endif +#endif + +namespace py = pybind11; + +class CSRMatrix { +public: + std::vector indptr; + std::vector indices; + std::vector data; + int rows, cols; + + CSRMatrix(int r, int c) : rows(r), cols(c) { + indptr.resize(r + 1, 0); + } + + void reserve(size_t nnz) { + indices.reserve(nnz); + data.reserve(nnz); + } + + std::vector multiply(const std::vector& vec) const { + std::vector result(rows, 0.0); + + #pragma omp parallel for if(rows > 1000) + for (int i = 0; i < rows; i++) { + for (int j = indptr[i]; j < indptr[i + 1]; j++) { + result[i] += data[j] * vec[indices[j]]; + } + } + + return result; + } + + void multiply_inplace(const std::vector& vec, std::vector& result) const { + std::fill(result.begin(), result.end(), 0.0); + + #pragma omp parallel for if(rows > 1000) + for (int i = 0; i < rows; i++) { + for (int j = indptr[i]; j < indptr[i + 1]; j++) { + result[i] += data[j] * vec[indices[j]]; + } + } + } + +#ifdef HAVE_EIGEN + Eigen::SparseMatrix to_eigen() const { + Eigen::SparseMatrix eigen_mat(rows, cols); + eigen_mat.reserve(Eigen::VectorXi::Constant(cols, 5)); + + for (int i = 0; i < rows; i++) { + for (int j = indptr[i]; j < indptr[i + 1]; j++) { + eigen_mat.insert(i, indices[j]) = data[j]; + } + } + eigen_mat.makeCompressed(); + return eigen_mat; + } +#endif +}; + +CSRMatrix build_transpose_matrix( + const Graph& graph, + const std::vector& nodes, + const std::string& weight_key +) { + int n = nodes.size(); + std::unordered_map node_to_idx; + for (size_t i = 0; i < nodes.size(); i++) { + node_to_idx[nodes[i]] = i; + } + + size_t nnz = 0; + for (node_t node_id : nodes) { + const auto& adj_it = graph.adj.find(node_id); + if (adj_it != graph.adj.end()) { + nnz += adj_it->second.size(); + } + } + + std::vector> edges; + edges.reserve(nnz); + + for (node_t src_id : nodes) { + int src_idx = node_to_idx[src_id]; + const auto& adj_it = graph.adj.find(src_id); + if (adj_it != graph.adj.end()) { + for (const auto& adj_pair : adj_it->second) { + node_t dst_id = adj_pair.first; + auto dst_it = node_to_idx.find(dst_id); + if (dst_it != node_to_idx.end()) { + int dst_idx = dst_it->second; + + double w = 1.0; + if (!weight_key.empty()) { + auto w_it = adj_pair.second.find(weight_key); + if (w_it != adj_pair.second.end()) { + w = w_it->second; + } + } + + edges.push_back(std::make_tuple(dst_idx, src_idx, w)); + } + } + } + } + + std::sort(edges.begin(), edges.end(), + [](const std::tuple& a, const std::tuple& b) { + if (std::get<0>(a) != std::get<0>(b)) { + return std::get<0>(a) < std::get<0>(b); + } + return std::get<1>(a) < std::get<1>(b); + }); + + CSRMatrix matrix(n, n); + matrix.reserve(edges.size()); + + for (const auto& edge : edges) { + int row = std::get<0>(edge); + matrix.indptr[row + 1]++; + } + + for (int i = 0; i < n; i++) { + matrix.indptr[i + 1] += matrix.indptr[i]; + } + + for (const auto& edge : edges) { + int row = std::get<0>(edge); + int col = std::get<1>(edge); + double val = std::get<2>(edge); + + matrix.indices.push_back(col); + matrix.data.push_back(val); + } + + std::vector has_connections(n, false); + + for (size_t i = 0; i < n; i++) { + if (matrix.indptr[i + 1] > matrix.indptr[i]) { + has_connections[i] = true; + } + } + + for (int i = 0; i < n; i++) { + if (!has_connections[i]) { + matrix.indices.push_back(i); + matrix.data.push_back(1.0e-4); + + for (int j = i + 1; j <= n; j++) { + matrix.indptr[j]++; + } + } + } + return matrix; +} + +std::vector power_iteration( + const CSRMatrix& A, + int max_iter, + double tol, + std::vector& x +) { + int n = A.rows; + std::vector x_next(n, 0.0); + + for (int iter = 0; iter < max_iter; iter++) { + A.multiply_inplace(x, x_next); + + double norm = 0.0; + #pragma omp parallel for reduction(+:norm) if(n > 10000) + for (int i = 0; i < n; i++) { + norm += x_next[i] * x_next[i]; + } + norm = std::sqrt(norm); + + if (norm < 1e-12) { + return x; + } + + #pragma omp parallel for if(n > 10000) + for (int i = 0; i < n; i++) { + x_next[i] /= norm; + } + + double diff = 0.0; + #pragma omp parallel for reduction(+:diff) if(n > 10000) + for (int i = 0; i < n; i++) { + diff += std::fabs(x_next[i] - x[i]); + } + + if (diff < n * tol) { + std::swap(x, x_next); + break; + } + + std::swap(x, x_next); + } + + return x; +} + +#ifdef HAVE_EIGEN +std::vector compute_eigenvector_eigen( + const CSRMatrix& A, + int max_iter, + double tol +) { + Eigen::SparseMatrix eigen_matrix = A.to_eigen(); + + int n = A.rows; + std::vector result(n); + + try { + Eigen::EigenSolver solver; + + if (n < 1000) { + Eigen::MatrixXd dense_matrix(eigen_matrix); + solver.compute(dense_matrix); + + int max_idx = 0; + double max_val = solver.eigenvalues()[0].real(); + for (int i = 1; i < n; i++) { + if (solver.eigenvalues()[i].real() > max_val) { + max_val = solver.eigenvalues()[i].real(); + max_idx = i; + } + } + + Eigen::VectorXd eigen_vec = solver.eigenvectors().col(max_idx).real(); + for (int i = 0; i < n; i++) { + result[i] = eigen_vec(i); + } + } else { + throw std::runtime_error("Matrix too large for dense solver"); + } + } + catch (const std::exception& e) { + std::vector x(n, 1.0/n); + return power_iteration(A, max_iter, tol, x); + } + + double sum = 0.0; + for (double val : result) { + sum += val; + } + + if (sum < 0.0) { + for (double& val : result) { + val = -val; + } + } + + double norm = 0.0; + for (double val : result) { + norm += val * val; + } + norm = std::sqrt(norm); + + for (double& val : result) { + val /= norm; + } + + return result; +} +#endif + +py::object cpp_eigenvector_centrality( + py::object G, + py::object py_max_iter, + py::object py_tol, + py::object py_nstart, + py::object py_weight +) { + try { + Graph& graph = G.cast(); + int max_iter = py_max_iter.cast(); + double tol = py_tol.cast(); + std::string weight_key = ""; + if (!py_weight.is_none()) { + weight_key = py_weight.cast(); + } + + if (graph.node.size() == 0) { + return py::dict(); + } + + std::vector nodes; + for (auto& node_pair : graph.node) { + nodes.push_back(node_pair.first); + } + int n = nodes.size(); + + CSRMatrix A_transpose = build_transpose_matrix(graph, nodes, weight_key); + + std::vector isolated_nodes(n, true); + for (int i = 0; i < n; i++) { + // A node is isolated if it has zero edges (no entries in its row) + if (A_transpose.indptr[i + 1] == A_transpose.indptr[i]) { + isolated_nodes[i] = true; + } else { + isolated_nodes[i] = false; + } + } + + std::vector centrality; + + if (py_nstart.is_none()) { + bool fast_solver_success = false; + +#ifdef HAVE_EIGEN + try { + centrality = compute_eigenvector_eigen(A_transpose, max_iter, tol); + fast_solver_success = true; + } catch (const std::exception&) { + fast_solver_success = false; + } +#endif + + if (!fast_solver_success) { + std::vector x(n, 1.0/n); + centrality = power_iteration(A_transpose, max_iter, tol, x); + } + } else { + py::dict nstart = py_nstart.cast(); + std::vector x(n, 0.0); + + for (size_t i = 0; i < nodes.size(); i++) { + py::object node_obj = graph.id_to_node[py::cast(nodes[i])]; + if (nstart.contains(node_obj)) { + x[i] = nstart[node_obj].cast(); + } else { + x[i] = 1.0; + } + } + + bool all_zeros = true; + for (double val : x) { + if (std::abs(val) > 1e-10) { + all_zeros = false; + break; + } + } + + if (all_zeros) { + throw std::runtime_error("initial vector cannot have all zero values"); + } + + double sum_abs = 0.0; + for (double val : x) { + sum_abs += std::fabs(val); + } + + for (double& val : x) { + val /= sum_abs; + } + + centrality = power_iteration(A_transpose, max_iter, tol, x); + } + + double sum = 0.0; + for (double val : centrality) { + sum += val; + } + + if (sum < 0.0) { + for (double& val : centrality) { + val = -val; + } + } + + for (int i = 0; i < n; i++) { + if (isolated_nodes[i]) { + centrality[i] = 0.0; + } + } + + double norm = 0.0; + for (double val : centrality) { + norm += val * val; + } + norm = std::sqrt(norm); + + if (norm > 0.0) { + for (double& val : centrality) { + val /= norm; + } + } + + py::dict result; + for (size_t i = 0; i < nodes.size(); i++) { + py::object node_obj = graph.id_to_node[py::cast(nodes[i])]; + result[node_obj] = centrality[i]; + } + + return result; + } catch (const std::exception& e) { + throw std::runtime_error(e.what()); + } +} \ No newline at end of file diff --git a/easygraph/functions/centrality/__init__.py b/easygraph/functions/centrality/__init__.py index f37e55fe..bd2fec50 100644 --- a/easygraph/functions/centrality/__init__.py +++ b/easygraph/functions/centrality/__init__.py @@ -5,4 +5,5 @@ from .flowbetweenness import * from .laplacian import * from .pagerank import * -from .katz_centrality import * \ No newline at end of file +from .katz_centrality import * +from .eigenvector import * \ No newline at end of file diff --git a/easygraph/functions/centrality/eigenvector.py b/easygraph/functions/centrality/eigenvector.py new file mode 100644 index 00000000..2d7506d0 --- /dev/null +++ b/easygraph/functions/centrality/eigenvector.py @@ -0,0 +1,155 @@ +import math +import easygraph as eg +from easygraph.utils import * +from easygraph.utils.decorators import * +from scipy import sparse +from scipy.sparse import linalg +import numpy as np +from collections import defaultdict + +__all__ = ["eigenvector_centrality"] + +@not_implemented_for("multigraph") +@hybrid("cpp_eigenvector_centrality") +def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, weight=None): + """Calculate eigenvector centrality for nodes in the graph + + Eigenvector centrality is based on the idea that a node's importance + depends on the importance of its neighboring nodes. + Specifically, a node's centrality is proportional to the sum of + centrality values of its neighbors. + + Parameters + ---------- + G : graph object + An undirected or directed graph + + max_iter : int, optional (default=100) + Maximum number of iterations for the power method + + tol : float, optional (default=1.0e-6) + Convergence threshold; algorithm terminates when the difference + between centrality values in consecutive iterations is less than this value + + nstart : dictionary, optional (default=None) + Dictionary mapping nodes to initial centrality values + If None, the ARPACK solver is used to directly compute the eigenvector + + weight : string or None, optional (default=None) + Name of the edge attribute to be used as edge weight + If None, all edges are considered to have weight 1 + + Returns + ------- + centrality : dictionary + Dictionary mapping nodes to their eigenvector centrality values + + Raises + ------ + EasyGraphPointlessConcept + When input is an empty graph + + EasyGraphError + When the algorithm fails to converge within the specified maximum iterations + + Notes + ----- + This algorithm uses the power iteration method to find the principal eigenvector. + When nstart is not provided, the ARPACK solver is used for efficiency. + The returned centrality values are normalized. + """ + + if len(G) == 0: + raise eg.EasyGraphPointlessConcept( + "cannot compute centrality for the null graph" + ) + + if len(G) == 1: + raise eg.EasyGraphPointlessConcept( + "cannot compute eigenvector centrality for a single node graph" + ) + + + # Build node list and mapping + nodelist = list(G.nodes) + n = len(nodelist) + node_map = {node: i for i, node in enumerate(nodelist)} + + # Build weighted adjacency matrix + row, col, data = [], [], [] + for u in nodelist: + u_idx = node_map[u] + for v, attrs in G[u].items(): + if v in node_map: + v_idx = node_map[v] + w = attrs.get(weight, 1.0) if weight else 1.0 + # Build transpose matrix for centrality calculation + row.append(v_idx) + col.append(u_idx) + data.append(float(w)) + + # Create CSR format sparse matrix + A = sparse.csr_matrix((data, (row, col)), shape=(n, n)) + + # Detect and handle isolated nodes + row_sums = np.array(A.sum(axis=1)).flatten() + col_sums = np.array(A.sum(axis=0)).flatten() + isolated_nodes = np.where((row_sums == 0) & (col_sums == 0))[0] + + has_isolated = len(isolated_nodes) > 0 + isolated_indices = [] + + # Add small self-loops to isolated nodes for stability + if has_isolated: + # Store isolated node indices + isolated_indices = isolated_nodes.tolist() + + # Add small self-loop weights to isolated nodes + for idx in isolated_indices: + A[idx, idx] = 1.0e-4 # Small enough to not affect results, but maintains numerical stability + if nstart is not None: + # Use custom initial vector for power iteration + v = np.array([nstart.get(n, 1.0) for n in nodelist], dtype=float) + v = v / np.sum(np.abs(v)) + + # Power iteration method to compute principal eigenvector + v_last = np.zeros_like(v) + for _ in range(max_iter): + np.copyto(v_last, v) + v = A @ v_last # Sparse matrix multiplication + + norm = np.linalg.norm(v) + if norm < 1e-10: + v = v_last.copy() + break + v = v / norm # Normalization + + # Check convergence + if np.linalg.norm(v - v_last) < tol: + break + else: + raise eg.EasyGraphError(f"Eigenvector calculation did not converge in {max_iter} iterations") + + centrality = v + else: + # Use ARPACK solver to directly compute the principal eigenvector + eigenvalues, eigenvectors = linalg.eigs(A, k=1, which='LR', + maxiter=max_iter, tol=tol) + centrality = np.real(eigenvectors[:,0]) + + # Ensure positive results and normalize + if centrality.sum() < 0: + centrality = -centrality + + centrality = centrality / np.linalg.norm(centrality) + # Set centrality of isolated nodes to zero + if has_isolated: + for idx in isolated_indices: + centrality[idx] = 0.0 + # Renormalize if needed + if np.sum(centrality) > 0: + centrality = centrality / np.linalg.norm(centrality) + + # Return dictionary of node centrality values + return {nodelist[i]: float(centrality[i]) for i in range(n)} + diff --git a/easygraph/functions/centrality/tests/test_eigenvector.py b/easygraph/functions/centrality/tests/test_eigenvector.py new file mode 100644 index 00000000..1617b857 --- /dev/null +++ b/easygraph/functions/centrality/tests/test_eigenvector.py @@ -0,0 +1,119 @@ +import unittest +import numpy as np +import easygraph as eg +from easygraph.functions.centrality import eigenvector_centrality +from easygraph.utils.exception import EasyGraphPointlessConcept, EasyGraphNotImplemented + + +class Test_eigenvector(unittest.TestCase): + def setUp(self): + self.edges = [ + (1, 4), + (2, 4), + ("String", "Bool"), + (4, 1), + (0, 4), + (4, 256), + ((None, None), (None, None)), + ] + + self.test_graphs = [] + self.test_graphs.append(eg.classes.DiGraph(self.edges)) + + self.simple_graph = eg.Graph() + self.simple_graph.add_edges_from([(0, 1), (1, 2), (2, 3)]) + + self.directed_graph = eg.DiGraph() + self.directed_graph.add_edges_from([(0, 1), (1, 2), (2, 3)]) + + self.weighted_graph = eg.Graph() + self.weighted_graph.add_edges_from([(0, 1), (1, 2), (2, 3)]) + + for u, v, data in self.weighted_graph.edges: + data["weight"] = 2.0 + + self.disconnected_graph = eg.Graph() + self.disconnected_graph.add_edges_from([(0, 1), (2, 3)]) + + self.isolated_node_graph = eg.Graph() + self.isolated_node_graph.add_edges_from([(0, 1), (1, 2)]) + self.isolated_node_graph.add_node(3) + + self.single_node_graph = eg.Graph() + self.single_node_graph.add_node(42) + + self.mixed_nodes_graph = eg.Graph() + self.mixed_nodes_graph.add_edges_from([(1, 2), ("X", "Y"), ((1, 2), (3, 4))]) + + def test_eigenvector(self): + + for G in self.test_graphs: + result = eigenvector_centrality(G) + self.assertIsInstance(result, dict) + self.assertEqual(len(result), len(G)) + + for v in result.values(): + self.assertIsInstance(v, float) + + def test_simple_graph(self): + result = eigenvector_centrality(self.simple_graph) + self.assertEqual(len(result), len(self.simple_graph)) + + for v in result.values(): + self.assertIsInstance(v, float) + + def test_directed_graph(self): + result = eigenvector_centrality(self.directed_graph) + self.assertEqual(len(result), len(self.directed_graph)) + + def test_weighted_graph(self): + result = eigenvector_centrality(self.weighted_graph, weight="weight") + self.assertEqual(len(result), len(self.weighted_graph)) + + def test_disconnected_graph(self): + + result = eigenvector_centrality(self.disconnected_graph) + self.assertEqual(len(result), len(self.disconnected_graph)) + + def test_isolated_nodes(self): + + result = eigenvector_centrality(self.isolated_node_graph) + self.assertEqual(len(result), len(self.isolated_node_graph)) + self.assertTrue(3 in result) + self.assertGreaterEqual(result[3], 0) + + def test_single_node_graph(self): + with self.assertRaises(EasyGraphPointlessConcept): + eigenvector_centrality(self.single_node_graph) + + def test_mixed_node_types(self): + result = eigenvector_centrality(self.mixed_nodes_graph) + self.assertEqual(len(result), len(self.mixed_nodes_graph)) + + def test_max_iter_parameter(self): + result = eigenvector_centrality(self.simple_graph, max_iter=50) + self.assertEqual(len(result), len(self.simple_graph)) + + def test_tol_parameter(self): + result = eigenvector_centrality(self.simple_graph, tol=1.0e-3) + self.assertEqual(len(result), len(self.simple_graph)) + + def test_nstart_parameter(self): + nstart = {node: 1.0 for node in self.simple_graph} + result = eigenvector_centrality(self.simple_graph, nstart=nstart) + self.assertEqual(len(result), len(self.simple_graph)) + + def test_multigraph_raises(self): + G = eg.MultiGraph() + G.add_edges_from([(0, 1), (0, 1)]) + with self.assertRaises(EasyGraphNotImplemented): + eigenvector_centrality(G) + + def test_empty_graph_raises(self): + G = eg.Graph() + with self.assertRaises(EasyGraphPointlessConcept): + eigenvector_centrality(G) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From b5850d8b56b0fcd98ccd27e3e7d18f4995b2b199 Mon Sep 17 00:00:00 2001 From: Nianle <24302010062@m.fudan.edu.cn> Date: Fri, 5 Dec 2025 04:40:18 -0700 Subject: [PATCH 2/6] Optimize Eigenvector Centrality --- cpp_easygraph/CMakeLists.txt | 37 +- .../functions/centrality/eigenvector.cpp | 784 ++++++++++++------ 2 files changed, 561 insertions(+), 260 deletions(-) diff --git a/cpp_easygraph/CMakeLists.txt b/cpp_easygraph/CMakeLists.txt index f272b808..2e85607f 100644 --- a/cpp_easygraph/CMakeLists.txt +++ b/cpp_easygraph/CMakeLists.txt @@ -13,6 +13,25 @@ add_subdirectory(pybind11) option(EASYGRAPH_ENABLE_GPU "EASYGRAPH_ENABLE_GPU" OFF) +# 启用OpenMP支持 +find_package(OpenMP) +if(OpenMP_CXX_FOUND) + message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}") + message(STATUS "OpenMP flags: ${OpenMP_CXX_FLAGS}") +else() + message(WARNING "OpenMP not found. Parallel optimization will be disabled.") +endif() + +# 查找ARPACK库(兼容无CMake配置的系统) +find_library(ARPACK_LIB NAMES arpack arpack_double PATHS ENV LD_LIBRARY_PATH) +if(NOT ARPACK_LIB) + message(FATAL_ERROR "ARPACK library not found") +else() + message(STATUS "Found ARPACK library: ${ARPACK_LIB}") +endif() + +find_package(LAPACK REQUIRED) + if (EASYGRAPH_ENABLE_GPU) pybind11_add_module(cpp_easygraph @@ -38,11 +57,6 @@ else() endif() -set_target_properties(cpp_easygraph PROPERTIES - LINK_SEARCH_START_STATIC ON - LINK_SEARCH_END_STATIC ON -) - find_package(Eigen3 QUIET) if(Eigen3_FOUND) message(STATUS "Found Eigen3: ${EIGEN3_INCLUDE_DIR}") @@ -50,9 +64,12 @@ if(Eigen3_FOUND) add_definitions(-DEIGEN_MAJOR_VERSION=${EIGEN3_VERSION_MAJOR}) endif() -# 启用OpenMP支持 -find_package(OpenMP) +# 将OpenMP链接到target if(OpenMP_CXX_FOUND) - message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif() \ No newline at end of file + target_link_libraries(cpp_easygraph PRIVATE OpenMP::OpenMP_CXX) + target_compile_options(cpp_easygraph PRIVATE ${OpenMP_CXX_FLAGS}) + message(STATUS "OpenMP enabled for cpp_easygraph") +endif() + +# 链接ARPACK和LAPACK +target_link_libraries(cpp_easygraph PRIVATE ${ARPACK_LIB} LAPACK::LAPACK) \ No newline at end of file diff --git a/cpp_easygraph/functions/centrality/eigenvector.cpp b/cpp_easygraph/functions/centrality/eigenvector.cpp index aa434621..6d9eb669 100644 --- a/cpp_easygraph/functions/centrality/eigenvector.cpp +++ b/cpp_easygraph/functions/centrality/eigenvector.cpp @@ -1,286 +1,482 @@ -#include #include +#include +#include +#include +#include #include #include #include #include -#include #include #include "centrality.h" #include "../../classes/graph.h" -#ifdef EIGEN_MAJOR_VERSION +#ifdef HAVE_EIGEN #include #include -#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 -#define HAVE_EIGEN -#endif + #ifdef HAVE_EIGEN_SPARSE_SOLVER + #include + #endif #endif namespace py = pybind11; +extern "C" { + void dsaupd_(int*, char*, int*, char*, int*, double*, double*, int*, + double*, int*, int*, int*, double*, double*, int*, int*); + void dseupd_(int*, char*, int*, double*, double*, int*, double*, + char*, int*, char*, int*, double*, double*, int*, + double*, int*, int*, int*, double*, double*, int*, int*); + void dnaupd_(int*, char*, int*, char*, int*, double*, double*, int*, + double*, int*, int*, int*, double*, double*, int*, int*); + void dneupd_(int* rvec, char* howmny, int* select, + double* dr, double* di, double* z, int* ldz, + double* sigmar, double* sigmai, double* workev, + char* bmat, int* n, char* which, int* nev, + double* tol, double* resid, int* ncv, double* v, int* ldv, + int* iparam, int* ipntr, double* workd, double* workl, + int* lworkl, int* info); +} + +// CSRMatrix 稀疏矩阵结构体 class CSRMatrix { public: std::vector indptr; std::vector indices; std::vector data; - int rows, cols; - + int rows; + int cols; + + CSRMatrix() : rows(0), cols(0) {} CSRMatrix(int r, int c) : rows(r), cols(c) { - indptr.resize(r + 1, 0); + indptr.assign(r + 1, 0); } - + void reserve(size_t nnz) { indices.reserve(nnz); data.reserve(nnz); } - - std::vector multiply(const std::vector& vec) const { - std::vector result(rows, 0.0); - - #pragma omp parallel for if(rows > 1000) - for (int i = 0; i < rows; i++) { - for (int j = indptr[i]; j < indptr[i + 1]; j++) { - result[i] += data[j] * vec[indices[j]]; + + void multiply_inplace(const std::vector& vec, std::vector& result) const { + result.assign(rows, 0.0); + #pragma omp parallel for schedule(static,64) if(rows > 1000) + for (int i = 0; i < rows; ++i) { + double sum = 0.0; + for (int j = indptr[i]; j < indptr[i + 1]; ++j) { + sum += data[j] * vec[indices[j]]; } + result[i] = sum; } - - return result; } - void multiply_inplace(const std::vector& vec, std::vector& result) const { - std::fill(result.begin(), result.end(), 0.0); - - #pragma omp parallel for if(rows > 1000) - for (int i = 0; i < rows; i++) { - for (int j = indptr[i]; j < indptr[i + 1]; j++) { - result[i] += data[j] * vec[indices[j]]; - } + double estimate_norm() const { + double s = 0.0; + #pragma omp parallel for reduction(+:s) if(data.size() > 1000) + for (size_t i = 0; i < data.size(); ++i) s += data[i] * data[i]; + return std::sqrt(s); + } + + double rayleigh_quotient(const std::vector& x, const std::vector& Ax) const { + double num = 0.0, den = 0.0; + #pragma omp parallel for reduction(+:num,den) if(rows > 1000) + for (int i = 0; i < rows; ++i) { + num += Ax[i] * x[i]; + den += x[i] * x[i]; } + return den > 0.0 ? num / den : 0.0; } #ifdef HAVE_EIGEN Eigen::SparseMatrix to_eigen() const { - Eigen::SparseMatrix eigen_mat(rows, cols); - eigen_mat.reserve(Eigen::VectorXi::Constant(cols, 5)); - - for (int i = 0; i < rows; i++) { - for (int j = indptr[i]; j < indptr[i + 1]; j++) { - eigen_mat.insert(i, indices[j]) = data[j]; + Eigen::SparseMatrix M(rows, cols); + std::vector> trip; + trip.reserve(data.size()); + for (int i = 0; i < rows; ++i) { + for (int j = indptr[i]; j < indptr[i+1]; ++j) { + trip.emplace_back(i, indices[j], data[j]); } } - eigen_mat.makeCompressed(); - return eigen_mat; + M.setFromTriplets(trip.begin(), trip.end()); + M.makeCompressed(); + return M; } #endif }; -CSRMatrix build_transpose_matrix( - const Graph& graph, - const std::vector& nodes, - const std::string& weight_key -) { - int n = nodes.size(); - std::unordered_map node_to_idx; - for (size_t i = 0; i < nodes.size(); i++) { - node_to_idx[nodes[i]] = i; - } - - size_t nnz = 0; - for (node_t node_id : nodes) { - const auto& adj_it = graph.adj.find(node_id); - if (adj_it != graph.adj.end()) { - nnz += adj_it->second.size(); +// 前向声明 +std::vector power_iteration_chebyshev( + const CSRMatrix& A, + int max_iter, + double tol, + std::vector& x +); +double vector_norm(const std::vector& x); +void normalize_vector(std::vector& x, double norm); +double vector_diff_norm(const std::vector& x1, const std::vector& x2); + + + +// 构建转置稀疏矩阵 +CSRMatrix build_transpose_matrix(Graph& graph, const std::vector& nodes, const std::string& weight_key) { + try { + std::shared_ptr csr_ptr; + if (weight_key.empty()) { + csr_ptr = graph.gen_CSR(); + } else { + csr_ptr = graph.gen_CSR(weight_key); } - } - - std::vector> edges; - edges.reserve(nnz); - - for (node_t src_id : nodes) { - int src_idx = node_to_idx[src_id]; - const auto& adj_it = graph.adj.find(src_id); - if (adj_it != graph.adj.end()) { - for (const auto& adj_pair : adj_it->second) { - node_t dst_id = adj_pair.first; - auto dst_it = node_to_idx.find(dst_id); - if (dst_it != node_to_idx.end()) { - int dst_idx = dst_it->second; - - double w = 1.0; - if (!weight_key.empty()) { - auto w_it = adj_pair.second.find(weight_key); - if (w_it != adj_pair.second.end()) { - w = w_it->second; - } - } - - edges.push_back(std::make_tuple(dst_idx, src_idx, w)); + + if (!csr_ptr) { + std::cerr << "[build_transpose_matrix] Error: gen_CSR returned nullptr" << std::endl; + const int n = static_cast(nodes.size()); + CSRMatrix A(n, n); + A.indptr.assign(n + 1, 0); + return A; + } + + const int n = static_cast(nodes.size()); + std::vector src_indptr; + std::vector src_indices; + std::vector src_data; + int rows = 0, cols = 0; + + // 只用V/E/unweighted_W/W_map + if (!csr_ptr->V.empty() && !csr_ptr->E.empty()) { + rows = static_cast(csr_ptr->V.size()) - 1; + cols = rows; + src_indptr = csr_ptr->V; + src_indices = csr_ptr->E; + if (weight_key.empty()) { + src_data = csr_ptr->unweighted_W.empty() ? + std::vector(csr_ptr->E.size(), 1.0) : + csr_ptr->unweighted_W; + } else { + auto it = csr_ptr->W_map.find(weight_key); + if (it != csr_ptr->W_map.end() && it->second) { + src_data = *(it->second); + } else { + src_data = std::vector(csr_ptr->E.size(), 1.0); } } + } else { + std::cerr << "[build_transpose_matrix] Warning: CSR data is empty (V size=" + << csr_ptr->V.size() << ")" << std::endl; + CSRMatrix A(n, n); + A.indptr.assign(n + 1, 0); + return A; } - } - - std::sort(edges.begin(), edges.end(), - [](const std::tuple& a, const std::tuple& b) { - if (std::get<0>(a) != std::get<0>(b)) { - return std::get<0>(a) < std::get<0>(b); - } - return std::get<1>(a) < std::get<1>(b); - }); - - CSRMatrix matrix(n, n); - matrix.reserve(edges.size()); - - for (const auto& edge : edges) { - int row = std::get<0>(edge); - matrix.indptr[row + 1]++; - } - - for (int i = 0; i < n; i++) { - matrix.indptr[i + 1] += matrix.indptr[i]; - } - - for (const auto& edge : edges) { - int row = std::get<0>(edge); - int col = std::get<1>(edge); - double val = std::get<2>(edge); - - matrix.indices.push_back(col); - matrix.data.push_back(val); - } - - std::vector has_connections(n, false); - - for (size_t i = 0; i < n; i++) { - if (matrix.indptr[i + 1] > matrix.indptr[i]) { - has_connections[i] = true; + + if (src_indptr.size() < 2 || src_indices.empty()) { + std::cerr << "[build_transpose_matrix] Warning: Invalid CSR structure" << std::endl; + CSRMatrix A(n, n); + A.indptr.assign(n + 1, 0); + return A; } - } - - for (int i = 0; i < n; i++) { - if (!has_connections[i]) { - matrix.indices.push_back(i); - matrix.data.push_back(1.0e-4); - - for (int j = i + 1; j <= n; j++) { - matrix.indptr[j]++; + + // 转置 + CSRMatrix At(cols, rows); + At.indptr.assign(cols + 1, 0); + size_t nnz = src_indices.size(); + + for (size_t idx = 0; idx < nnz; ++idx) { + int col = src_indices[idx]; + if (col >= 0 && col < cols) { + At.indptr[col + 1]++; + } + } + for (int i = 0; i < cols; ++i) { + At.indptr[i + 1] += At.indptr[i]; + } + + At.indices.assign(nnz, 0); + At.data.assign(nnz, 0.0); + + std::vector cur_pos(At.indptr.begin(), At.indptr.end()); + + for (int r = 0; r < rows; ++r) { + int start = src_indptr[r]; + int end = (r + 1 < static_cast(src_indptr.size())) ? src_indptr[r + 1] : static_cast(nnz); + for (int p = start; p < end; ++p) { + if (p >= static_cast(src_indices.size())) break; + int c = src_indices[p]; + if (c < 0 || c >= cols) continue; + int dest = cur_pos[c]++; + At.indices[dest] = r; + At.data[dest] = (p < static_cast(src_data.size())) ? src_data[p] : 1.0; } } + + return At; + } catch (const std::exception& e) { + std::cerr << "[build_transpose_matrix] Exception: " << e.what() << std::endl; + const int n = static_cast(nodes.size()); + CSRMatrix A(n, n); + A.indptr.assign(n + 1, 0); + return A; } - return matrix; } -std::vector power_iteration( +// ARPACK: 有向图 +std::vector compute_eigenvector_arpack_directed( const CSRMatrix& A, int max_iter, - double tol, - std::vector& x + double tol ) { - int n = A.rows; - std::vector x_next(n, 0.0); - - for (int iter = 0; iter < max_iter; iter++) { - A.multiply_inplace(x, x_next); - - double norm = 0.0; - #pragma omp parallel for reduction(+:norm) if(n > 10000) - for (int i = 0; i < n; i++) { - norm += x_next[i] * x_next[i]; - } - norm = std::sqrt(norm); - - if (norm < 1e-12) { - return x; - } - - #pragma omp parallel for if(n > 10000) + const int n_const = A.rows; + std::vector result(n_const, 0.0); + int n = n_const; + int nev = 1; + int ncv = (n > 30) ? 30 : n; + if (ncv < nev + 2) { + ncv = std::min(nev + 2, n); + } + int ido = 0; + char bmat[2] = "I"; + char which[3] = "LR"; + std::vector resid(n); + std::vector v(static_cast(n) * ncv); + std::vector workd(3 * n); + int lworkl = 3 * ncv * ncv + 6 * ncv; + std::vector workl(lworkl); + + #pragma omp parallel for schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + int degree = A.indptr[i + 1] - A.indptr[i]; + resid[i] = degree > 0 ? + static_cast(degree) + 1e-4 * (rand() / static_cast(RAND_MAX)) : + 1.0; + } + double norm = 0.0; + for (int i = 0; i < n; i++) { + norm += resid[i] * resid[i]; + } + norm = std::sqrt(norm); + if (norm > 1e-12) { for (int i = 0; i < n; i++) { - x_next[i] /= norm; + resid[i] /= norm; } - - double diff = 0.0; - #pragma omp parallel for reduction(+:diff) if(n > 10000) - for (int i = 0; i < n; i++) { - diff += std::fabs(x_next[i] - x[i]); + } + int iparam[11] = {0}; + iparam[0] = 1; + iparam[2] = max_iter; + iparam[3] = 1; + iparam[6] = 1; + int ipntr[14]; + int ldv = n; + int info = 0; + int iter_count = 0; + auto start_time = std::chrono::high_resolution_clock::now(); + try { + while (true) { + dnaupd_(&ido, bmat, &n, which, &nev, &tol, resid.data(), &ncv, + v.data(), &ldv, iparam, ipntr, workd.data(), workl.data(), + &lworkl, &info); + if (ido == -1 || ido == 1) { + int x_idx = ipntr[0] - 1; + int y_idx = ipntr[1] - 1; + std::vector x_vec(n), y_vec(n); + std::copy(workd.begin() + x_idx, workd.begin() + x_idx + n, x_vec.begin()); + A.multiply_inplace(x_vec, y_vec); + std::copy(y_vec.begin(), y_vec.end(), workd.begin() + y_idx); + iter_count++; + } else if (ido == 99) { + break; + } else { + throw std::runtime_error("ARPACK unexpected ido: " + std::to_string(ido)); + } + if (iter_count > max_iter * 10) { + throw std::runtime_error("ARPACK exceeded iteration limit"); + } } - - if (diff < n * tol) { - std::swap(x, x_next); - break; + } catch (const std::exception& e) { + std::cerr << "[ARPACK-Directed] Error: " << e.what() << std::endl; + throw; + } + if (info < 0) { + std::string error_msg = "ARPACK dnaupd_ error: " + std::to_string(info); + switch(info) { + case -5: error_msg += " (LWORKL too small, need at least " + std::to_string(3*ncv*ncv + 6*ncv) + ")"; break; } - - std::swap(x, x_next); + throw std::runtime_error(error_msg); } - - return x; + int rvec = 1; + char howmny[2] = "A"; + std::vector select(ncv); + std::vector dr(nev + 1); + std::vector di(nev + 1); + std::vector z(n * (nev + 1)); + std::vector workev(3 * ncv); + int ldz = n; + double sigmar = 0.0, sigmai = 0.0; + dneupd_(&rvec, howmny, select.data(), dr.data(), di.data(), z.data(), &ldz, + &sigmar, &sigmai, workev.data(), bmat, &n, which, &nev, &tol, + resid.data(), &ncv, v.data(), &ldv, iparam, ipntr, workd.data(), + workl.data(), &lworkl, &info); + if (info != 0) { + throw std::runtime_error("ARPACK dneupd_ error: " + std::to_string(info)); + } + std::copy(z.begin(), z.begin() + n, result.begin()); + return result; } -#ifdef HAVE_EIGEN -std::vector compute_eigenvector_eigen( - const CSRMatrix& A, +// ARPACK: 无向图 +std::vector compute_eigenvector_arpack( + const CSRMatrix& A, int max_iter, double tol ) { - Eigen::SparseMatrix eigen_matrix = A.to_eigen(); - - int n = A.rows; - std::vector result(n); - + const int n_const = A.rows; + std::vector result(n_const, 0.0); + int n = n_const; + int nev = 1; + int ncv = (n > 30) ? 30 : n; + if (ncv < nev + 2) { + ncv = std::min(nev + 2, n); + } + int ido = 0; + char bmat[2] = "I"; + char which[3] = "LR"; + std::vector resid(n); + std::vector v(static_cast(n) * ncv); + std::vector workd(3 * n); + int lworkl = ncv * (ncv + 8); + std::vector workl(lworkl); + + #pragma omp parallel for schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + int degree = A.indptr[i + 1] - A.indptr[i]; + resid[i] = degree > 0 ? + static_cast(degree) + 1e-4 * (rand() / static_cast(RAND_MAX)) : + 1.0; + } + double norm = 0.0; + for (int i = 0; i < n; i++) { + norm += resid[i] * resid[i]; + } + norm = std::sqrt(norm); + if (norm > 1e-12) { + for (int i = 0; i < n; i++) { + resid[i] /= norm; + } + } + int iparam[11] = {0}; + iparam[0] = 1; + iparam[2] = max_iter; + iparam[3] = 1; + iparam[6] = 1; + int ipntr[11]; + int ldv = n; + int info = 0; + int iter_count = 0; + auto start_time = std::chrono::high_resolution_clock::now(); try { - Eigen::EigenSolver solver; - - if (n < 1000) { - Eigen::MatrixXd dense_matrix(eigen_matrix); - solver.compute(dense_matrix); - - int max_idx = 0; - double max_val = solver.eigenvalues()[0].real(); - for (int i = 1; i < n; i++) { - if (solver.eigenvalues()[i].real() > max_val) { - max_val = solver.eigenvalues()[i].real(); - max_idx = i; - } + while (true) { + dsaupd_(&ido, bmat, &n, which, &nev, &tol, resid.data(), &ncv, + v.data(), &ldv, iparam, ipntr, workd.data(), workl.data(), + &lworkl, &info); + if (ido == -1 || ido == 1) { + int x_idx = ipntr[0] - 1; + int y_idx = ipntr[1] - 1; + std::vector x_vec(n), y_vec(n); + std::copy(workd.begin() + x_idx, workd.begin() + x_idx + n, x_vec.begin()); + A.multiply_inplace(x_vec, y_vec); + std::copy(y_vec.begin(), y_vec.end(), workd.begin() + y_idx); + iter_count++; + } else if (ido == 99) { + break; + } else { + throw std::runtime_error("ARPACK unexpected ido: " + std::to_string(ido)); } - - Eigen::VectorXd eigen_vec = solver.eigenvectors().col(max_idx).real(); - for (int i = 0; i < n; i++) { - result[i] = eigen_vec(i); + if (iter_count > max_iter * 10) { + throw std::runtime_error("ARPACK exceeded iteration limit"); } - } else { - throw std::runtime_error("Matrix too large for dense solver"); } + } catch (const std::exception& e) { + std::cerr << "[ARPACK] Error: " << e.what() << std::endl; + throw; } - catch (const std::exception& e) { - std::vector x(n, 1.0/n); - return power_iteration(A, max_iter, tol, x); + if (info < 0) { + std::string error_msg = "ARPACK dsaupd_ error: " + std::to_string(info); + switch(info) { + case -1: error_msg += " (N must be positive)"; break; + case -2: error_msg += " (NEV must be positive)"; break; + case -3: error_msg += " (NCV-NEV >= 2 and NCV <= N not satisfied)"; break; + case -5: error_msg += " (LWORKL too small, need at least " + std::to_string(ncv * (ncv + 8)) + ")"; break; + case -8: error_msg += " (Error in LAPACK)"; break; + case -9: error_msg += " (Starting vector is zero)"; break; + case -14: error_msg += " (Did not find any eigenvalues)"; break; + } + throw std::runtime_error(error_msg); } - - double sum = 0.0; - for (double val : result) { - sum += val; + int rvec = 1; + char howmny[2] = "A"; + std::vector select(ncv); + std::vector d(nev); + std::vector z(n * nev); + int ldz = n; + double sigma = 0.0; + dseupd_(&rvec, howmny, select.data(), d.data(), z.data(), &ldz, &sigma, + bmat, &n, which, &nev, &tol, resid.data(), &ncv, v.data(), &ldv, + iparam, ipntr, workd.data(), workl.data(), &lworkl, &info); + if (info != 0) { + throw std::runtime_error("ARPACK dseupd_ error: " + std::to_string(info)); } - - if (sum < 0.0) { - for (double& val : result) { - val = -val; + std::copy(z.begin(), z.begin() + n, result.begin()); + return result; +} + +#if defined(HAVE_EIGEN) && defined(HAVE_EIGEN_SPARSE_SOLVER) +std::vector compute_eigenvector_eigen_sparse( + const CSRMatrix& A, + int max_iter, + double tol +) { + const int n = A.rows; + Eigen::SparseMatrix eigen_mat = A.to_eigen(); + Eigen::VectorXd x(n); + #pragma omp parallel for schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + int degree = A.indptr[i + 1] - A.indptr[i]; + double second_order = 0.0; + for (int j = A.indptr[i]; j < A.indptr[i + 1]; j++) { + int nei = A.indices[j]; + int nei_degree = A.indptr[nei + 1] - A.indptr[nei]; + second_order += nei_degree; } + x(i) = degree > 0 ? + static_cast(degree) + 0.1 * second_order / (degree + 1) : + 1.0; } - - double norm = 0.0; - for (double val : result) { - norm += val * val; + x.normalize(); + double prev_eigenvalue = 0.0; + const double rel_tol = tol * std::max(1.0, A.estimate_norm()); + for (int iter = 0; iter < max_iter; iter++) { + Eigen::VectorXd x_new = eigen_mat * x; + double norm = x_new.norm(); + if (norm < 1e-12) break; + x_new /= norm; + double eigenvalue = x_new.dot(eigen_mat * x_new) / x_new.squaredNorm(); + double diff = (x_new - x).norm(); + bool converged_vector = (diff < rel_tol); + bool converged_eigenvalue = (iter > 0) && + (std::abs(eigenvalue - prev_eigenvalue) < tol * std::abs(eigenvalue)); + if (converged_vector || converged_eigenvalue) { + x = x_new; + break; + } + x = x_new; + prev_eigenvalue = eigenvalue; } - norm = std::sqrt(norm); - - for (double& val : result) { - val /= norm; + std::vector result(n); + #pragma omp parallel for schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + result[i] = x(i); } - return result; } #endif +// 主入口 py::object cpp_eigenvector_centrality( py::object G, py::object py_max_iter, @@ -302,45 +498,68 @@ py::object cpp_eigenvector_centrality( } std::vector nodes; + nodes.reserve(graph.node.size()); for (auto& node_pair : graph.node) { nodes.push_back(node_pair.first); } - int n = nodes.size(); + const int n = nodes.size(); CSRMatrix A_transpose = build_transpose_matrix(graph, nodes, weight_key); - std::vector isolated_nodes(n, true); + std::vector isolated_nodes(n, false); + #pragma omp parallel for schedule(static) if(n > 1000) for (int i = 0; i < n; i++) { - // A node is isolated if it has zero edges (no entries in its row) - if (A_transpose.indptr[i + 1] == A_transpose.indptr[i]) { - isolated_nodes[i] = true; - } else { - isolated_nodes[i] = false; - } + isolated_nodes[i] = (A_transpose.indptr[i + 1] == A_transpose.indptr[i]); } std::vector centrality; if (py_nstart.is_none()) { bool fast_solver_success = false; - -#ifdef HAVE_EIGEN +#ifdef HAVE_ARPACK try { - centrality = compute_eigenvector_eigen(A_transpose, max_iter, tol); + if (Graph_is_directed(G)) { + centrality = compute_eigenvector_arpack_directed(A_transpose, max_iter, tol); + } else { + centrality = compute_eigenvector_arpack(A_transpose, max_iter, tol); + } fast_solver_success = true; - } catch (const std::exception&) { + } catch (const std::exception& e) { fast_solver_success = false; } #endif - +#if defined(HAVE_EIGEN) && defined(HAVE_EIGEN_SPARSE_SOLVER) + if (!fast_solver_success && n >= 100 && n < 500000) { + try { + centrality = compute_eigenvector_eigen_sparse(A_transpose, max_iter, tol); + fast_solver_success = true; + } catch (const std::exception&) { + fast_solver_success = false; + } + } +#endif if (!fast_solver_success) { - std::vector x(n, 1.0/n); - centrality = power_iteration(A_transpose, max_iter, tol, x); + std::vector x(n, 0.0); + #pragma omp parallel for schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + if (!isolated_nodes[i]) { + const int degree = A_transpose.indptr[i + 1] - A_transpose.indptr[i]; + double second_order = 0.0; + for (int j = A_transpose.indptr[i]; j < A_transpose.indptr[i + 1]; j++) { + int nei = A_transpose.indices[j]; + int nei_degree = A_transpose.indptr[nei + 1] - A_transpose.indptr[nei]; + second_order += nei_degree; + } + x[i] = static_cast(degree) + + 0.1 * second_order / (degree + 1) + + 1e-4 * (rand() / static_cast(RAND_MAX)); + } + } + centrality = power_iteration_chebyshev(A_transpose, max_iter, tol, x); } } else { py::dict nstart = py_nstart.cast(); std::vector x(n, 0.0); - for (size_t i = 0; i < nodes.size(); i++) { py::object node_obj = graph.id_to_node[py::cast(nodes[i])]; if (nstart.contains(node_obj)) { @@ -349,7 +568,6 @@ py::object cpp_eigenvector_centrality( x[i] = 1.0; } } - bool all_zeros = true; for (double val : x) { if (std::abs(val) > 1e-10) { @@ -357,60 +575,126 @@ py::object cpp_eigenvector_centrality( break; } } - if (all_zeros) { throw std::runtime_error("initial vector cannot have all zero values"); } - double sum_abs = 0.0; - for (double val : x) { - sum_abs += std::fabs(val); + #pragma omp parallel for reduction(+:sum_abs) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + sum_abs += std::fabs(x[i]); } - - for (double& val : x) { - val /= sum_abs; + const double inv_sum = 1.0 / sum_abs; + #pragma omp parallel for schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + x[i] *= inv_sum; } - - centrality = power_iteration(A_transpose, max_iter, tol, x); + centrality = power_iteration_chebyshev(A_transpose, max_iter, tol, x); } - double sum = 0.0; - for (double val : centrality) { - sum += val; + #pragma omp parallel for reduction(+:sum) schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + sum += centrality[i]; } - if (sum < 0.0) { - for (double& val : centrality) { - val = -val; + #pragma omp parallel for schedule(static) if(n > 1000) + for (int i = 0; i < n; i++) { + centrality[i] = -centrality[i]; } } - + #pragma omp parallel for schedule(static) if(n > 1000) for (int i = 0; i < n; i++) { if (isolated_nodes[i]) { centrality[i] = 0.0; } } - - double norm = 0.0; - for (double val : centrality) { - norm += val * val; + double norm = vector_norm(centrality); + if (norm > 1e-12) { + normalize_vector(centrality, norm); } - norm = std::sqrt(norm); - - if (norm > 0.0) { - for (double& val : centrality) { - val /= norm; - } - } - py::dict result; for (size_t i = 0; i < nodes.size(); i++) { py::object node_obj = graph.id_to_node[py::cast(nodes[i])]; result[node_obj] = centrality[i]; } - return result; } catch (const std::exception& e) { throw std::runtime_error(e.what()); } +} + +// 幂迭代 +std::vector power_iteration_chebyshev( + const CSRMatrix& A, + int max_iter, + double tol, + std::vector& x +) { + const int n = A.rows; + std::vector x_old(n, 0.0); + std::vector x_new(n, 0.0); + double norm0 = 0.0; + for (int i = 0; i < n; ++i) norm0 += x[i] * x[i]; + norm0 = std::sqrt(norm0); + if (norm0 < 1e-12) { + for (int i = 0; i < n; ++i) x_old[i] = 1.0 / std::sqrt(static_cast(n)); + } else { + for (int i = 0; i < n; ++i) x_old[i] = x[i] / norm0; + } + double lambda_old = 0.0, lambda = 0.0; + for (int iter = 0; iter < max_iter; ++iter) { + A.multiply_inplace(x_old, x_new); + double norm = 0.0; + for (int i = 0; i < n; ++i) norm += x_new[i] * x_new[i]; + norm = std::sqrt(norm); + if (norm < 1e-12) break; + double inv_norm = 1.0 / norm; + for (int i = 0; i < n; ++i) x_new[i] *= inv_norm; + std::vector Ax(n, 0.0); + A.multiply_inplace(x_new, Ax); + double num = 0.0, den = 0.0; + for (int i = 0; i < n; ++i) { + num += x_new[i] * Ax[i]; + den += x_new[i] * x_new[i]; + } + lambda = (den > 0.0) ? (num / den) : 0.0; + double diff = 0.0; + for (int i = 0; i < n; ++i) { + double d = x_new[i] - x_old[i]; + diff += d * d; + } + diff = std::sqrt(diff); + if ((iter > 0 && std::abs(lambda - lambda_old) < tol * std::abs(lambda)) || (diff < tol)) { + for (int i = 0; i < n; ++i) x[i] = x_new[i]; + return x; + } + lambda_old = lambda; + x_old.swap(x_new); + } + for (int i = 0; i < n; ++i) x[i] = x_old[i]; + return x; +} + +double vector_norm(const std::vector& x) { + double s = 0.0; + #pragma omp parallel for reduction(+:s) if(x.size() > 1000) + for (size_t i = 0; i < x.size(); ++i) s += x[i] * x[i]; + return std::sqrt(s); +} + +void normalize_vector(std::vector& x, double norm) { + const double inv_norm = 1.0 / norm; + #pragma omp parallel for schedule(static) if(x.size() > 1000) + for (size_t i = 0; i < x.size(); ++i) { + x[i] *= inv_norm; + } +} + +double vector_diff_norm(const std::vector& x1, const std::vector& x2) { + double diff = 0.0; + #pragma omp parallel for reduction(+:diff) if(x1.size() > 1000) + for (size_t i = 0; i < x1.size(); ++i) { + double d = x1[i] - x2[i]; + diff += d * d; + } + return std::sqrt(diff); } \ No newline at end of file From 1b6a0d90a1f5c6cb955adf95f5b67a137320fc88 Mon Sep 17 00:00:00 2001 From: Nianle <24302010062@m.fudan.edu.cn> Date: Fri, 5 Dec 2025 06:48:12 -0700 Subject: [PATCH 3/6] Optimize Closeness Centrality --- .../functions/centrality/closeness.cpp | 294 ++++++++++++++++-- 1 file changed, 262 insertions(+), 32 deletions(-) diff --git a/cpp_easygraph/functions/centrality/closeness.cpp b/cpp_easygraph/functions/centrality/closeness.cpp index 9a2dc967..89b6d2ab 100644 --- a/cpp_easygraph/functions/centrality/closeness.cpp +++ b/cpp_easygraph/functions/centrality/closeness.cpp @@ -7,44 +7,186 @@ #include "../../classes/graph.h" #include "../../common/utils.h" #include "../../classes/linkgraph.h" -#include "../../classes/segment_tree.cpp" -double closeness_dijkstra(const Graph_L& G_l, const int &S, int cutoff, Segment_tree_zkw& segment_tree_zkw){ - const int dis_inf = 0x3f3f3f3f; +#include +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +// 堆节点:使用负值 + 最大堆实现最小堆 +typedef std::pair HeapNode; + +// 优化的邻接表缓存结构 - 按需构建,类似igraph的lazy_inclist +struct FastAdjCache { + std::vector neighbor_ptrs; + std::vector neighbor_counts; + std::vector weight_ptrs; + std::vector> neighbor_storage; + std::vector> weight_storage; + + void init(int N) { + neighbor_ptrs.resize(N + 1, nullptr); + neighbor_counts.resize(N + 1, 0); + neighbor_storage.resize(N + 1); + } + + void init_with_weights(int N) { + init(N); + weight_ptrs.resize(N + 1, nullptr); + weight_storage.resize(N + 1); + } + + inline void build_if_needed(int u, const std::vector& head, + const std::vector& edges, bool store_weights) { + if (neighbor_ptrs[u] != nullptr) return; + + std::vector& neis = neighbor_storage[u]; + for (int p = head[u]; p != -1; p = edges[p].next) { + neis.push_back(edges[p].to); + if (store_weights) { + weight_storage[u].push_back(edges[p].w); + } + } + + neighbor_counts[u] = neis.size(); + neighbor_ptrs[u] = neis.data(); + if (store_weights) { + weight_ptrs[u] = weight_storage[u].data(); + } + } + + inline int* get_neighbors_ptr(int u) const { return neighbor_ptrs[u]; } + inline int get_neighbor_count(int u) const { return neighbor_counts[u]; } + inline float* get_weights_ptr(int u) const { return weight_ptrs[u]; } +}; + +// BFS实现 - 直接使用原始邻接表 +double closeness_bfs_direct(const Graph_L& G_l, const int &S, int cutoff, + std::vector& already_counted, + std::vector& queue_storage, + int timestamp) { int N = G_l.n; - segment_tree_zkw.init(N); - std::vector dis(N+1, INT_MAX); const std::vector& E = G_l.edges; const std::vector& head = G_l.head; - int number_connected = 0; + + int nodes_reached = 0; long long sum_dis = 0; - dis[S] = 0; - segment_tree_zkw.change(S, 0); - while(segment_tree_zkw.t[1] != dis_inf) { - int u = segment_tree_zkw.num[1]; - if(u == 0) break; - segment_tree_zkw.change(u, dis_inf); - if (cutoff >= 0 && dis[u] > cutoff){ + + queue_storage.clear(); + + int queue_front = 0; + already_counted[S] = timestamp; + queue_storage.push_back(S); + queue_storage.push_back(0); + + while (queue_front < static_cast(queue_storage.size())) { + int u = queue_storage[queue_front++]; + int actdist = queue_storage[queue_front++]; + + if (cutoff >= 0 && actdist > cutoff) { continue; - } - number_connected += 1; - sum_dis += dis[u]; - for(register int p = head[u]; p != -1; p = E[p].next) { + } + + sum_dis += actdist; + nodes_reached++; + + for (int p = head[u]; p != -1; p = E[p].next) { int v = E[p].to; - if(cutoff >= 0 && (dis[u] + E[p].w) > cutoff){ + + if (already_counted[v] == timestamp) { continue; } - if (dis[v] > dis[u] + E[p].w) { - dis[v] = dis[u] + E[p].w; - segment_tree_zkw.change(v, dis[v]); - } + + already_counted[v] = timestamp; + queue_storage.push_back(v); + queue_storage.push_back(actdist + 1); } } - if (number_connected == 1) + + if (nodes_reached == 1) return 0.0; else - return 1.0 * (number_connected - 1) * (number_connected - 1) / ((N - 1) * sum_dis); + return 1.0 * (nodes_reached - 1) * (nodes_reached - 1) / ((N - 1) * sum_dis); +} + +// 检查图是否为无权图 +inline bool is_unweighted_graph(const Graph_L& G_l) { + const std::vector& E = G_l.edges; + for (const auto& edge : E) { + if (std::abs(edge.w - 1.0f) > 1e-9) { + return false; + } + } + return true; +} + +// Dijkstra实现 - 使用按需构建的邻接表缓存 +double closeness_dijkstra_cached(const Graph_L& G_l, const int &S, int cutoff, + std::vector& dist, + std::vector& which, + FastAdjCache& cache, + int timestamp) { + int N = G_l.n; + const std::vector& E = G_l.edges; + const std::vector& head = G_l.head; + + int nodes_reached = 0; + double sum_dis = 0.0; + + std::priority_queue heap; + dist[S] = 1.0f; + which[S] = timestamp; + heap.push({-1.0f, S}); + + while (!heap.empty()) { + HeapNode top = heap.top(); + heap.pop(); + float mindist = -top.first; + int minnei = top.second; + + if (mindist > dist[minnei]) { + continue; + } + + float actual_dist = mindist - 1.0f; + if (cutoff >= 0 && actual_dist > cutoff) { + continue; + } + + sum_dis += actual_dist; + nodes_reached++; + + cache.build_if_needed(minnei, head, E, true); + + int* neis = cache.get_neighbors_ptr(minnei); + float* ws = cache.get_weights_ptr(minnei); + int nlen = cache.get_neighbor_count(minnei); + + for (int j = 0; j < nlen; j++) { + int to = neis[j]; + float altdist = mindist + ws[j]; + float curdist = dist[to]; + + if (which[to] != timestamp) { + which[to] = timestamp; + dist[to] = altdist; + heap.push({-altdist, to}); + } else if (curdist == 0.0f || altdist < curdist) { + dist[to] = altdist; + heap.push({-altdist, to}); + } + } + } + + if (nodes_reached == 1) + return 0.0; + else + return 1.0 * (nodes_reached - 1) * (nodes_reached - 1) / ((N - 1) * sum_dis); } static py::object invoke_cpp_closeness_centrality(py::object G, py::object weight, @@ -58,27 +200,115 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh if (!cutoff.is_none()){ cutoff_ = cutoff.cast(); } - Segment_tree_zkw segment_tree_zkw(N); + + // 自动选择算法 + bool use_bfs = (weight.is_none() || is_unweighted_graph(G_l)); + std::vector CC; + if(!sources.is_none()){ py::list sources_list = py::list(sources); int sources_list_len = py::len(sources_list); - for(register int i = 0; i < sources_list_len; i++){ + CC.resize(sources_list_len); + + // 收集所有源节点ID + std::vector source_ids(sources_list_len); + for(int i = 0; i < sources_list_len; i++){ if(G_.node_to_id.attr("get")(sources_list[i],py::none()).is_none()){ printf("The node should exist in the graph!"); return py::none(); } - node_t source_id = G_.node_to_id.attr("get")(sources_list[i]).cast(); - float res = closeness_dijkstra(G_l, source_id, cutoff_,segment_tree_zkw); - CC.push_back(res); + source_ids[i] = G_.node_to_id.attr("get")(sources_list[i]).cast(); + } + + // OpenMP并行计算(参考eigenvector的并行策略) + // 只在源节点数量较多时启用并行,避免小任务的并行开销 + // omp parallel if(sources_list_len > 100) + { + // 每个线程独立的数据结构(避免竞争) + std::vector already_counted(N + 1, 0); + std::vector queue_storage; + queue_storage.reserve(N * 2); + + std::vector dist(N + 1, 0.0f); + std::vector which(N + 1, 0); + + FastAdjCache cache; + if (!use_bfs) { + cache.init_with_weights(N); + } + + // 为每个线程分配唯一的时间戳起始值 + #ifdef _OPENMP + int thread_id = omp_get_thread_num(); + int num_threads = omp_get_num_threads(); + int timestamp = thread_id * sources_list_len; + #else + int timestamp = 0; + #endif + + // 并行循环:每个线程处理不同的源节点 + // omp for schedule(dynamic, 1) + for(int i = 0; i < sources_list_len; i++){ + timestamp++; + double res; + if (use_bfs) { + res = closeness_bfs_direct(G_l, source_ids[i], cutoff_, + already_counted, queue_storage, timestamp); + } else { + res = closeness_dijkstra_cached(G_l, source_ids[i], cutoff_, + dist, which, cache, timestamp); + } + CC[i] = res; + } } } else{ - for(register int i = 1; i <= N; i++){ - float res = closeness_dijkstra(G_l, i, cutoff_,segment_tree_zkw); - CC.push_back(res); + CC.resize(N); + + // OpenMP并行计算所有节点 + // 只在节点数量较多时启用并行 + // omp parallel if(N > 100) + { + // 每个线程独立的数据结构 + std::vector already_counted(N + 1, 0); + std::vector queue_storage; + queue_storage.reserve(N * 2); + + std::vector dist(N + 1, 0.0f); + std::vector which(N + 1, 0); + + FastAdjCache cache; + if (!use_bfs) { + cache.init_with_weights(N); + } + + // 为每个线程分配唯一的时间戳起始值 + #ifdef _OPENMP + int thread_id = omp_get_thread_num(); + int num_threads = omp_get_num_threads(); + int timestamp = thread_id * N; + #else + int timestamp = 0; + #endif + + // 并行循环:dynamic调度适合负载不均衡的情况 + // omp for schedule(dynamic, 10) + for(int i = 1; i <= N; i++){ + timestamp++; + double res; + if (use_bfs) { + res = closeness_bfs_direct(G_l, i, cutoff_, + already_counted, queue_storage, timestamp); + } else { + res = closeness_dijkstra_cached(G_l, i, cutoff_, + dist, which, cache, timestamp); + } + CC[i - 1] = res; + } } } + py::array::ShapeContainer ret_shape{(int)CC.size()}; py::array_t ret(ret_shape, CC.data()); From 106b32243fa1fad693c8d2baa1d93dd9a816bffd Mon Sep 17 00:00:00 2001 From: Nianle <24302010062@m.fudan.edu.cn> Date: Fri, 5 Dec 2025 18:08:57 -0700 Subject: [PATCH 4/6] Enable OpenMP --- .../functions/centrality/closeness.cpp | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/cpp_easygraph/functions/centrality/closeness.cpp b/cpp_easygraph/functions/centrality/closeness.cpp index 89b6d2ab..d06f3ec9 100644 --- a/cpp_easygraph/functions/centrality/closeness.cpp +++ b/cpp_easygraph/functions/centrality/closeness.cpp @@ -17,10 +17,10 @@ #include #endif -// 堆节点:使用负值 + 最大堆实现最小堆 +// Heap node: use negative value + max heap to implement min heap typedef std::pair HeapNode; -// 优化的邻接表缓存结构 - 按需构建,类似igraph的lazy_inclist +// Optimized adjacency list cache struct FastAdjCache { std::vector neighbor_ptrs; std::vector neighbor_counts; @@ -64,7 +64,7 @@ struct FastAdjCache { inline float* get_weights_ptr(int u) const { return weight_ptrs[u]; } }; -// BFS实现 - 直接使用原始邻接表 +// BFS implementation - directly use raw adjacency list double closeness_bfs_direct(const Graph_L& G_l, const int &S, int cutoff, std::vector& already_counted, std::vector& queue_storage, @@ -113,7 +113,7 @@ double closeness_bfs_direct(const Graph_L& G_l, const int &S, int cutoff, return 1.0 * (nodes_reached - 1) * (nodes_reached - 1) / ((N - 1) * sum_dis); } -// 检查图是否为无权图 +// Check if the graph is unweighted inline bool is_unweighted_graph(const Graph_L& G_l) { const std::vector& E = G_l.edges; for (const auto& edge : E) { @@ -124,7 +124,7 @@ inline bool is_unweighted_graph(const Graph_L& G_l) { return true; } -// Dijkstra实现 - 使用按需构建的邻接表缓存 +// Dijkstra implementation - use on-demand adjacency cache double closeness_dijkstra_cached(const Graph_L& G_l, const int &S, int cutoff, std::vector& dist, std::vector& which, @@ -201,7 +201,7 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh cutoff_ = cutoff.cast(); } - // 自动选择算法 + // Auto algorithm selection bool use_bfs = (weight.is_none() || is_unweighted_graph(G_l)); std::vector CC; @@ -211,7 +211,7 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh int sources_list_len = py::len(sources_list); CC.resize(sources_list_len); - // 收集所有源节点ID + // Collect all source node IDs std::vector source_ids(sources_list_len); for(int i = 0; i < sources_list_len; i++){ if(G_.node_to_id.attr("get")(sources_list[i],py::none()).is_none()){ @@ -221,11 +221,11 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh source_ids[i] = G_.node_to_id.attr("get")(sources_list[i]).cast(); } - // OpenMP并行计算(参考eigenvector的并行策略) - // 只在源节点数量较多时启用并行,避免小任务的并行开销 - // omp parallel if(sources_list_len > 100) + // OpenMP parallel computation + // Only enable parallelism when sources are many to avoid overhead + #pragma omp parallel if(sources_list_len > 100) { - // 每个线程独立的数据结构(避免竞争) + // Per-thread data structures (avoid race conditions) std::vector already_counted(N + 1, 0); std::vector queue_storage; queue_storage.reserve(N * 2); @@ -238,7 +238,7 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh cache.init_with_weights(N); } - // 为每个线程分配唯一的时间戳起始值 + // Assign unique timestamp start for each thread #ifdef _OPENMP int thread_id = omp_get_thread_num(); int num_threads = omp_get_num_threads(); @@ -247,8 +247,8 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh int timestamp = 0; #endif - // 并行循环:每个线程处理不同的源节点 - // omp for schedule(dynamic, 1) + // Parallel loop: each thread handles different source node + #pragma omp for schedule(dynamic, 1) for(int i = 0; i < sources_list_len; i++){ timestamp++; double res; @@ -266,11 +266,11 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh else{ CC.resize(N); - // OpenMP并行计算所有节点 - // 只在节点数量较多时启用并行 - // omp parallel if(N > 100) + // OpenMP parallel computation for all nodes + // Only enable parallelism when node count is large + #pragma omp parallel if(N > 100) { - // 每个线程独立的数据结构 + // Per-thread data structures std::vector already_counted(N + 1, 0); std::vector queue_storage; queue_storage.reserve(N * 2); @@ -283,7 +283,7 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh cache.init_with_weights(N); } - // 为每个线程分配唯一的时间戳起始值 + // Assign unique timestamp start for each thread #ifdef _OPENMP int thread_id = omp_get_thread_num(); int num_threads = omp_get_num_threads(); @@ -292,8 +292,8 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh int timestamp = 0; #endif - // 并行循环:dynamic调度适合负载不均衡的情况 - // omp for schedule(dynamic, 10) + // Parallel loop: dynamic scheduling for load balancing + #pragma omp for schedule(dynamic, 10) for(int i = 1; i <= N; i++){ timestamp++; double res; From ff6c2809fca716bb4c59317d2d6838afb0618226 Mon Sep 17 00:00:00 2001 From: Nianle <24302010062@m.fudan.edu.cn> Date: Fri, 5 Dec 2025 18:58:15 -0700 Subject: [PATCH 5/6] Update cpp_easygraph/CMakeLists.txt: optional deps and safe target linking --- cpp_easygraph/CMakeLists.txt | 82 ++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/cpp_easygraph/CMakeLists.txt b/cpp_easygraph/CMakeLists.txt index 2e85607f..673c70fe 100644 --- a/cpp_easygraph/CMakeLists.txt +++ b/cpp_easygraph/CMakeLists.txt @@ -13,63 +13,55 @@ add_subdirectory(pybind11) option(EASYGRAPH_ENABLE_GPU "EASYGRAPH_ENABLE_GPU" OFF) -# 启用OpenMP支持 +# Find OpenMP (optional) find_package(OpenMP) if(OpenMP_CXX_FOUND) message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}") - message(STATUS "OpenMP flags: ${OpenMP_CXX_FLAGS}") else() - message(WARNING "OpenMP not found. Parallel optimization will be disabled.") + message(STATUS "OpenMP not found; continuing without OpenMP") endif() -# 查找ARPACK库(兼容无CMake配置的系统) -find_library(ARPACK_LIB NAMES arpack arpack_double PATHS ENV LD_LIBRARY_PATH) -if(NOT ARPACK_LIB) - message(FATAL_ERROR "ARPACK library not found") -else() +# Find ARPACK (optional; do not fail if absent) +find_library(ARPACK_LIB NAMES arpack arpack_double) +if(ARPACK_LIB) message(STATUS "Found ARPACK library: ${ARPACK_LIB}") + add_compile_definitions(HAVE_ARPACK=1) +else() + message(WARNING "ARPACK not found; ARPACK-based solvers will be disabled (fallback used).") + add_compile_definitions(HAVE_ARPACK=0) endif() find_package(LAPACK REQUIRED) -if (EASYGRAPH_ENABLE_GPU) - - pybind11_add_module(cpp_easygraph - ${SOURCES} - $ - ) - - set_property(TARGET cpp_easygraph PROPERTY CUDA_ARCHITECTURES all) - - target_compile_definitions(cpp_easygraph - PRIVATE EASYGRAPH_ENABLE_GPU - ) - - target_link_libraries(cpp_easygraph - PRIVATE cudart_static - ) - -else() - - pybind11_add_module(cpp_easygraph - ${SOURCES} - ) - -endif() - +# Find Eigen (optional). Change to REQUIRED if Eigen must be enforced. find_package(Eigen3 QUIET) if(Eigen3_FOUND) - message(STATUS "Found Eigen3: ${EIGEN3_INCLUDE_DIR}") - include_directories(${EIGEN3_INCLUDE_DIR}) - add_definitions(-DEIGEN_MAJOR_VERSION=${EIGEN3_VERSION_MAJOR}) -endif() - -# 将OpenMP链接到target -if(OpenMP_CXX_FOUND) - target_link_libraries(cpp_easygraph PRIVATE OpenMP::OpenMP_CXX) - target_compile_options(cpp_easygraph PRIVATE ${OpenMP_CXX_FLAGS}) - message(STATUS "OpenMP enabled for cpp_easygraph") + message(STATUS "Found Eigen3: ${Eigen3_INCLUDE_DIRS}") + add_compile_definitions(HAVE_EIGEN=1) +else() + message(STATUS "Eigen3 not found; eigen-accelerated code will be disabled") + add_compile_definitions(HAVE_EIGEN=0) endif() -# 链接ARPACK和LAPACK -target_link_libraries(cpp_easygraph PRIVATE ${ARPACK_LIB} LAPACK::LAPACK) \ No newline at end of file +# Link/include to cpp_easygraph only if the target exists. +# This avoids errors when the target is created elsewhere or later. +if(TARGET cpp_easygraph) + if(Eigen3_FOUND) + target_include_directories(cpp_easygraph PRIVATE ${Eigen3_INCLUDE_DIRS}) + if(TARGET Eigen3::Eigen) + target_link_libraries(cpp_easygraph PRIVATE Eigen3::Eigen) + endif() + endif() + + if(OpenMP_CXX_FOUND) + target_link_libraries(cpp_easygraph PRIVATE OpenMP::OpenMP_CXX) + endif() + + if(ARPACK_LIB) + target_link_libraries(cpp_easygraph PRIVATE ${ARPACK_LIB} LAPACK::LAPACK) + else() + target_link_libraries(cpp_easygraph PRIVATE LAPACK::LAPACK) + endif() +else() + message(STATUS "cpp_easygraph target not defined yet; optional libs will be linked after target creation.") +endif() \ No newline at end of file From 6537360aad36f6661cd5fe3ab0aae72607078d93 Mon Sep 17 00:00:00 2001 From: NianLe <24302010062@m.fudan.edu.cn> Date: Sat, 6 Dec 2025 11:53:58 +0800 Subject: [PATCH 6/6] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- cpp_easygraph/cpp_easygraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp_easygraph/cpp_easygraph.cpp b/cpp_easygraph/cpp_easygraph.cpp index 487c0b1b..1733d7e0 100644 --- a/cpp_easygraph/cpp_easygraph.cpp +++ b/cpp_easygraph/cpp_easygraph.cpp @@ -83,7 +83,7 @@ PYBIND11_MODULE(cpp_easygraph, m) { m.def("cpp_constraint", &constraint, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_effective_size", &effective_size, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_efficiency", &efficiency, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); - m.def("cpp_eigenvector_centrality", &cpp_eigenvector_centrality, py::arg("G"), py::arg("max_iter") = 100, py::arg("tol") = 1.0e-6, py::arg("nstart") = py::none(), py::arg("weight") = "weight"); + m.def("cpp_eigenvector_centrality", &cpp_eigenvector_centrality, py::arg("G"), py::arg("max_iter") = 100, py::arg("tol") = 1.0e-6, py::arg("nstart") = py::none(), py::arg("weight") = py::none()); m.def("cpp_hierarchy", &hierarchy, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_pagerank", &_pagerank, py::arg("G"), py::arg("alpha") = 0.85, py::arg("max_iterator") = 500, py::arg("threshold") = 1e-6); m.def("cpp_dijkstra_multisource", &_dijkstra_multisource, py::arg("G"), py::arg("sources"), py::arg("weight") = "weight", py::arg("target") = py::none());