diff --git a/CMakeLists.txt b/CMakeLists.txt index c91128d..a0f2ef5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -148,6 +148,15 @@ if(PIXIE_TESTS) gtest gtest_main ${PIXIE_DIAGNOSTICS_LIBS}) + + add_executable(bp_tree_tests + src/tests/bp_tree_tests.cpp) + target_include_directories(bp_tree_tests + PUBLIC include) + target_link_libraries(bp_tree_tests + gtest + gtest_main + ${PIXIE_DIAGNOSTICS_LIBS}) endif() # --------------------------------------------------------------------------- diff --git a/include/pixie/bp_tree.h b/include/pixie/bp_tree.h new file mode 100644 index 0000000..854e200 --- /dev/null +++ b/include/pixie/bp_tree.h @@ -0,0 +1,135 @@ +#pragma once + +#include + +#include + +namespace pixie { +/** + * @brief A node of a BP tree: number identifies the node (the root is 0), pos is a position in the BP bit sequence + */ +struct BpNode { + size_t number; + size_t pos; + + BpNode(size_t node_number, size_t bp_pos) + : number(node_number), pos(bp_pos) {} +}; + +/** + * @brief A tree class based on the balanced parentheses (BP) + * representation + */ +class BpTree { + private: + const size_t num_bits_; + RmMTree rmm; + + public: + /** + * @brief Constructor from an external array of uint64_t; @p words and the bit count 2 * @p tree_size are passed to the RmMTree + */ + explicit BpTree(const std::vector& words, size_t tree_size) + : num_bits_(2 * tree_size), rmm(words, 2 * tree_size) {} + + /** + * @brief Returns the root node (number 0, pos 0) + */ + static BpNode root() { return BpNode(0, 0); } + + /** + * @brief Returns the size of the tree (half the number of BP bits) + */ + size_t size() const { return num_bits_ / 2; } + + /** + * @brief Indicates if @p node is a leaf: node.pos is the last bit or the bit right after it is 0 + */ + bool is_leaf(const BpNode& node) const { + return (node.pos + 1 == num_bits_) or rmm.bit(node.pos + 1) == 0; + } + + /** + * @brief Indicates if @p node is a root (its number is 0) + */ + static bool is_root(const BpNode& node) { return node.number == 0; } + + /** + * @brief Returns the number of children of a @p node + * this method has O(d) time complexity! 
+ * + * TODO try to make this faster + */ + size_t degree(const BpNode& node) const { + if (is_leaf(node)) { + return 0; + } + BpNode child = first_child(node); + size_t child_count = 1; + while (true) { + if (is_last_child(child)) { + return child_count; + } + child = next_sibling(child); + child_count++; + } + } + + /** + * @brief Returns the first child of @p node (pos + 1, number + 1); does not check that @p node has children + */ + static BpNode first_child(const BpNode& node) { + size_t pos = node.pos + 1; + size_t num = node.number + 1; + return BpNode(num, pos); + } + + /** + * @brief Returns the i-th child of @p node + * Indexing starts at 0; @p i is not checked against degree(node) + * this method has O(i) time complexity! + * + * TODO try to make this faster + */ + BpNode child(const BpNode& node, size_t i) const { + BpNode child = first_child(node); + while (i--) { + child = next_sibling(child); + } + return child; + } + + /** + * @brief Returns the parent of a @p node if @p node is not root, + * else returns root. NOTE(review): the number is rank1(pos) - 1 here but rank1(pos + 1) - 1 in next_sibling() - confirm which position enclose() returns + */ + BpNode parent(const BpNode& node) const { + if (node.number == 0) { + return root(); + } + size_t pos = rmm.enclose(node.pos + 1); + size_t num = rmm.rank1(pos) - 1; + return BpNode(num, pos); + } + + /** + * @brief Indicates if @p node is the last child: the bit after rmm.close(node.pos + 1) is 0 or past the end + */ + bool is_last_child(const BpNode& node) const { + size_t end = rmm.close(node.pos + 1); + if (end + 1 >= num_bits_) { + return true; + } + return rmm.bit(end + 1) == 0; + } + + /** + * @brief Returns the next sibling of @p node; does not check is_last_child(node) + */ + BpNode next_sibling(const BpNode& node) const { + size_t pos = rmm.close(node.pos + 1) + 1; + size_t num = rmm.rank1(pos + 1) - 1; + return BpNode(num, pos); + } +}; +} // namespace pixie diff --git a/include/pixie/rmm_tree.h b/include/pixie/rmm_tree.h index fe8bbe7..db703a3 100644 --- a/include/pixie/rmm_tree.h +++ b/include/pixie/rmm_tree.h @@ -2195,7 +2195,23 @@ class RmMTree { } build(leaf_block_bits, max_overhead); } +public: + /** + * @brief Export internal bitvector into a 0/1 string. 
+ */ + std::string to_string() const { + std::string result; + result.resize(num_bits); + + for (size_t i = 0; i < num_bits; ++i) { + uint64_t word = bits[i >> 6]; + bool bit = (word >> (i & 63)) & 1ULL; + result[i] = bit ? '1' : '0'; + } + return result; + } +private: /** * @brief Build internal structures from 64-bit words. * @param words Words with LSB-first bits. @@ -2215,6 +2231,7 @@ class RmMTree { build(leaf_block_bits, max_overhead); } + public: /** * @brief Read bit at position @p position (LSB-first across words). */ @@ -2222,6 +2239,7 @@ class RmMTree { return (bits[position >> 6] >> (position & 63)) & 1u; } + private: /** * @brief Set bit at position @p position to 1. */ diff --git a/include/pixie/utils.h b/include/pixie/utils.h index 24116d5..3dc5c4b 100644 --- a/include/pixie/utils.h +++ b/include/pixie/utils.h @@ -6,6 +6,9 @@ #include #include +#include "bp_tree.h" + +using pixie::BpNode; using pixie::LoudsNode; std::vector> generate_random_tree(size_t tree_size, @@ -30,7 +33,7 @@ std::vector> bfs_order( std::queue> q; bfs_adj[0].push_back(0); q.push({0, 0}); - int cnt = 1; + size_t cnt = 1; while (!q.empty()) { size_t old_v = q.front().first; size_t cur_v = q.front().second; @@ -46,6 +49,32 @@ std::vector> bfs_order( return bfs_adj; } +/** @brief Renumbers the tree in DFS visiting order (node 0 stays 0) and returns the adjacency lists in the new numbering; index 0 of every input list is skipped (presumably the parent entry) and index 0 of every output list holds the parent (the root lists itself) */std::vector> dfs_order( + size_t tree_size, + const std::vector>& adj) { + std::vector> dfs_adj(tree_size); + std::vector> stack; + dfs_adj[0].push_back(0); + stack.push_back({0, 0}); + std::vector renumbering(tree_size, 0); + size_t next_number = 1; + while (!stack.empty()) { + auto& [v, i] = stack.back(); + i++; + if (i == adj[v].size()) { + stack.pop_back(); + continue; + } + size_t u = adj[v][i]; + renumbering[u] = next_number++; + dfs_adj[renumbering[v]].push_back(renumbering[u]); + dfs_adj[renumbering[u]].push_back(renumbering[v]); + + stack.push_back(std::pair{u, 0}); + } + return dfs_adj; +} + std::vector adj_to_louds( size_t tree_size, const std::vector>& adj) { @@ -59,6 +88,29 @@ std::vector adj_to_louds( 
return louds; } +/** @brief Builds the BP bit sequence of the tree rooted at node 0, packed LSB-first into 64-bit words: a 1 bit is written when a node is entered and the bit is left 0 when it is exited; index 0 of every adjacency list is skipped (presumably the parent entry) */std::vector adj_to_bp(size_t tree_size, + const std::vector>& adj) { + size_t bp_size = tree_size * 2; + std::vector bp((bp_size + 63) / 64, 0); + std::vector> stack; + stack.push_back(std::make_pair(0, 0)); + size_t pos = 0; + bp[pos >> 6] = bp[pos >> 6] | (1ULL << (pos & 63)); + while (!stack.empty()) { + auto& [v, p] = stack.back(); + p++; + if (p >= adj[v].size()) { + pos++; + stack.pop_back(); + continue; + } + pos++; + bp[pos >> 6] = bp[pos >> 6] | (1ULL << (pos & 63)); + stack.push_back(std::make_pair(adj[v][p], 0)); + } + return bp; +} + struct AdjListNode { size_t number; }; @@ -71,6 +123,14 @@ bool operator==(const LoudsNode& b, const AdjListNode& a) { return a.number == b.number; } +/** @brief Compares node numbers only; BpNode::pos is not compared, so tests using this operator do not verify pos */bool operator==(const AdjListNode& a, const BpNode& b) { + return a.number == b.number; +} + +/** @brief Mirror overload: compares node numbers only; BpNode::pos is not compared */bool operator==(const BpNode& b, const AdjListNode& a) { + return a.number == b.number; +} + class AdjListTree { private: std::vector> adj; diff --git a/src/tests/bp_tree_tests.cpp b/src/tests/bp_tree_tests.cpp new file mode 100644 index 0000000..f262c5c --- /dev/null +++ b/src/tests/bp_tree_tests.cpp @@ -0,0 +1,110 @@ +#include "pixie/bp_tree.h" + +#include +#include + +#include +#include + +using pixie::BpNode; +using pixie::BpTree; + +/** Five-node tree given as adjacency lists: walks down with child(cur, 0) and compares node numbers with the adjacency-list tree at every step */TEST(BpTreeTest, Basic) { + std::vector> adj = {{0, 1}, {0, 2}, {1, 3}, {2, 4}, {3}}; + size_t tree_size = 5; + + std::vector bp = adj_to_bp(tree_size, adj); + + BpTree bp_tree(bp, 5); + AdjListTree debug_tree(adj); + + BpNode cur = bp_tree.root(); + AdjListNode debug = debug_tree.root(); + for (size_t i = 0; i < tree_size - 1; i++) { + EXPECT_EQ(cur, debug); + cur = bp_tree.child(cur, 0); + debug = debug_tree.child(debug, 0); + } + EXPECT_EQ(cur, debug); +} + +/** Random trees renumbered by dfs_order: compares node number, parent, is_last_child (non-root only) and degree against the adjacency-list tree while walking with a stack */TEST(BpTreeTest, RandomTreeDFS) { + for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) { + std::mt19937_64 rng(42); + std::vector> adj = generate_random_tree(tree_size, rng); + adj = dfs_order(tree_size, adj); + std::vector bp = adj_to_bp(tree_size, adj); + 
BpTree bp_tree(bp, tree_size); + AdjListTree debug_tree(adj); + + std::stack> st; + + st.push({bp_tree.root(), debug_tree.root()}); + + while (!st.empty()) { + auto cur = st.top().first; + auto debug = st.top().second; + st.pop(); + EXPECT_EQ(cur, debug); + EXPECT_EQ(bp_tree.parent(cur), debug_tree.parent(debug)); + + if (cur.number > 0) { + EXPECT_EQ(bp_tree.is_last_child(cur), debug_tree.is_last_child(debug)); + } + size_t deg = bp_tree.degree(cur); + EXPECT_EQ(deg, debug_tree.degree(debug)); + + if (deg == 0) { + continue; + } + auto child = bp_tree.first_child(cur); + auto debug_child = debug_tree.first_child(debug); + st.push({child, debug_child}); + for (size_t i = 1; i < deg; i++) { + child = bp_tree.next_sibling(child); + st.push({child, debug_tree.child(debug, i)}); + } + } + } +} + + +/** Random trees walked with a queue instead of a stack; the tree is still renumbered with dfs_order, and this test checks parent, is_last_child and degree but does not assert cur == debug */TEST(BpTreeTest, RandomTreeBFS) { + for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) { + std::mt19937_64 rng(42); + std::vector> adj = generate_random_tree(tree_size, rng); + adj = dfs_order(tree_size, adj); + std::vector bp = adj_to_bp(tree_size, adj); + BpTree bp_tree(bp, tree_size); + AdjListTree debug_tree(adj); + + std::queue> st; + + st.push({bp_tree.root(), debug_tree.root()}); + + while (!st.empty()) { + auto cur = st.front().first; + auto debug = st.front().second; + st.pop(); + EXPECT_EQ(bp_tree.parent(cur), debug_tree.parent(debug)); + + if (cur.number > 0) { + EXPECT_EQ(bp_tree.is_last_child(cur), debug_tree.is_last_child(debug)); + } + size_t deg = bp_tree.degree(cur); + EXPECT_EQ(deg, debug_tree.degree(debug)); + + if (deg == 0) { + continue; + } + auto child = bp_tree.first_child(cur); + auto debug_child = debug_tree.first_child(debug); + st.push({child, debug_child}); + for (size_t i = 1; i < deg; i++) { + child = bp_tree.next_sibling(child); + st.push({child, debug_tree.child(debug, i)}); + } + } + } +} +