-
Notifications
You must be signed in to change notification settings - Fork 0
Bp implementation #38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| #pragma once | ||
|
|
||
| #include <pixie/rmm_tree.h> | ||
|
|
||
| #include <cstdint> | ||
|
|
||
| namespace pixie { | ||
| /** | ||
| * @brief A node class of BP tree | ||
| * | ||
| * number is the index of the node and pos is its position in the | ||
| * parentheses sequence; BpTree::root() uses 0 for both, and | ||
| * BpTree::first_child() derives a child by adding 1 to both fields. | ||
| */ | ||
| struct BpNode { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please make it so it is public part of the |
||
| size_t number; | ||
| size_t pos; | ||
|
|
||
| BpNode(size_t node_number, size_t bp_pos) | ||
| : number(node_number), pos(bp_pos) {} | ||
| }; | ||
|
|
||
| /** | ||
| * @brief A tree class based on the balanced parentheses (BP) | ||
| * representation | ||
| * | ||
| * Navigation queries are delegated to an RmMTree built over the bit | ||
| * sequence passed to the constructor. | ||
| */ | ||
| class BpTree { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I also think it is better to write abbreviations in upper case for class names, i.e. `BPNode` and `BPTree`. |
||
| private: | ||
| // Number of bits in the parentheses sequence (the constructor sets it to 2 * tree_size). | ||
| const size_t num_bits_; | ||
| // Answers the bit / rank1 / close / enclose queries used by the navigation methods. | ||
| RmMTree rmm; | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| public: | ||
| /** | ||
| * @brief Constructor from an external array of uint64_t | ||
| * @param words 64-bit words holding the parentheses sequence; forwarded | ||
| * to the RmMTree member | ||
| * @param tree_size Number of nodes; 2 * tree_size is stored as the bit | ||
| * count and forwarded to the RmMTree member | ||
| */ | ||
| explicit BpTree(const std::vector<std::uint64_t>& words, size_t tree_size) | ||
| : num_bits_(2 * tree_size), rmm(words, 2 * tree_size) {} | ||
|
|
||
| /** | ||
| * @brief Returns the root: node number 0 located at position 0 of the | ||
| * parentheses sequence | ||
| */ | ||
| static BpNode root() { | ||
| constexpr size_t kRootNumber = 0; | ||
| constexpr size_t kRootPos = 0; | ||
| return BpNode(kRootNumber, kRootPos); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns the number of nodes, i.e. half of the stored bit count | ||
| */ | ||
| size_t size() const { | ||
| const size_t node_count = num_bits_ / 2; | ||
| return node_count; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Tells whether @p node has no children | ||
| * | ||
| * A node is reported as a leaf when it occupies the position just before | ||
| * the end of the sequence, or when the bit right after its position is 0. | ||
| */ | ||
| bool is_leaf(const BpNode& node) const { | ||
| const size_t following = node.pos + 1; | ||
| if (following == num_bits_) { | ||
| // Nothing follows the node, so the bit lookup must be skipped. | ||
| return true; | ||
| } | ||
| return rmm.bit(following) == 0; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Tells whether @p node is the root, i.e. carries node number 0 | ||
| */ | ||
| static bool is_root(const BpNode& node) { | ||
| constexpr size_t kRootNumber = 0; | ||
| return node.number == kRootNumber; | ||
| } | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Although the implementation doesn't depend on the tree instance, in fact it is a tree method, so please remove `static`. |
||
|
|
||
| /** | ||
| * @brief Counts the children of @p node | ||
| * | ||
| * Walks the sibling chain from the first child to the last one, so it | ||
| * performs one step per child (O(d) steps for degree d). | ||
| * | ||
| * TODO: find a faster way to compute this | ||
| */ | ||
| size_t degree(const BpNode& node) const { | ||
| if (is_leaf(node)) { | ||
| return 0; | ||
| } | ||
| // A non-leaf has at least its first child; count the remaining siblings. | ||
| size_t total = 1; | ||
| for (BpNode cur = first_child(node); !is_last_child(cur); | ||
| cur = next_sibling(cur)) { | ||
| ++total; | ||
| } | ||
| return total; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns first child of a @p node | ||
| * | ||
| * The result has number = node.number + 1 and pos = node.pos + 1. | ||
| * No check is made that @p node actually has children. | ||
| */ | ||
| static BpNode first_child(const BpNode& node) { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same thing about |
||
| size_t pos = node.pos + 1; | ||
| size_t num = node.number + 1; | ||
| return BpNode(num, pos); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns the child of @p node with index @p i (0-based) | ||
| * | ||
| * Starts at the first child and advances @p i times with next_sibling(), | ||
| * so it takes O(i) sibling steps. | ||
| * | ||
| * TODO: find a faster way to compute this | ||
| */ | ||
| BpNode child(const BpNode& node, size_t i) const { | ||
| BpNode cur = first_child(node); | ||
| for (size_t step = 0; step < i; ++step) { | ||
| cur = next_sibling(cur); | ||
| } | ||
| return cur; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns the parent of @p node; for the root (node number 0) | ||
| * the root itself is returned | ||
| */ | ||
| BpNode parent(const BpNode& node) const { | ||
| if (node.number != 0) { | ||
| // Position comes from the enclose query, the number from rank1 on it. | ||
| const size_t parent_pos = rmm.enclose(node.pos + 1); | ||
| const size_t parent_number = rmm.rank1(parent_pos) - 1; | ||
| return BpNode(parent_number, parent_pos); | ||
| } | ||
| return root(); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Tells whether @p node is the last child of its parent | ||
| * | ||
| * True when nothing follows the position returned by the close query for | ||
| * this node, or when the bit that follows it is 0. | ||
| */ | ||
| bool is_last_child(const BpNode& node) const { | ||
| const size_t close_pos = rmm.close(node.pos + 1); | ||
| const size_t following = close_pos + 1; | ||
| // Short-circuit keeps the bit lookup inside the sequence. | ||
| return following >= num_bits_ || rmm.bit(following) == 0; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns the sibling that follows @p node | ||
| * | ||
| * The sibling is placed one position past the result of the close query | ||
| * for @p node; its number is derived from rank1 at that position. | ||
| * No check is made that such a sibling exists. | ||
| */ | ||
| BpNode next_sibling(const BpNode& node) const { | ||
| const size_t sibling_pos = rmm.close(node.pos + 1) + 1; | ||
| const size_t sibling_number = rmm.rank1(sibling_pos + 1) - 1; | ||
| return BpNode(sibling_number, sibling_pos); | ||
| } | ||
| }; | ||
| } // namespace pixie | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2195,7 +2195,23 @@ class RmMTree { | |
| } | ||
| build(leaf_block_bits, max_overhead); | ||
| } | ||
| public: | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please make it so that there's a single private section on top and a single public section at bottom of class declaration |
||
| /** | ||
| * @brief Export internal bitvector into a 0/1 string. | ||
| * | ||
| * The result has num_bits characters; character i is '1' when bit | ||
| * (i & 63) of word (i >> 6) is set and '0' otherwise. | ||
| */ | ||
| std::string to_string() const { | ||
| std::string result; | ||
| result.resize(num_bits); | ||
|
|
||
| for (size_t i = 0; i < num_bits; ++i) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUGGESTION: Optimize bit-to-string conversion for large vectors The per-bit loop does a shift and index for every bit. A byte-level LUT (256-entry) or word-level conversion can reduce overhead when exporting large bitvectors.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will wait for better times |
||
| uint64_t word = bits[i >> 6]; | ||
| bool bit = (word >> (i & 63)) & 1ULL; | ||
| result[i] = bit ? '1' : '0'; | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
| private: | ||
| /** | ||
| * @brief Build internal structures from 64-bit words. | ||
| * @param words Words with LSB-first bits. | ||
|
|
@@ -2215,13 +2231,15 @@ class RmMTree { | |
| build(leaf_block_bits, max_overhead); | ||
| } | ||
|
|
||
| public: | ||
| /** | ||
| * @brief Returns the bit stored at @p position (0 or 1), with bits laid | ||
| * out LSB-first across 64-bit words. | ||
| */ | ||
| inline int bit(const size_t& position) const noexcept { | ||
| const size_t word_index = position >> 6;  // 64 bits per word | ||
| const size_t bit_offset = position & 63; | ||
| return (bits[word_index] >> bit_offset) & 1u; | ||
| } | ||
|
|
||
| private: | ||
| /** | ||
| * @brief Set bit at position @p position to 1. | ||
| */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| #include "pixie/bp_tree.h" | ||
|
|
||
| #include <gtest/gtest.h> | ||
| #include <pixie/utils.h> | ||
|
|
||
| #include <random> | ||
| #include <stack> | ||
|
|
||
| using pixie::BpNode; | ||
| using pixie::BpTree; | ||
|
|
||
| // Walks a small hand-written tree from the root, always taking child 0, | ||
| // and checks that BpTree and the AdjListTree reference agree at every step. | ||
| TEST(BpTreeTest, Basic) { | ||
| // NOTE(review): each list presumably starts with the parent followed by the | ||
| // children, which makes this a 5-node path; confirm against adj_to_bp. | ||
| std::vector<std::vector<size_t>> adj = {{0, 1}, {0, 2}, {1, 3}, {2, 4}, {3}}; | ||
| // Single source of truth for the node count (previously repeated as a literal 5). | ||
| const size_t tree_size = adj.size(); | ||
|
|
||
| std::vector<uint64_t> bp = adj_to_bp(tree_size, adj); | ||
|
|
||
| BpTree bp_tree(bp, tree_size); | ||
| AdjListTree debug_tree(adj); | ||
|
|
||
| BpNode cur = bp_tree.root(); | ||
| AdjListNode debug = debug_tree.root(); | ||
| // tree_size - 1 descents; every visited node is compared before moving on. | ||
| for (size_t i = 0; i < tree_size - 1; i++) { | ||
| EXPECT_EQ(cur, debug); | ||
| cur = bp_tree.child(cur, 0); | ||
| debug = debug_tree.child(debug, 0); | ||
| } | ||
| // The node reached by the final descent is compared as well. | ||
| EXPECT_EQ(cur, debug); | ||
| } | ||
|
|
||
| // Cross-checks BpTree against the AdjListTree reference on random trees, | ||
| // visiting matching node pairs with an explicit stack. | ||
| TEST(BpTreeTest, RandomTreeDFS) { | ||
| // Sizes double from 8 up to 2^21 (the loop stops before 1 << 22). | ||
| for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) { | ||
| // Fixed seed, re-created for every size. | ||
| std::mt19937_64 rng(42); | ||
| std::vector<std::vector<size_t>> adj = generate_random_tree(tree_size, rng); | ||
| adj = dfs_order(tree_size, adj); | ||
| std::vector<uint64_t> bp = adj_to_bp(tree_size, adj); | ||
| BpTree bp_tree(bp, tree_size); | ||
| AdjListTree debug_tree(adj); | ||
|
|
||
| std::stack<std::pair<BpNode, AdjListNode>> st; | ||
|
|
||
| st.push({bp_tree.root(), debug_tree.root()}); | ||
|
|
||
| while (!st.empty()) { | ||
| auto cur = st.top().first; | ||
| auto debug = st.top().second; | ||
| st.pop(); | ||
| EXPECT_EQ(cur, debug); | ||
| EXPECT_EQ(bp_tree.parent(cur), debug_tree.parent(debug)); | ||
|
|
||
| // is_last_child is only compared for non-root nodes. | ||
| if (cur.number > 0) { | ||
| EXPECT_EQ(bp_tree.is_last_child(cur), debug_tree.is_last_child(debug)); | ||
| } | ||
| size_t deg = bp_tree.degree(cur); | ||
| EXPECT_EQ(deg, debug_tree.degree(debug)); | ||
|
|
||
| if (deg == 0) { | ||
| continue; | ||
| } | ||
| // Children are reached via first_child/next_sibling on the BP side and | ||
| // via first_child/child(i) on the reference side. | ||
| auto child = bp_tree.first_child(cur); | ||
| auto debug_child = debug_tree.first_child(debug); | ||
| st.push({child, debug_child}); | ||
| for (size_t i = 1; i < deg; i++) { | ||
| child = bp_tree.next_sibling(child); | ||
| st.push({child, debug_tree.child(debug, i)}); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| // Cross-checks BpTree against the AdjListTree reference on random trees, | ||
| // visiting matching node pairs in FIFO order. | ||
| TEST(BpTreeTest, RandomTreeBFS) { | ||
| // Sizes double from 8 up to 2^21 (the loop stops before 1 << 22). | ||
| for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) { | ||
| // Fixed seed, re-created for every size. | ||
| std::mt19937_64 rng(42); | ||
| std::vector<std::vector<size_t>> adj = generate_random_tree(tree_size, rng); | ||
| adj = dfs_order(tree_size, adj); | ||
| std::vector<uint64_t> bp = adj_to_bp(tree_size, adj); | ||
| BpTree bp_tree(bp, tree_size); | ||
| AdjListTree debug_tree(adj); | ||
|
|
||
| std::queue<std::pair<BpNode, AdjListNode>> pending; | ||
|
|
||
| pending.push({bp_tree.root(), debug_tree.root()}); | ||
|
|
||
| while (!pending.empty()) { | ||
| auto cur = pending.front().first; | ||
| auto debug = pending.front().second; | ||
| pending.pop(); | ||
| // Compare the node itself, as the DFS and Basic tests do. | ||
| EXPECT_EQ(cur, debug); | ||
| EXPECT_EQ(bp_tree.parent(cur), debug_tree.parent(debug)); | ||
|
|
||
| // is_last_child is only compared for non-root nodes. | ||
| if (cur.number > 0) { | ||
| EXPECT_EQ(bp_tree.is_last_child(cur), debug_tree.is_last_child(debug)); | ||
| } | ||
| size_t deg = bp_tree.degree(cur); | ||
| EXPECT_EQ(deg, debug_tree.degree(debug)); | ||
|
|
||
| if (deg == 0) { | ||
| continue; | ||
| } | ||
| // Children are reached via first_child/next_sibling on the BP side and | ||
| // via first_child/child(i) on the reference side. | ||
| auto child = bp_tree.first_child(cur); | ||
| auto debug_child = debug_tree.first_child(debug); | ||
| pending.push({child, debug_child}); | ||
| for (size_t i = 1; i < deg; i++) { | ||
| child = bp_tree.next_sibling(child); | ||
| pending.push({child, debug_tree.child(debug, i)}); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tests should be added to coverage/build-test workflows (but not for AVX-512).