Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,15 @@ if(PIXIE_TESTS)
gtest
gtest_main
${PIXIE_DIAGNOSTICS_LIBS})

add_executable(bp_tree_tests
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests should be added to coverage/build-test workflows (but not for AVX-512).

src/tests/bp_tree_tests.cpp)
# Make the project's include/ directory visible to the BP tree test sources.
target_include_directories(bp_tree_tests
PUBLIC include)
# Link GoogleTest (gtest, gtest_main) plus whatever PIXIE_DIAGNOSTICS_LIBS
# expands to.
target_link_libraries(bp_tree_tests
gtest
gtest_main
${PIXIE_DIAGNOSTICS_LIBS})
endif()

# ---------------------------------------------------------------------------
Expand Down
135 changes: 135 additions & 0 deletions include/pixie/bp_tree.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#pragma once

#include <pixie/rmm_tree.h>

#include <cstdint>

namespace pixie {
/**
* @brief A node class of BP tree
*/
struct BpNode {
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make it so it is public part of the BpTree class and is simply called Node.

// Number of the node. BpTree::root() uses 0 and the other BpTree methods
// derive it from the node's position in the parentheses sequence.
size_t number;
// Position of the node inside the parentheses bit sequence (0 for the root).
size_t pos;

/**
 * @brief Builds a node from its number and its position in the BP sequence.
 * @param node_number Value stored in `number`.
 * @param bp_pos Value stored in `pos`.
 */
BpNode(size_t node_number, size_t bp_pos)
: number(node_number), pos(bp_pos) {}
};

/**
* @brief A tree class based on the balanced parentheses (BP)
* representation
*/
class BpTree {
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also think it is better to write abbreviations in upper case for class names, i.e. BPTree

private:
// Length of the parentheses sequence in bits (the constructor sets it to
// 2 * tree_size).
const size_t num_bits_;
// RmMTree built over the same words; the methods below use its bit(),
// rank1(), close() and enclose() queries.
RmMTree rmm;
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rmm_


public:
/**
 * @brief Constructor from an external array of uint64_t
 *
 * @param words Packed parentheses bits; forwarded unchanged to the RmMTree
 *        member.
 * @param tree_size Number of tree nodes. The sequence length passed to the
 *        RmMTree member is 2 * tree_size bits.
 */
explicit BpTree(const std::vector<std::uint64_t>& words, size_t tree_size)
: num_bits_(2 * tree_size), rmm(words, 2 * tree_size) {}

/**
 * @brief Returns the root node
 *
 * The root always has number 0 and position 0, so no tree state is read.
 */
static BpNode root() { return BpNode(0, 0); }

/**
 * @brief Returns the size of the tree
 *
 * @return Half of the stored bit count, i.e. the tree_size given to the
 *         constructor.
 */
size_t size() const { return num_bits_ / 2; }

/**
 * @brief Tells whether @p node has no children.
 *
 * A node is reported as a leaf when the bit right after its own position is
 * 0, or when its position is the last one in the sequence.
 */
bool is_leaf(const BpNode& node) const {
  const size_t next = node.pos + 1;
  if (next == num_bits_) {
    return true;
  }
  return rmm.bit(next) == 0;
}

/**
 * @brief Indicates if @p node is the root
 *
 * Only the node number is inspected; the root is the node numbered 0.
 */
static bool is_root(const BpNode& node) { return node.number == 0; }
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although the implementation doesn't depend on the tree instance, in fact it is a tree method, so please remove static.


/**
* @brief Returns the number of children of a @p node
* this method has O(d) time complexity!
*
* TODO try make this faster
*/
size_t degree(const BpNode& node) const {
if (is_leaf(node)) {
return 0;
}
BpNode child = first_child(node);
size_t child_count = 1;
while (true) {
if (is_last_child(child)) {
return child_count;
}
child = next_sibling(child);
child_count++;
}
}

/**
* @brief Returns first child of a @p node
*/
static BpNode first_child(const BpNode& node) {
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same thing about static

size_t pos = node.pos + 1;
size_t num = node.number + 1;
return BpNode(num, pos);
}

/**
 * @brief Returns the child of @p node with index @p i (0-based).
 *
 * Takes first_child() and then applies next_sibling() @p i times, which
 * costs O(i). No check is made that @p node has more than @p i children.
 *
 * TODO try make this faster
 */
BpNode child(const BpNode& node, size_t i) const {
  BpNode current = first_child(node);
  for (size_t step = 0; step < i; ++step) {
    current = next_sibling(current);
  }
  return current;
}

/**
 * @brief Returns the parent of a @p node if @p node is not root,
 * else returns root
 *
 * The parent's position is taken from rmm.enclose(node.pos + 1) and its
 * number from rmm.rank1() at that position.
 *
 * NOTE(review): rank1 is queried with `pos` here, while next_sibling()
 * queries it with `pos + 1` to number the node located at `pos`. Confirm the
 * index convention of enclose()'s return value and that the returned `pos`
 * really is the parent's position.
 */
BpNode parent(const BpNode& node) const {
if (node.number == 0) {
// The root is reported as its own parent.
return root();
}
size_t pos = rmm.enclose(node.pos + 1);
size_t num = rmm.rank1(pos) - 1;
return BpNode(num, pos);
}

/**
 * @brief Tells whether @p node is the last child of its parent.
 *
 * Looks at the bit that follows the position returned by
 * rmm.close(node.pos + 1): the node is the last child when that bit is 0 or
 * lies past the end of the sequence.
 */
bool is_last_child(const BpNode& node) const {
  const size_t close_pos = rmm.close(node.pos + 1);
  const size_t after_close = close_pos + 1;
  return after_close >= num_bits_ || rmm.bit(after_close) == 0;
}

/**
 * @brief Returns next sibling of a @p node
 *
 * The sibling is located at the bit right after the position returned by
 * rmm.close(node.pos + 1); its number is rmm.rank1(pos + 1) - 1.
 *
 * No check is made that a next sibling exists. degree() tests
 * is_last_child() before calling this; child() does not, so presumably its
 * callers pass a valid index — verify.
 */
BpNode next_sibling(const BpNode& node) const {
size_t pos = rmm.close(node.pos + 1) + 1;
size_t num = rmm.rank1(pos + 1) - 1;
return BpNode(num, pos);
}
};
} // namespace pixie
18 changes: 18 additions & 0 deletions include/pixie/rmm_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2195,7 +2195,23 @@ class RmMTree {
}
build(leaf_block_bits, max_overhead);
}
public:
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make it so that there's a single private section on top and a single public section at bottom of class declaration

/**
* @brief Export internal bitvector into a 0/1 string.
*/
std::string to_string() const {
std::string result;
result.resize(num_bits);

for (size_t i = 0; i < num_bits; ++i) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SUGGESTION: Optimize bit-to-string conversion for large vectors

The per-bit loop does a shift and index for every bit. A byte-level LUT (256-entry) or word-level conversion can reduce overhead when exporting large bitvectors.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will wait for better times

uint64_t word = bits[i >> 6];
bool bit = (word >> (i & 63)) & 1ULL;
result[i] = bit ? '1' : '0';
}

return result;
}
private:
/**
* @brief Build internal structures from 64-bit words.
* @param words Words with LSB-first bits.
Expand All @@ -2215,13 +2231,15 @@ class RmMTree {
build(leaf_block_bits, max_overhead);
}

public:
/**
 * @brief Fetch a single bit of the stored sequence.
 *
 * @param position Bit index; bit k (counted from the least significant bit)
 *        of word w has index 64 * w + k.
 * @return 0 or 1.
 */
inline int bit(const size_t& position) const noexcept {
  const size_t word_index = position >> 6;
  const size_t offset_in_word = position & 63;
  return (bits[word_index] >> offset_in_word) & 1u;
}

private:
/**
* @brief Set bit at position @p position to 1.
*/
Expand Down
62 changes: 61 additions & 1 deletion include/pixie/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
#include <random>
#include <vector>

#include "bp_tree.h"

using pixie::BpNode;
using pixie::LoudsNode;

std::vector<std::vector<size_t>> generate_random_tree(size_t tree_size,
Expand All @@ -30,7 +33,7 @@ std::vector<std::vector<size_t>> bfs_order(
std::queue<std::pair<size_t, size_t>> q;
bfs_adj[0].push_back(0);
q.push({0, 0});
int cnt = 1;
size_t cnt = 1;
while (!q.empty()) {
size_t old_v = q.front().first;
size_t cur_v = q.front().second;
Expand All @@ -46,6 +49,32 @@ std::vector<std::vector<size_t>> bfs_order(
return bfs_adj;
}

/**
 * @brief Renumbers the nodes of a rooted tree in depth-first visiting order.
 *
 * The tree is walked from node 0 with an explicit stack. Nodes receive new
 * numbers 0, 1, 2, ... in the order they are first reached, and the returned
 * adjacency lists are expressed in those new numbers.
 *
 * @param tree_size Number of nodes in the tree.
 * @param adj Adjacency lists in the original numbering. Entry 0 of every list
 *        is skipped (the output stores the parent there); the remaining
 *        entries are visited as children, in order.
 * @return Adjacency lists in the new numbering: entry 0 of each list is the
 *         parent (the root lists itself), followed by the children in visiting
 *         order. Empty when @p tree_size is 0.
 *
 * Declared inline because the definition lives in a header.
 */
inline std::vector<std::vector<size_t>> dfs_order(
    size_t tree_size,
    const std::vector<std::vector<size_t>>& adj) {
  std::vector<std::vector<size_t>> dfs_adj(tree_size);
  if (tree_size == 0) {
    // No root to seed the walk with; dfs_adj[0] would be out of bounds.
    return dfs_adj;
  }

  std::vector<size_t> renumbering(tree_size, 0);
  // Each frame is (original node, index of the last adjacency entry consumed).
  std::vector<std::pair<size_t, size_t>> stack;
  dfs_adj[0].push_back(0);
  stack.emplace_back(size_t{0}, size_t{0});
  size_t next_number = 1;

  while (!stack.empty()) {
    auto& [v, i] = stack.back();
    ++i;
    // `>=` rather than `==`: a node with an empty list must not be indexed.
    if (i >= adj[v].size()) {
      stack.pop_back();
      continue;
    }
    const size_t u = adj[v][i];
    const size_t parent_number = renumbering[v];
    const size_t child_number = next_number++;
    renumbering[u] = child_number;
    dfs_adj[parent_number].push_back(child_number);
    dfs_adj[child_number].push_back(parent_number);
    // v and i dangle after this push; they are not used again in this pass.
    stack.emplace_back(u, size_t{0});
  }
  return dfs_adj;
}

std::vector<uint64_t> adj_to_louds(
size_t tree_size,
const std::vector<std::vector<size_t>>& adj) {
Expand All @@ -59,6 +88,29 @@ std::vector<uint64_t> adj_to_louds(
return louds;
}

/**
 * @brief Encodes a rooted tree as a parentheses bit sequence.
 *
 * The tree is walked depth-first from node 0 with an explicit stack. A 1 bit
 * is written at the position where a node is entered, and the position where
 * it is left stays 0.
 *
 * @param tree_size Number of nodes; the sequence occupies 2 * tree_size bits.
 * @param adj Adjacency lists. Entry 0 of every list is skipped (dfs_order()
 *        stores the parent there); the remaining entries are visited as
 *        children, in order.
 * @return Words holding the sequence: bit i lives in word i / 64 at bit
 *         i % 64. Empty when @p tree_size is 0.
 *
 * Declared inline because the definition lives in a header.
 */
inline std::vector<uint64_t> adj_to_bp(
    size_t tree_size,
    const std::vector<std::vector<size_t>>& adj) {
  const size_t bp_size = tree_size * 2;
  std::vector<uint64_t> bp((bp_size + 63) / 64, 0);
  if (tree_size == 0) {
    // bp is empty, so writing the root's bit would be out of bounds.
    return bp;
  }
  const auto set_bit = [&bp](size_t at) { bp[at >> 6] |= 1ULL << (at & 63); };

  // Each frame is (node, index of the last adjacency entry consumed).
  std::vector<std::pair<size_t, size_t>> stack;
  stack.emplace_back(size_t{0}, size_t{0});
  size_t pos = 0;
  set_bit(pos);  // the root is entered at position 0

  while (!stack.empty()) {
    auto& [v, p] = stack.back();
    ++p;
    ++pos;  // each step uses one position, whether entering or leaving
    if (p >= adj[v].size()) {
      stack.pop_back();
      continue;
    }
    set_bit(pos);
    const size_t next = adj[v][p];
    // v and p dangle after this push; they are not used again in this pass.
    stack.emplace_back(next, size_t{0});
  }
  return bp;
}

struct AdjListNode {
size_t number;
};
Expand All @@ -71,6 +123,14 @@ bool operator==(const LoudsNode& b, const AdjListNode& a) {
return a.number == b.number;
}

/**
 * @brief Compares an adjacency-list node with a BP node.
 *
 * Two nodes are equal when their `number` fields match; BpNode::pos is not
 * taken into account. Declared inline because the definition lives in a
 * header.
 */
inline bool operator==(const AdjListNode& a, const BpNode& b) {
  return a.number == b.number;
}

/**
 * @brief Compares a BP node with an adjacency-list node.
 *
 * Mirror of the (AdjListNode, BpNode) overload so the operands may appear in
 * either order. Only the `number` fields are compared. Declared inline
 * because the definition lives in a header.
 */
inline bool operator==(const BpNode& b, const AdjListNode& a) {
  return a.number == b.number;
}

class AdjListTree {
private:
std::vector<std::vector<size_t>> adj;
Expand Down
110 changes: 110 additions & 0 deletions src/tests/bp_tree_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#include "pixie/bp_tree.h"

#include <gtest/gtest.h>
#include <pixie/utils.h>

#include <random>
#include <stack>

using pixie::BpNode;
using pixie::BpTree;

// Descends through child 0 at every level of a five-node tree in which each
// node has at most one child, checking that BpTree and the adjacency-list
// reference agree on the node reached at every step.
TEST(BpTreeTest, Basic) {
  // Entry 0 of each list is skipped by adj_to_bp; the second entry, where
  // present, is the single child.
  std::vector<std::vector<size_t>> adj = {{0, 1}, {0, 2}, {1, 3}, {2, 4}, {3}};
  size_t tree_size = 5;

  std::vector<uint64_t> bp = adj_to_bp(tree_size, adj);

  // Use tree_size rather than a repeated literal so the encoder and the tree
  // cannot drift apart.
  BpTree bp_tree(bp, tree_size);
  AdjListTree debug_tree(adj);

  BpNode cur = bp_tree.root();
  AdjListNode debug = debug_tree.root();
  for (size_t depth = 0; depth + 1 < tree_size; ++depth) {
    EXPECT_EQ(cur, debug);
    cur = bp_tree.child(cur, 0);
    debug = debug_tree.child(debug, 0);
  }
  // The deepest node is compared after the loop.
  EXPECT_EQ(cur, debug);
}

// Compares BpTree with the adjacency-list reference on random trees whose
// sizes double from 8 up to (but excluding) 2^22. Nodes are visited with a
// stack, and node number, parent, last-child flag and degree are checked at
// each one.
TEST(BpTreeTest, RandomTreeDFS) {
  for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) {
    std::mt19937_64 rng(42);
    // Renumber the random tree with dfs_order before encoding it.
    std::vector<std::vector<size_t>> adj =
        dfs_order(tree_size, generate_random_tree(tree_size, rng));
    std::vector<uint64_t> bp = adj_to_bp(tree_size, adj);
    BpTree bp_tree(bp, tree_size);
    AdjListTree debug_tree(adj);

    std::stack<std::pair<BpNode, AdjListNode>> pending;
    pending.push({bp_tree.root(), debug_tree.root()});

    while (!pending.empty()) {
      auto [cur, debug] = pending.top();
      pending.pop();

      EXPECT_EQ(cur, debug);
      EXPECT_EQ(bp_tree.parent(cur), debug_tree.parent(debug));
      // The last-child check is skipped for the root.
      if (cur.number > 0) {
        EXPECT_EQ(bp_tree.is_last_child(cur), debug_tree.is_last_child(debug));
      }

      size_t deg = bp_tree.degree(cur);
      EXPECT_EQ(deg, debug_tree.degree(debug));
      if (deg == 0) {
        continue;
      }

      // Children are reached through next_sibling on the BP side and through
      // indexed access on the reference side.
      BpNode child = bp_tree.first_child(cur);
      pending.push({child, debug_tree.first_child(debug)});
      for (size_t i = 1; i < deg; ++i) {
        child = bp_tree.next_sibling(child);
        pending.push({child, debug_tree.child(debug, i)});
      }
    }
  }
}


// Same comparison as RandomTreeDFS, but nodes are visited in breadth-first
// order through a queue. The tree is still renumbered with dfs_order before
// it is encoded.
TEST(BpTreeTest, RandomTreeBFS) {
  for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) {
    std::mt19937_64 rng(42);
    std::vector<std::vector<size_t>> adj = generate_random_tree(tree_size, rng);
    adj = dfs_order(tree_size, adj);
    std::vector<uint64_t> bp = adj_to_bp(tree_size, adj);
    BpTree bp_tree(bp, tree_size);
    AdjListTree debug_tree(adj);

    std::queue<std::pair<BpNode, AdjListNode>> pending;

    pending.push({bp_tree.root(), debug_tree.root()});

    while (!pending.empty()) {
      auto cur = pending.front().first;
      auto debug = pending.front().second;
      pending.pop();
      // Check the node number as RandomTreeDFS does; without it a wrong
      // number from next_sibling would go unnoticed in this traversal.
      EXPECT_EQ(cur, debug);
      EXPECT_EQ(bp_tree.parent(cur), debug_tree.parent(debug));

      // The last-child check is skipped for the root.
      if (cur.number > 0) {
        EXPECT_EQ(bp_tree.is_last_child(cur), debug_tree.is_last_child(debug));
      }
      size_t deg = bp_tree.degree(cur);
      EXPECT_EQ(deg, debug_tree.degree(debug));

      if (deg == 0) {
        continue;
      }
      auto child = bp_tree.first_child(cur);
      auto debug_child = debug_tree.first_child(debug);
      pending.push({child, debug_child});
      for (size_t i = 1; i < deg; i++) {
        child = bp_tree.next_sibling(child);
        pending.push({child, debug_tree.child(debug, i)});
      }
    }
  }
}

Loading