diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 29903f7..84f4196 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -32,6 +32,10 @@ jobs: working-directory: ./build run: ./louds_tree_tests + - name: Run BP Tree Tests + working-directory: ./build + run: ./bp_tree_tests + - name: Run DFUDS Tree Tests working-directory: ./build run: ./dfuds_tree_tests @@ -96,6 +100,18 @@ jobs: fi exit $rc + - name: Run BP Tree Tests + working-directory: ./build + run: | + timeout 1800 sde-external-9.58.0-2025-06-16-lin/sde64 -icl -emu-xinuse 0 -- \ + ./bp_tree_tests --gtest_output=xml:bp_results.xml + rc=$? + if [ $rc -eq 124 ] && grep -q 'failures="0"' bp_results.xml 2>/dev/null; then + echo "SDE timed out during process teardown (known SDE/ASan issue) - all tests passed, treating as success" + exit 0 + fi + exit $rc + - name: Run DFUDS Tree Tests working-directory: ./build run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 87c59ad..24971d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,6 +175,15 @@ if(PIXIE_TESTS) gtest_main ${PIXIE_DIAGNOSTICS_LIBS}) + add_executable(bp_tree_tests + src/tests/bp_tree_tests.cpp) + target_include_directories(bp_tree_tests + PUBLIC include) + target_link_libraries(bp_tree_tests + gtest + gtest_main + ${PIXIE_DIAGNOSTICS_LIBS}) + add_executable(dfuds_tree_tests src/tests/dfuds_tree_tests.cpp) target_include_directories(dfuds_tree_tests @@ -239,6 +248,15 @@ if(PIXIE_BENCHMARKS) benchmark_main ${PIXIE_DIAGNOSTICS_LIBS}) + add_executable(bp_tree_benchmarks + src/benchmarks/bp_tree_benchmarks.cpp) + target_include_directories(bp_tree_benchmarks + PUBLIC include) + target_link_libraries(bp_tree_benchmarks + benchmark + benchmark_main + ${PIXIE_DIAGNOSTICS_LIBS}) + add_executable(dfuds_tree_benchmarks src/benchmarks/dfuds_tree_benchmarks.cpp) target_include_directories(dfuds_tree_benchmarks diff --git a/CMakePresets.json b/CMakePresets.json index 5da7763..b8bbc58 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -114,6 +114,7 @@ "benchmark_tests", "test_rmm", "louds_tree_tests", + "bp_tree_tests", "dfuds_tree_tests", "excess_positions_tests" ] @@ -127,6 +128,7 @@ "benchmark_tests", "test_rmm", "louds_tree_tests", + "bp_tree_tests", "dfuds_tree_tests", "excess_positions_tests" ] @@ -139,6 +141,7 @@ "benchmarks", "bench_rmm", "louds_tree_benchmarks", + "bp_tree_benchmarks", "dfuds_tree_benchmarks", "alignment_comparison", "excess_positions_benchmarks" @@ -153,6 +156,7 @@ "bench_rmm", "bench_rmm_sdsl", "louds_tree_benchmarks", + "bp_tree_benchmarks", "dfuds_tree_benchmarks", "alignment_comparison", "excess_positions_benchmarks" @@ -166,6 +170,7 @@ "benchmarks", "bench_rmm", "louds_tree_benchmarks", + "bp_tree_benchmarks", "dfuds_tree_benchmarks", "alignment_comparison", "excess_positions_benchmarks" @@ -188,6 +193,7 @@ "benchmark_tests", "test_rmm", "louds_tree_tests", + "bp_tree_tests", "dfuds_tree_tests", "excess_positions_tests" ] @@ -201,6 +207,7 @@ "benchmark_tests", "test_rmm", "louds_tree_tests", + "bp_tree_tests", "dfuds_tree_tests", "excess_positions_tests" ] diff --git a/include/pixie/bp_tree.h b/include/pixie/bp_tree.h new file mode 100644 index 0000000..54f4957 --- /dev/null +++ b/include/pixie/bp_tree.h @@ -0,0 +1,171 @@ +#pragma once + +#include + +#include + +#include "utils.h" + +namespace pixie { + +/** + * @brief A tree class based on the balances parentheses (BP) + * representation + */ +class BPTree { + private: + const size_t num_bits_; + RmMTree rmm; + + public: + /** + * @brief A node class of BP tree + */ + struct Node { + size_t number; + size_t pos; + + /** + * @brief A node class of BP tree + */ + Node(size_t node_number, size_t bp_pos) + : number(node_number), pos(bp_pos) {} + }; + + /** + * @brief Constructor from an external array of uint64_t + */ + explicit BPTree(const std::vector& words, size_t tree_size) + : num_bits_(2 * tree_size), rmm(words, 2 * tree_size) {} + + /** + * @brief Returns the root node + */ + Node root() const { return Node(0, 0); } + + /** + * @brief Returns the size of the tree + */ + size_t size() const { return num_bits_ / 2; } + + /** + * @brief Indicates if @p node is a leaf + */ + bool is_leaf(const Node& node) const { + return (node.pos + 2 == num_bits_) or rmm.bit(node.pos + 1) == 0; + } + + /** + * @brief Indicates if @p node is a root + */ + bool is_root(const Node& node) { return node.number == 0; } + + /** + * @brief Returns the number of children of a @p node + * this method has O(d) time complexity! + * + * TODO try make this faster + */ + size_t degree(const Node& node) const { + if (is_leaf(node)) { + return 0; + } + Node child = first_child(node); + size_t child_count = 1; + while (true) { + if (is_last_child(child)) { + return child_count; + } + child = next_sibling(child); + child_count++; + } + } + + /** + * @brief Returns first child of a @p node + */ + Node first_child(const Node& node) const { + size_t pos = node.pos + 1; + size_t num = node.number + 1; + return Node(num, pos); + } + + /** + * @brief Returns the i-th child of @p node + * Indexing starts at 0 + * this method has O(i) time complexity! + * + * TODO try make this faster + */ + Node child(const Node& node, size_t i) const { + Node child = first_child(node); + while (i--) { + child = next_sibling(child); + } + return child; + } + + /** + * @brief Returns the parent of a @p node if @p node is not root, + * else returns root + */ + Node parent(const Node& node) const { + if (node.number == 0) { + return root(); + } + size_t pos = rmm.enclose(node.pos); + size_t num = rmm.rank1(pos); + return Node(num, pos); + } + + /** + * @brief Indicates if @p node is last child + */ + bool is_last_child(const Node& node) const { + size_t end = rmm.close(node.pos); + + return end + 2 >= num_bits_ or rmm.bit(end + 1) == 0; + } + + /** + * @brief Returns next sibling of a @p node + */ + Node next_sibling(const Node& node) const { + size_t pos = rmm.close(node.pos) + 1; + size_t num = rmm.rank1(pos + 1) - 1; + return Node(num, pos); + } +}; + +std::vector adj_to_bp(size_t tree_size, + const std::vector>& adj) { + size_t bp_size = tree_size * 2; + std::vector bp((bp_size + 63) / 64, 0); + std::vector> stack; + stack.push_back(std::make_pair(0, 0)); + size_t pos = 0; + bp[pos >> 6] = bp[pos >> 6] | (1ULL << (pos & 63)); + while (!stack.empty()) { + auto& [v, p] = stack.back(); + p++; + if (p >= adj[v].size()) { + pos++; + stack.pop_back(); + continue; + } + pos++; + bp[pos >> 6] = bp[pos >> 6] | (1ULL << (pos & 63)); + stack.push_back(std::make_pair(adj[v][p], 0)); + } + return bp; +} + +bool operator==(const AdjListNode& a, const BPTree::Node& b) { + return a.number == b.number; +} + +bool operator==(const BPTree::Node& b, const AdjListNode& a) { + return a.number == b.number; +} + +} // namespace pixie diff --git a/include/pixie/utils.h b/include/pixie/utils.h index 2ce0234..fd08631 100644 --- a/include/pixie/utils.h +++ b/include/pixie/utils.h @@ -30,7 +30,7 @@ std::vector> bfs_order( std::queue> q; bfs_adj[0].push_back(0); q.push({0, 0}); - int cnt = 1; + size_t cnt = 1; while (!q.empty()) { size_t old_v = q.front().first; size_t cur_v = q.front().second; diff --git a/scripts/coverage_report.sh b/scripts/coverage_report.sh index 84bec46..8b85ce8 100755 --- a/scripts/coverage_report.sh +++ b/scripts/coverage_report.sh @@ -10,6 +10,7 @@ cmake --build --preset coverage "${BUILD_DIR}/unittests" "${BUILD_DIR}/excess_positions_tests" "${BUILD_DIR}/louds_tree_tests" +"${BUILD_DIR}/bp_tree_tests" "${BUILD_DIR}/dfuds_tree_tests" "${BUILD_DIR}/test_rmm" diff --git a/src/benchmarks/bp_tree_benchmarks.cpp b/src/benchmarks/bp_tree_benchmarks.cpp new file mode 100644 index 0000000..164283e --- /dev/null +++ b/src/benchmarks/bp_tree_benchmarks.cpp @@ -0,0 +1,66 @@ +#include +#include +#include + +#include + +using Node = pixie::BpTree::Node; +using pixie::BpTree; + +/** + * DFS with O(1) extra memory + */ +static void BM_BpTreeDFS(benchmark::State& state) { + size_t tree_size = state.range(0); + std::mt19937_64 rng(42); + + for (auto _ : state) { + state.PauseTiming(); + std::vector> adj = generate_random_tree(tree_size, rng); + adj = dfs_order(tree_size, adj); + std::vector bp = adj_to_bp(tree_size, adj); + BpTree tree(bp, tree_size); + + Node cur = tree.root(); + bool above = 1; + + state.ResumeTiming(); + + benchmark::DoNotOptimize(cur); + + while (true) { + if (above) { + if (tree.is_leaf(cur)) { + above = 0; + } else { + cur = tree.first_child(cur); + } + benchmark::DoNotOptimize(cur); + } else { + if (tree.is_last_child(cur)) { + cur = tree.parent(cur); + if (tree.is_root(cur)) { + break; + } + benchmark::DoNotOptimize(cur); + } else { + cur = tree.next_sibling(cur); + above = 1; + benchmark::DoNotOptimize(cur); + } + } + } + } +} + +BENCHMARK(BM_BpTreeDFS) + ->ArgNames({"tree_size"}) + ->RangeMultiplier(2) + ->Range(1ull << 8, 1ull << 18) + ->Iterations(100); + +BENCHMARK(BM_BpTreeDFS) + ->ArgNames({"tree_size"}) + ->RangeMultiplier(2) + ->Range(1ull << 18, 1ull << 26) + ->Iterations(10); diff --git a/src/tests/bp_tree_tests.cpp b/src/tests/bp_tree_tests.cpp new file mode 100644 index 0000000..5c4b15c --- /dev/null +++ b/src/tests/bp_tree_tests.cpp @@ -0,0 +1,111 @@ +#include "pixie/bp_tree.h" + +#include +#include + +#include +#include + +using Node = pixie::BPTree::Node; +using pixie::adj_to_bp; +using pixie::BPTree; + +TEST(BpTreeTest, Basic) { + std::vector> adj = {{0, 1}, {0, 2}, {1, 3}, {2, 4}, {3}}; + size_t tree_size = 5; + + std::vector bp = adj_to_bp(tree_size, adj); + + BPTree bp_tree(bp, 5); + AdjListTree debug_tree(adj); + + Node cur = bp_tree.root(); + AdjListNode debug = debug_tree.root(); + for (size_t i = 0; i < tree_size - 1; i++) { + EXPECT_EQ(cur, debug); + cur = bp_tree.child(cur, 0); + debug = debug_tree.child(debug, 0); + } + EXPECT_EQ(cur, debug); +} + +TEST(BpTreeTest, RandomTreeDFS) { + for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) { + std::mt19937_64 rng(42); + std::vector> adj = generate_random_tree(tree_size, rng); + adj = dfs_order(tree_size, adj); + std::vector bp = adj_to_bp(tree_size, adj); + BPTree bp_tree(bp, tree_size); + AdjListTree debug_tree(adj); + + std::stack> st; + + st.push({bp_tree.root(), debug_tree.root()}); + + while (!st.empty()) { + auto cur = st.top().first; + auto debug = st.top().second; + st.pop(); + EXPECT_EQ(cur, debug); + EXPECT_EQ(bp_tree.parent(cur), debug_tree.parent(debug)); + + if (cur.number > 0) { + EXPECT_EQ(bp_tree.is_last_child(cur), debug_tree.is_last_child(debug)); + } + size_t deg = bp_tree.degree(cur); + EXPECT_EQ(deg, debug_tree.degree(debug)); + EXPECT_EQ(bp_tree.is_leaf(cur), debug_tree.is_leaf(debug)); + + if (deg == 0) { + continue; + } + auto child = bp_tree.first_child(cur); + auto debug_child = debug_tree.first_child(debug); + st.push({child, debug_child}); + for (size_t i = 1; i < deg; i++) { + child = bp_tree.next_sibling(child); + st.push({child, debug_tree.child(debug, i)}); + } + } + } +} + +TEST(BpTreeTest, RandomTreeBFS) { + for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) { + std::mt19937_64 rng(42); + std::vector> adj = generate_random_tree(tree_size, rng); + adj = dfs_order(tree_size, adj); + std::vector bp = adj_to_bp(tree_size, adj); + BPTree bp_tree(bp, tree_size); + AdjListTree debug_tree(adj); + + std::queue> st; + + st.push({bp_tree.root(), debug_tree.root()}); + + while (!st.empty()) { + auto cur = st.front().first; + auto debug = st.front().second; + st.pop(); + EXPECT_EQ(bp_tree.parent(cur), debug_tree.parent(debug)); + + if (cur.number > 0) { + EXPECT_EQ(bp_tree.is_last_child(cur), debug_tree.is_last_child(debug)); + } + size_t deg = bp_tree.degree(cur); + EXPECT_EQ(deg, debug_tree.degree(debug)); + EXPECT_EQ(bp_tree.is_leaf(cur), debug_tree.is_leaf(debug)); + + if (deg == 0) { + continue; + } + auto child = bp_tree.first_child(cur); + auto debug_child = debug_tree.first_child(debug); + st.push({child, debug_child}); + for (size_t i = 1; i < deg; i++) { + child = bp_tree.next_sibling(child); + st.push({child, debug_tree.child(debug, i)}); + } + } + } +}