Skip to content

Commit b0c0177

Browse files
authored
Merge pull request #2964 from timbess/feature/decouple-pkey-from-limit
Update `limit` behavior to use incrementing `index` (fix `last_by_index` aggregate)
2 parents b71d515 + b7a4879 commit b0c0177

File tree

25 files changed

+570
-194
lines changed

25 files changed

+570
-194
lines changed

.github/workflows/build.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ jobs:
212212
is-release: false
213213

214214
steps:
215+
- name: Free up disk space
216+
run: |
217+
rm -rf /__t/*
218+
215219
- name: Checkout
216220
uses: actions/checkout@v4
217221

cpp/perspective/build.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ try {
4242
execSync(`mkdirp ${cwd}`, { stdio });
4343
process.env.CLICOLOR_FORCE = 1;
4444
execSync(
45-
`emcmake cmake ${__dirname} ${cmake_flags} -DCMAKE_BUILD_TYPE=${env}`,
45+
`emcmake cmake ${__dirname} ${cmake_flags} -DCMAKE_BUILD_TYPE=${env} -DRAPIDJSON_BUILD_EXAMPLES=OFF`,
4646
{
4747
cwd,
4848
stdio,

cpp/perspective/src/cpp/base.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,19 @@ type_to_dtype<void*>() {
799799
return DTYPE_OBJECT;
800800
}
801801

802+
std::ostream&
803+
operator<<(std::ostream& os, const t_op& op) {
804+
#define X(NAME) \
805+
case NAME: \
806+
os << #NAME; \
807+
break;
808+
809+
switch (op) { FOREACH_T_OP(X) }
810+
#undef X
811+
812+
return os;
813+
}
814+
802815
} // end namespace perspective
803816

804817
namespace std {

cpp/perspective/src/cpp/binding_api.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ void
4747
encode_api_response(
4848
const ProtoServerResp<std::string>& msg, EncodedApiResp* encoded
4949
) {
50-
auto* data = new char[msg.data.size()];
50+
auto* data = static_cast<char*>(UNINSTRUMENTED_MALLOC(msg.data.size()));
5151
std::copy(msg.data.begin(), msg.data.end(), data);
5252

5353
encoded->data = data;
@@ -57,8 +57,12 @@ encode_api_response(
5757

5858
EncodedApiEntries*
5959
encode_api_responses(const std::vector<ProtoServerResp<std::string>>& msgs) {
60-
auto* encoded = new EncodedApiEntries;
61-
encoded->entries = new EncodedApiResp[msgs.size()];
60+
auto* encoded = static_cast<EncodedApiEntries*>(
61+
UNINSTRUMENTED_MALLOC(sizeof(EncodedApiEntries))
62+
);
63+
encoded->entries = static_cast<EncodedApiResp*>(
64+
UNINSTRUMENTED_MALLOC(sizeof(EncodedApiResp) * msgs.size())
65+
);
6266

6367
encoded->size = msgs.size();
6468
auto* encoded_mem = encoded->entries;

cpp/perspective/src/cpp/context_zero.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
1111
// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
1212

13+
#include "perspective/base.h"
14+
#include "perspective/raw_types.h"
1315
#include <perspective/first.h>
1416
#include <perspective/context_base.h>
1517
#include <perspective/get_data_extents.h>
@@ -101,6 +103,7 @@ t_ctx0::notify(
101103
flattened.get_const_column("psp_pkey");
102104
std::shared_ptr<const t_column> op_sptr =
103105
flattened.get_const_column("psp_op");
106+
auto old_pkey_col = flattened.get_column("psp_old_pkey");
104107
const t_column* pkey_col = pkey_sptr.get();
105108
const t_column* op_col = op_sptr.get();
106109

@@ -173,11 +176,20 @@ t_ctx0::notify(
173176
m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx));
174177
std::uint8_t op_ = *(op_col->get_nth<std::uint8_t>(idx));
175178
t_op op = static_cast<t_op>(op_);
176-
bool existed = *(existed_col->get_nth<bool>(idx));
179+
const auto existed = *(existed_col->get_nth<bool>(idx));
180+
const auto old_pkey = old_pkey_col->get_scalar(idx);
177181

178182
switch (op) {
179183
case OP_INSERT: {
180-
if (existed) {
184+
if (old_pkey.is_valid()) {
185+
m_traversal->move_row(
186+
*m_gstate,
187+
*(m_expression_tables->m_master),
188+
m_config,
189+
old_pkey,
190+
pkey
191+
);
192+
} else if (existed) {
181193
m_traversal->update_row(
182194
*m_gstate,
183195
*(m_expression_tables->m_master),

cpp/perspective/src/cpp/data_table.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323
#include <sstream>
2424
#include <utility>
2525
namespace perspective {
26+
std::ostream&
27+
operator<<(std::ostream& os, const t_flatten_record& fr) {
28+
os << "store_idx: " << fr.m_store_idx << ", bidx: " << fr.m_begin_idx
29+
<< ", eidx: " << fr.m_edge_idx;
30+
return os;
31+
}
2632

2733
void
2834
t_data_table::set_capacity(t_uindex idx) {
@@ -319,15 +325,15 @@ t_data_table::get_schema() const {
319325
}
320326

321327
std::shared_ptr<t_data_table>
322-
t_data_table::flatten() const {
328+
t_data_table::flatten(t_uindex limit) const {
323329
PSP_TRACE_SENTINEL();
324330
PSP_VERBOSE_ASSERT(m_init, "touching uninited object");
325331
PSP_VERBOSE_ASSERT(is_pkey_table(), "Not a pkeyed table");
326332
std::shared_ptr<t_data_table> flattened = std::make_shared<t_data_table>(
327333
"", "", m_schema, DEFAULT_EMPTY_CAPACITY, BACKING_STORE_MEMORY
328334
);
329335
flattened->init();
330-
flatten_body<std::shared_ptr<t_data_table>>(flattened);
336+
flatten_body<std::shared_ptr<t_data_table>>(flattened, limit);
331337
return flattened;
332338
}
333339

@@ -635,6 +641,13 @@ t_data_table::join(const std::shared_ptr<t_data_table>& other_table) const {
635641
PSP_VERBOSE_ASSERT(m_init, "touching uninited object");
636642

637643
if (size() != other_table->size()) {
644+
#if PSP_DEBUG
645+
LOG_DEBUG("Joining current table:");
646+
pprint();
647+
LOG_DEBUG("on this this table:");
648+
other_table->pprint();
649+
#endif
650+
638651
std::stringstream ss;
639652
ss << "[t_data_table::join] Cannot join two tables of unequal sizes! "
640653
"Current size: "

cpp/perspective/src/cpp/flat_traversal.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,28 @@ t_ftrav::delete_row(t_tscalar pkey) {
388388
++m_step_deletes;
389389
}
390390

391+
void
392+
t_ftrav::move_row(
393+
const t_gstate& gstate,
394+
const t_data_table& expression_master_table,
395+
const t_config& config,
396+
t_tscalar old_pkey,
397+
t_tscalar new_pkey
398+
) {
399+
auto old_pkiter = m_pkeyidx.find(old_pkey);
400+
bool old_pkey_existed = old_pkiter != m_pkeyidx.end();
401+
if (!old_pkey_existed) {
402+
LOG_DEBUG("Tried to move pkey that doesn't exist: " << old_pkey);
403+
return;
404+
}
405+
LOG_DEBUG("Moving pkey from: " << old_pkey << " to: " << new_pkey);
406+
407+
(*m_index)[old_pkiter->second].m_deleted = true;
408+
t_mselem mselem;
409+
fill_sort_elem(gstate, expression_master_table, config, new_pkey, mselem);
410+
m_new_elems[new_pkey] = mselem;
411+
}
412+
391413
std::vector<t_sortspec>
392414
t_ftrav::get_sort_by() const {
393415
return m_sortby;

cpp/perspective/src/cpp/gnode.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
1111
// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
1212

13+
#include "perspective/raw_types.h"
1314
#include <perspective/first.h>
1415
#include <perspective/context_unit.h>
1516
#include <perspective/context_zero.h>
@@ -48,7 +49,9 @@ calc_negate(t_tscalar val) {
4849
return val.negate();
4950
}
5051

51-
t_gnode::t_gnode(t_schema input_schema, t_schema output_schema) :
52+
t_gnode::t_gnode(
53+
t_schema input_schema, t_schema output_schema, t_uindex limit
54+
) :
5255
m_mode(NODE_PROCESSING_SIMPLE_DATAFLOW)
5356
#ifdef PSP_PARALLEL_FOR
5457
,
@@ -60,6 +63,7 @@ t_gnode::t_gnode(t_schema input_schema, t_schema output_schema) :
6063
m_output_schema(std::move(output_schema)),
6164
m_init(false),
6265
m_id(0),
66+
m_limit(limit),
6367
m_last_input_port_id(0),
6468
m_pool_cleanup([]() {}) {
6569
PSP_TRACE_SENTINEL();
@@ -85,6 +89,10 @@ t_gnode::t_gnode(t_schema input_schema, t_schema output_schema) :
8589
existed_schema
8690
};
8791
m_epoch = std::chrono::high_resolution_clock::now();
92+
93+
m_input_schema.add_column(
94+
"psp_old_pkey", m_input_schema.get_dtype("psp_pkey")
95+
);
8896
}
8997

9098
t_gnode::~t_gnode() {
@@ -97,7 +105,8 @@ void
97105
t_gnode::init() {
98106
PSP_TRACE_SENTINEL();
99107

100-
m_gstate = std::make_shared<t_gstate>(m_input_schema, m_output_schema);
108+
m_gstate =
109+
std::make_shared<t_gstate>(m_input_schema, m_output_schema, m_limit);
101110
m_gstate->init();
102111

103112
// Create and store the main input port, which is always port 0. The next
@@ -123,7 +132,7 @@ t_gnode::init() {
123132

124133
for (const auto& iter : m_input_ports) {
125134
std::shared_ptr<t_port> input_port = iter.second;
126-
input_port->get_table()->flatten();
135+
input_port->get_table()->flatten(m_limit);
127136
}
128137

129138
// Initialize expression-related state
@@ -186,11 +195,13 @@ t_gnode::calc_transition(
186195

187196
if (!row_pre_existed && !cur_valid && !t_env::backout_invalid_neq_ft()) {
188197
trans = VALUE_TRANSITION_NEQ_FT;
189-
} else if (row_pre_existed && !prev_valid && !cur_valid && !t_env::backout_eq_invalid_invalid()) {
198+
} else if (row_pre_existed && !prev_valid && !cur_valid
199+
&& !t_env::backout_eq_invalid_invalid()) {
190200
trans = VALUE_TRANSITION_EQ_TT;
191201
} else if (!prev_existed && !exists) {
192202
trans = VALUE_TRANSITION_EQ_FF;
193-
} else if (row_pre_existed && exists && !prev_valid && cur_valid && !t_env::backout_nveq_ft()) {
203+
} else if (row_pre_existed && exists && !prev_valid && cur_valid
204+
&& !t_env::backout_nveq_ft()) {
194205
trans = VALUE_TRANSITION_NVEQ_FT;
195206
} else if (prev_existed && exists && prev_cur_eq) {
196207
trans = VALUE_TRANSITION_EQ_TT;
@@ -298,16 +309,22 @@ t_gnode::_process_table(t_uindex port_id) {
298309
}
299310

300311
m_was_updated = true;
301-
flattened = input_port->get_table()->flatten();
312+
flattened = input_port->get_table()->flatten(m_limit);
302313

303314
PSP_GNODE_VERIFY_TABLE(flattened);
304315
PSP_GNODE_VERIFY_TABLE(get_table());
305316

306317
t_uindex flattened_num_rows = flattened->num_rows();
307-
308318
std::vector<t_rlookup> row_lookup(flattened_num_rows);
309319
t_column* pkey_col = flattened->get_column("psp_pkey").get();
310320

321+
#if PSP_DEBUG
322+
LOG_DEBUG("m_mapping");
323+
for (const auto [k, v] : m_gstate->get_pkey_map()) {
324+
LOG_DEBUG("KEY: " << k << " , VALUE: " << v);
325+
}
326+
#endif
327+
311328
for (t_uindex idx = 0; idx < flattened_num_rows; ++idx) {
312329
// See if each primary key in flattened already exist in the dataset
313330
t_tscalar pkey = pkey_col->get_scalar(idx);

0 commit comments

Comments
 (0)