From 7e2c20be557b4c35e4da8eec39bc180b01cf96f4 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 14 Jun 2026 03:31:59 +0800 Subject: [PATCH 1/2] [fix](binlog) Fix missing binlog column index when converting TTabletSchema to TabletSchemaPB --- be/src/storage/tablet/tablet_meta.cpp | 9 +- be/test/storage/tablet/tablet_meta_test.cpp | 96 +++++++++++++++++++++ 2 files changed, 103 insertions(+), 2 deletions(-) diff --git a/be/src/storage/tablet/tablet_meta.cpp b/be/src/storage/tablet/tablet_meta.cpp index 22cf6a083bbbfa..d70fd41beae611 100644 --- a/be/src/storage/tablet/tablet_meta.cpp +++ b/be/src/storage/tablet/tablet_meta.cpp @@ -523,13 +523,18 @@ void TabletMeta::init_schema_from_thrift(const TTabletSchema& tablet_schema, } else { unique_id = col_ordinal_to_unique_id.at(col_ordinal); } - col_ordinal++; init_column_from_tcolumn(unique_id, tcolumn, column); - if (column->is_bf_column()) { has_bf_columns = true; } + if (column->name() == BINLOG_LSN_COL) { + tablet_schema_pb->set_binlog_lsn_col_idx(col_ordinal); + } else if (column->name() == BINLOG_TIMESTAMP_COL) { + tablet_schema_pb->set_binlog_timestamp_col_idx(col_ordinal); + } + col_ordinal++; + if (tablet_schema.__isset.indexes) { for (auto& index : tablet_schema.indexes) { if (index.index_type == TIndexType::type::BLOOMFILTER || diff --git a/be/test/storage/tablet/tablet_meta_test.cpp b/be/test/storage/tablet/tablet_meta_test.cpp index 33b784f666f1e0..6073c5592a8471 100644 --- a/be/test/storage/tablet/tablet_meta_test.cpp +++ b/be/test/storage/tablet/tablet_meta_test.cpp @@ -28,6 +28,8 @@ #include "storage/file_header.h" #include "storage/rowset/rowset.h" #include "storage/tablet/tablet_schema.h" +#include "storage/utils.h" +#include "testutil/creators.h" #include "testutil/mock_rowset.h" namespace doris { @@ -403,4 +405,98 @@ TEST(TabletMetaTest, TestDeleteBitmap) { EXPECT_EQ(d.cardinality(), 500); } +TEST(TabletMetaTest, test_row_binlog_schema_binlog_col_idx) { + // 1. Create TCreateTabletReq + TCreateTabletReq request; + request.tablet_id = 10086; + + // Manually construct the base tablet_schema + TTabletSchema base_schema; + base_schema.keys_type = TKeysType::UNIQUE_KEYS; + base_schema.short_key_column_count = 1; + + // Add normal column k1 + TColumn k1_col; + k1_col.column_name = "k1"; + TColumnType k1_type; + k1_type.type = TPrimitiveType::INT; + k1_col.column_type = k1_type; + k1_col.is_key = true; + k1_col.is_allow_null = false; + base_schema.columns.push_back(k1_col); + + // Add normal column v1 + TColumn v1_col; + v1_col.column_name = "v1"; + TColumnType v1_type; + v1_type.type = TPrimitiveType::INT; + v1_col.column_type = v1_type; + v1_col.is_key = false; + v1_col.is_allow_null = true; + v1_col.aggregation_type = TAggregationType::NONE; + base_schema.columns.push_back(v1_col); + + request.tablet_schema = base_schema; + + // Enable row binlog + testutil::enable_row_binlog(&request); + + // Get the full schema including binlog columns + TTabletSchema row_binlog_schema = request.row_binlog_schema; + + // Dynamically find the actual indices of binlog columns + int expected_lsn_idx = -1; + int expected_ts_idx = -1; + for (size_t i = 0; i < row_binlog_schema.columns.size(); ++i) { + if (row_binlog_schema.columns[i].column_name == BINLOG_LSN_COL) { + expected_lsn_idx = static_cast(i); + } + if (row_binlog_schema.columns[i].column_name == BINLOG_TIMESTAMP_COL) { + expected_ts_idx = static_cast(i); + } + } + + // Ensure binlog columns are found + ASSERT_NE(expected_lsn_idx, -1) << "Failed to find " << BINLOG_LSN_COL; + ASSERT_NE(expected_ts_idx, -1) << "Failed to find " << BINLOG_TIMESTAMP_COL; + + // Set col_unique_id + for (size_t i = 0; i < row_binlog_schema.columns.size(); ++i) { + row_binlog_schema.columns[i].col_unique_id = static_cast(i); + } + + // Build col_ordinal_to_unique_id mapping + std::unordered_map col_ordinal_to_unique_id; + for (uint32_t i = 0; i < row_binlog_schema.columns.size(); ++i) { + col_ordinal_to_unique_id[i] = i; + } + + // Construct SchemaCreateOptions + TabletMeta::SchemaCreateOptions options = { + .col_ordinal_to_unique_id = col_ordinal_to_unique_id, + .compression_type = TCompressionType::LZ4F, + .inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V2, + .next_unique_id = static_cast(row_binlog_schema.columns.size())}; + + // Call the function under test + TabletSchemaPB schema_pb; + TabletMeta::init_schema_from_thrift(row_binlog_schema, options, &schema_pb); + + // Verify binlog column indices match the dynamically found values + EXPECT_EQ(schema_pb.binlog_lsn_col_idx(), expected_lsn_idx); + EXPECT_EQ(schema_pb.binlog_timestamp_col_idx(), expected_ts_idx); + + // Create TabletSchema object from PB + TabletSchemaSPtr tablet_schema_ptr = std::make_shared(); + tablet_schema_ptr->init_from_pb(schema_pb); + + // Verify indices are correct after deserialization + EXPECT_EQ(tablet_schema_ptr->binlog_lsn_col_idx(), expected_lsn_idx); + EXPECT_EQ(tablet_schema_ptr->binlog_timestamp_col_idx(), expected_ts_idx); + + // Verify the column names at the found indices + EXPECT_EQ(tablet_schema_ptr->column(expected_lsn_idx).name(), BINLOG_LSN_COL); + EXPECT_EQ(tablet_schema_ptr->column(expected_ts_idx).name(), BINLOG_TIMESTAMP_COL); +} + } // namespace doris From 32036117d3320459d6393dd1b3117f7e9463e257 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 15 Jun 2026 10:53:45 +0800 Subject: [PATCH 2/2] [fix](binlog) Fix nullable type mismatch for timestamp column in _fill_binlog_columns --- .../segment/row_binlog_segment_writer.cpp | 37 ++++++++++--------- be/src/storage/utils.h | 1 + be/test/testutil/creators.h | 7 ++++ 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/be/src/storage/segment/row_binlog_segment_writer.cpp b/be/src/storage/segment/row_binlog_segment_writer.cpp index 97a1152a28a373..c271cb596fa552 100644 --- a/be/src/storage/segment/row_binlog_segment_writer.cpp +++ b/be/src/storage/segment/row_binlog_segment_writer.cpp @@ -303,43 +303,44 @@ Status RowBinlogSegmentWriter::_fill_binlog_columns(size_t num_rows, // we can't get correct lsn number before commit, because we can't get the version before commit, // but we can fill auto-inc lsn to ensure the order first, then fill version when read single rowset. IColumn* lsn_col_ptr = binlog_prefix_columns[0].get(); + // LSN column may be nullable in test environment, unwrap it first + auto* lsn_nullable = assert_cast(lsn_col_ptr); + auto* lsn_nested = &lsn_nullable->get_nested_column(); CHECK(_lsn_ids->size() >= num_rows) << _lsn_ids->size() << " vs " << num_rows; for (int i = 0; i < num_rows; i++) { - assert_cast(lsn_col_ptr) + assert_cast(lsn_nested) ->insert_value(static_cast(_lsn_ids->at(i))); } + // Update null map to mark all rows as non-null + for (int i = 0; i < num_rows; i++) { + lsn_nullable->get_null_map_data().push_back(0); + } // wrong op only happens when partial-update, it will be fixed by delete bitmap when publish const FieldType op_col_type = _tablet_schema->column(binlog_cids[1]).type(); IColumn* op_col_ptr = binlog_prefix_columns[1].get(); - auto* op_nullable_column = check_and_get_column(op_col_ptr); - IColumn* op_nested_column = op_nullable_column != nullptr - ? &op_nullable_column->get_nested_column() - : op_col_ptr; + // OP column may be nullable in test environment, unwrap it first + auto* op_nullable = assert_cast(op_col_ptr); + auto* op_nested = &op_nullable->get_nested_column(); CHECK(op_types.size() >= num_rows) << op_types.size() << " vs " << num_rows; CHECK(op_col_type == FieldType::OLAP_FIELD_TYPE_BIGINT) << "row binlog op column type must be BIGINT, actual=" << static_cast(op_col_type); - auto* op_int64_column = assert_cast(op_nested_column); + auto* op_int64_column = assert_cast(op_nested); for (int i = 0; i < num_rows; i++) { op_int64_column->insert_value(op_types[i]); } + // Update null map to mark all rows as non-null + for (int i = 0; i < num_rows; i++) { + op_nullable->get_null_map_data().push_back(0); + } - // We can't get the real commit tso here (only known after publish). The tso column - // is replaced with the real commit_tso at read time - // (SegmentIterator::_update_tso_col_if_needed), so its on-disk value is never used. - // Write a NULL placeholder. + // TIMESTAMP column IColumn* ts_col_ptr = binlog_prefix_columns[2].get(); + // Timestamp column is always nullable, directly use assert_cast auto* ts_nullable_column = assert_cast(ts_col_ptr); - ts_nullable_column->insert_many_defaults(num_rows); // NULL placeholder (value + null map) - - // finally update null map for op column (timestamp null map set by insert_many_defaults) - for (int i = 0; i < num_rows; i++) { - if (op_nullable_column != nullptr) { - op_nullable_column->get_null_map_data().emplace_back(0); - } - } + ts_nullable_column->insert_many_defaults(num_rows); } // LOG(INFO) << binlog_prefix_block.dump_data(0, num_rows); diff --git a/be/src/storage/utils.h b/be/src/storage/utils.h index 342e04ed4717e6..f1dc59757bd196 100644 --- a/be/src/storage/utils.h +++ b/be/src/storage/utils.h @@ -41,6 +41,7 @@ static const std::string SKIP_BITMAP_COL = "__DORIS_SKIP_BITMAP_COL__"; static const std::string SEQUENCE_COL = "__DORIS_SEQUENCE_COL__"; static const std::string BINLOG_TIMESTAMP_COL = "__DORIS_BINLOG_TIMESTAMP__"; static const std::string BINLOG_LSN_COL = "__DORIS_BINLOG_LSN__"; +static const std::string BINLOG_OP_COL = "__DORIS_BINLOG_OP__"; // 用来加速运算 const static int32_t g_power_table[] = {1, 10, 100, 1000, 10000, diff --git a/be/test/testutil/creators.h b/be/test/testutil/creators.h index 3034a01b086d1c..1979e50100b275 100644 --- a/be/test/testutil/creators.h +++ b/be/test/testutil/creators.h @@ -38,6 +38,7 @@ #include "runtime/query_context.h" #include "storage/binlog.h" #include "storage/tablet_info.h" +#include "storage/utils.h" #include "util/uid_util.h" namespace doris { @@ -220,6 +221,12 @@ inline void enable_row_binlog(TCreateTabletReq* request, int32_t row_binlog_sche row_binlog_schema.columns.push_back( create_tablet_column({std::string(kRowBinlogTimestampColName), TPrimitiveType::BIGINT, false, true, TAggregationType::NONE})); + for (auto& col : row_binlog_schema.columns) { + if (col.column_name == BINLOG_LSN_COL || col.column_name == BINLOG_OP_COL || + col.column_name == BINLOG_TIMESTAMP_COL) { + col.__set_is_allow_null(true); + } + } request->__set_row_binlog_schema(row_binlog_schema); }