2222namespace doris ::vectorized {
2323#include " common/compile_check_begin.h"
2424
25- std::unique_ptr<EqualityDeleteBase> EqualityDeleteBase::get_delete_impl (Block* delete_block) {
25+ std::unique_ptr<EqualityDeleteBase> EqualityDeleteBase::get_delete_impl (
26+ Block* delete_block, const std::vector<int >& delete_col_ids) {
27+ DCHECK_EQ (delete_block->columns (), delete_col_ids.size ());
2628 if (delete_block->columns () == 1 ) {
27- return std::make_unique<SimpleEqualityDelete>(delete_block);
29+ return std::make_unique<SimpleEqualityDelete>(delete_block, delete_col_ids );
2830 } else {
29- return std::make_unique<MultiEqualityDelete>(delete_block);
31+ return std::make_unique<MultiEqualityDelete>(delete_block, delete_col_ids );
3032 }
3133}
3234
3335Status SimpleEqualityDelete::_build_set () {
3436 COUNTER_UPDATE (num_delete_rows, _delete_block->rows ());
35- if (_delete_block->columns () != 1 ) {
37+ if (_delete_block->columns () != 1 ) [[unlikely]] {
3638 return Status::InternalError (" Simple equality delete can be only applied with one column" );
3739 }
3840 auto & column_and_type = _delete_block->get_by_position (0 );
39- _delete_column_name = column_and_type.name ;
40- _delete_column_type = remove_nullable (column_and_type.type )->get_primitive_type ();
41- _hybrid_set.reset (create_set (_delete_column_type, _delete_block->rows (), false ));
41+ auto delete_column_type = remove_nullable (column_and_type.type )->get_primitive_type ();
42+ _hybrid_set.reset (create_set (delete_column_type, _delete_block->rows (), false ));
4243 _hybrid_set->insert_fixed_len (column_and_type.column , 0 );
4344 return Status::OK ();
4445}
4546
4647Status SimpleEqualityDelete::filter_data_block (
47- Block* data_block, const std::unordered_map<std::string, uint32_t >* col_name_to_block_idx) {
48+ Block* data_block, const std::unordered_map<std::string, uint32_t >* col_name_to_block_idx,
49+ const std::unordered_map<int , std::string>& id_to_block_column_name,
50+ IColumn::Filter& filter) {
4851 SCOPED_TIMER (equality_delete_time);
49- auto column_and_type =
50- data_block->get_by_position (col_name_to_block_idx->at (_delete_column_name));
51- if (column_and_type.type ->get_primitive_type () != _delete_column_type) {
52- return Status::InternalError (
53- " Not support type change in column '{}', src type: {}, target type: {}" ,
54- _delete_column_name, column_and_type.type ->get_name (), (int )_delete_column_type);
55- }
52+ DCHECK (_delete_col_ids.size () == 0 );
53+ auto column_field_id = _delete_col_ids[0 ];
54+
55+ auto column_and_type = data_block->get_by_position (
56+ col_name_to_block_idx->at (id_to_block_column_name.at (column_field_id)));
57+
5658 size_t rows = data_block->rows ();
57- // _filter: 1 => in _hybrid_set; 0 => not in _hybrid_set
58- if (_filter == nullptr ) {
59- _filter = std::make_unique<IColumn::Filter>(rows, 0 );
59+ // _filter: 1 => in _hybrid_set; 0 => not in _hybrid_set
60+ if (_single_filter == nullptr ) {
61+ _single_filter = std::make_unique<IColumn::Filter>(rows, 0 );
6062 } else {
6163 // reset the array capacity and fill all elements using the 0
62- _filter ->assign (rows, UInt8 (0 ));
64+ _single_filter ->assign (rows, UInt8 (0 ));
6365 }
64-
6566 if (column_and_type.column ->is_nullable ()) {
6667 const NullMap& null_map =
6768 reinterpret_cast <const ColumnNullable*>(column_and_type.column .get ())
6869 ->get_null_map_data ();
6970 _hybrid_set->find_batch_nullable (
7071 remove_nullable (column_and_type.column )->assume_mutable_ref (), rows, null_map,
71- *_filter );
72+ *_single_filter );
7273 if (_hybrid_set->contain_null ()) {
73- auto * filter_data = _filter ->data ();
74+ auto * filter_data = _single_filter ->data ();
7475 for (size_t i = 0 ; i < rows; ++i) {
7576 filter_data[i] = filter_data[i] || null_map[i];
7677 }
7778 }
7879 } else {
79- _hybrid_set->find_batch (column_and_type.column ->assume_mutable_ref (), rows, *_filter);
80+ _hybrid_set->find_batch (column_and_type.column ->assume_mutable_ref (), rows,
81+ *_single_filter);
8082 }
8183 // should reverse _filter
82- auto * filter_data = _filter-> data ();
84+ auto * filter_data = filter. data ();
8385 for (size_t i = 0 ; i < rows; ++i) {
84- filter_data[i] = !filter_data [i];
86+ filter_data[i] & = !_single_filter-> data () [i];
8587 }
86-
87- Block::filter_block_internal (data_block, *_filter, data_block->columns ());
8888 return Status::OK ();
8989}
9090
@@ -104,24 +104,32 @@ Status MultiEqualityDelete::_build_set() {
104104}
105105
106106Status MultiEqualityDelete::filter_data_block (
107- Block* data_block, const std::unordered_map<std::string, uint32_t >* col_name_to_block_idx) {
107+ Block* data_block, const std::unordered_map<std::string, uint32_t >* col_name_to_block_idx,
108+ const std::unordered_map<int , std::string>& id_to_block_column_name,
109+ IColumn::Filter& filter) {
108110 SCOPED_TIMER (equality_delete_time);
111+ DCHECK_EQ (_delete_block->get_columns_with_type_and_name ().size (), _delete_col_ids.size ());
109112 size_t column_index = 0 ;
110113
111- for (auto delete_col : _delete_block->get_columns_with_type_and_name ()) {
112- const std::string& column_name = delete_col.name ;
113- if (!col_name_to_block_idx->contains (column_name)) {
114- return Status::InternalError (" Column '{}' not found in data block: {}" , column_name,
115- data_block->dump_structure ());
114+ for (size_t idx = 0 ; idx < _delete_block->get_columns_with_type_and_name ().size (); ++idx) {
115+ auto delete_col = _delete_block->get_columns_with_type_and_name ()[idx];
116+ auto delete_col_id = _delete_col_ids[idx];
117+
118+ DCHECK (id_to_block_column_name.contains (delete_col_id));
119+ const auto & block_column_name = id_to_block_column_name.at (delete_col_id);
120+ if (!col_name_to_block_idx->contains (block_column_name)) [[unlikely]] {
121+ return Status::InternalError (" Column '{}' not found in data block: {}" ,
122+ block_column_name, data_block->dump_structure ());
116123 }
117124 auto column_and_type =
118- data_block->safe_get_by_position (col_name_to_block_idx->at (column_name ));
119- if (!delete_col.type ->equals (*column_and_type.type )) {
125+ data_block->safe_get_by_position (col_name_to_block_idx->at (block_column_name ));
126+ if (!delete_col.type ->equals (*column_and_type.type )) [[unlikely]] {
120127 return Status::InternalError (
121128 " Not support type change in column '{}', src type: {}, target type: {}" ,
122- column_name, delete_col.type ->get_name (), column_and_type.type ->get_name ());
129+ block_column_name, delete_col.type ->get_name (),
130+ column_and_type.type ->get_name ());
123131 }
124- _data_column_index[column_index++] = col_name_to_block_idx->at (column_name );
132+ _data_column_index[column_index++] = col_name_to_block_idx->at (block_column_name );
125133 }
126134 size_t rows = data_block->rows ();
127135 _data_hashes.clear ();
@@ -130,26 +138,18 @@ Status MultiEqualityDelete::filter_data_block(
130138 data_block->get_by_position (index).column ->update_hashes_with_value (_data_hashes.data (),
131139 nullptr );
132140 }
133-
134- if (_filter == nullptr ) {
135- _filter = std::make_unique<IColumn::Filter>(rows, 1 );
136- } else {
137- // reset the array capacity and fill all elements using the 0
138- _filter->assign (rows, UInt8 (1 ));
139- }
140- auto * filter_data = _filter->data ();
141+ auto * filter_data = filter.data ();
141142 for (size_t i = 0 ; i < rows; ++i) {
142143 for (auto beg = _delete_hash_map.lower_bound (_data_hashes[i]),
143144 end = _delete_hash_map.upper_bound (_data_hashes[i]);
144145 beg != end; ++beg) {
145- if (_equal (data_block, i, beg->second )) {
146+ if (filter[i] && _equal (data_block, i, beg->second )) {
146147 filter_data[i] = 0 ;
147148 break ;
148149 }
149150 }
150151 }
151152
152- Block::filter_block_internal (data_block, *_filter, data_block->columns ());
153153 return Status::OK ();
154154}
155155
0 commit comments