Skip to content

Commit

Permalink
[fix](scan) Incorrect scan keys lead to wrong query results. (apache#…
Browse files Browse the repository at this point in the history
…40814)

```
mysql [doris_14555]>select * from table_9436528_3;
+------+------+------+------+------------------------+--------------------+------+
| col1 | col2 | col3 | col5 | col4                   | col6               | col7 |
+------+------+------+------+------------------------+--------------------+------+
| -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.904282 | NULL |
| -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |   23423423.0324234 | NULL |
| -100 |    0 |  -82 |    0 | 2023-11-11 10:49:43.00 |   840968969.872149 | NULL |
```
wrong result:
```
mysql [doris_14555]>select * from table_9436528_3 where col1 <= -100 and col2 in (true, false) and col3 = -82;
+------+------+------+------+------------------------+--------------------+------+
| col1 | col2 | col3 | col5 | col4                   | col6               | col7 |
+------+------+------+------+------------------------+--------------------+------+
| -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.904282 | NULL |
| -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |   23423423.0324234 | NULL |
+------+------+------+------+------------------------+--------------------+------+
```
  • Loading branch information
mrhhsg committed Sep 19, 2024
1 parent f483a76 commit 27394fb
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 14 deletions.
6 changes: 4 additions & 2 deletions be/src/exec/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ class OlapScanKeys {

template <PrimitiveType primitive_type>
Status extend_scan_key(ColumnValueRange<primitive_type>& range, int32_t max_scan_key_num,
bool* exact_value, bool* eos);
bool* exact_value, bool* eos, bool* should_break);

Status get_key_range(std::vector<std::unique_ptr<OlapScanRange>>* key_range);

Expand Down Expand Up @@ -1100,7 +1100,8 @@ bool ColumnValueRange<primitive_type>::has_intersection(ColumnValueRange<primiti

template <PrimitiveType primitive_type>
Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
int32_t max_scan_key_num, bool* exact_value, bool* eos) {
int32_t max_scan_key_num, bool* exact_value, bool* eos,
bool* should_break) {
using CppType = typename PrimitiveTypeTraits<primitive_type>::CppType;
using ConstIterator = typename std::set<CppType>::const_iterator;

Expand All @@ -1124,6 +1125,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
range.convert_to_range_value();
*exact_value = false;
} else {
*should_break = true;
return Status::OK();
}
}
Expand Down
18 changes: 12 additions & 6 deletions be/src/pipeline/exec/olap_scan_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,9 +451,13 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
// we use `exact_range` to identify a key range is an exact range or not when we convert
// it to `_scan_keys`. If `exact_range` is true, we can just discard it from `_olap_filters`.
bool exact_range = true;

// If the `_scan_keys` cannot extend by the range of column, should stop.
bool should_break = false;

bool eos = false;
for (int column_index = 0;
column_index < column_names.size() && !_scan_keys.has_range_value() && !eos;
for (int column_index = 0; column_index < column_names.size() &&
!_scan_keys.has_range_value() && !eos && !should_break;
++column_index) {
auto iter = _colname_to_value_range.find(column_names[column_index]);
if (_colname_to_value_range.end() == iter) {
Expand All @@ -467,17 +471,19 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
// but the original range may be converted to olap filters, if it's not a exact_range.
auto temp_range = range;
if (range.get_fixed_value_size() <= p._max_pushdown_conditions_per_column) {
RETURN_IF_ERROR(_scan_keys.extend_scan_key(
temp_range, p._max_scan_key_num, &exact_range, &eos));
RETURN_IF_ERROR(
_scan_keys.extend_scan_key(temp_range, p._max_scan_key_num,
&exact_range, &eos, &should_break));
if (exact_range) {
_colname_to_value_range.erase(iter->first);
}
} else {
// if exceed max_pushdown_conditions_per_column, use whole_value_rang instead
// and will not erase from _colname_to_value_range, it must be not exact_range
temp_range.set_whole_value_range();
RETURN_IF_ERROR(_scan_keys.extend_scan_key(
temp_range, p._max_scan_key_num, &exact_range, &eos));
RETURN_IF_ERROR(
_scan_keys.extend_scan_key(temp_range, p._max_scan_key_num,
&exact_range, &eos, &should_break));
}
return Status::OK();
},
Expand Down
18 changes: 12 additions & 6 deletions be/src/vec/exec/scan/new_olap_scan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,13 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
// we use `exact_range` to identify a key range is an exact range or not when we convert
// it to `_scan_keys`. If `exact_range` is true, we can just discard it from `_olap_filters`.
bool exact_range = true;

// If the `_scan_keys` cannot extend by the range of column, should stop.
bool should_break = false;

bool eos = false;
for (int column_index = 0;
column_index < column_names.size() && !_scan_keys.has_range_value() && !eos;
for (int column_index = 0; column_index < column_names.size() &&
!_scan_keys.has_range_value() && !eos && !should_break;
++column_index) {
auto iter = _colname_to_value_range.find(column_names[column_index]);
if (_colname_to_value_range.end() == iter) {
Expand All @@ -289,17 +293,19 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
// but the original range may be converted to olap filters, if it's not a exact_range.
auto temp_range = range;
if (range.get_fixed_value_size() <= _max_pushdown_conditions_per_column) {
RETURN_IF_ERROR(_scan_keys.extend_scan_key(
temp_range, _max_scan_key_num, &exact_range, &eos));
RETURN_IF_ERROR(
_scan_keys.extend_scan_key(temp_range, _max_scan_key_num,
&exact_range, &eos, &should_break));
if (exact_range) {
_colname_to_value_range.erase(iter->first);
}
} else {
// if exceed max_pushdown_conditions_per_column, use whole_value_rang instead
// and will not erase from _colname_to_value_range, it must be not exact_range
temp_range.set_whole_value_range();
RETURN_IF_ERROR(_scan_keys.extend_scan_key(
temp_range, _max_scan_key_num, &exact_range, &eos));
RETURN_IF_ERROR(
_scan_keys.extend_scan_key(temp_range, _max_scan_key_num,
&exact_range, &eos, &should_break));
}
return Status::OK();
},
Expand Down
43 changes: 43 additions & 0 deletions regression-test/data/correctness/test_scan_keys_with_bool_type.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select1 --
-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N
-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N
-100 true 92 true 2024-02-16T04:37:37 2.34234230324234E7 \N

-- !select2 --
-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N
-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N

-- !select3 --
-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N
-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N

-- !select3 --
-100 0 -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N
-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N
-100 1 92 true 2024-02-16T04:37:37 2.34234230324234E7 \N
-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N

-- !select4 --
-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N
-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N

-- !select5 --
-100 0 -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N
-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N
-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N

-- !select6 --
-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N
-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N
-100 b 92 true 2024-02-16T04:37:37 2.34234230324234E7 \N
-100 c 92 true 2024-02-16T04:37:37 2.34234230324234E7 \N

-- !select7 --
-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N
-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N

-- !select8 --
-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N
-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N

Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_scan_keys_with_bool_type") {
sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type """

sql """
CREATE TABLE `test_scan_keys_with_bool_type` (
`col1` tinyint NOT NULL,
`col2` boolean NOT NULL,
`col3` tinyint NOT NULL,
`col5` boolean REPLACE NOT NULL,
`col4` datetime(2) REPLACE NOT NULL,
`col6` double REPLACE_IF_NOT_NULL NULL,
`col7` datetime(3) REPLACE_IF_NOT_NULL NULL
) ENGINE=OLAP
AGGREGATE KEY(`col1`, `col2`, `col3`)
DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""

sql """ insert into test_scan_keys_with_bool_type values
( -100 , 0 , -82 , 0 , '2023-11-11 10:49:43.00' , 840968969.872149 , NULL ),
( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ),
( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL );
"""

qt_select1 " select * from test_scan_keys_with_bool_type order by 1, 2, 3, 4, 5, 6, 7; "
qt_select2 " select * from test_scan_keys_with_bool_type where col1 <= -100 and col2 in (true, false) and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
qt_select3 " select * from test_scan_keys_with_bool_type where col1 <= -100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type2 """

sql """
CREATE TABLE `test_scan_keys_with_bool_type2` (
`col1` tinyint NOT NULL,
`col2` int NOT NULL,
`col3` tinyint NOT NULL,
`col5` boolean REPLACE NOT NULL,
`col4` datetime(2) REPLACE NOT NULL,
`col6` double REPLACE_IF_NOT_NULL NULL,
`col7` datetime(3) REPLACE_IF_NOT_NULL NULL
) ENGINE=OLAP
AGGREGATE KEY(`col1`, `col2`, `col3`)
DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""

sql """ insert into test_scan_keys_with_bool_type2 values
( -100 , 0 , -82 , 0 , '2023-11-11 10:49:43.00' , 840968969.872149 , NULL ),
( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ),
( -100 , 2 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ),
( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL );
"""

qt_select3 " select * from test_scan_keys_with_bool_type2 order by 1, 2, 3, 4, 5, 6, 7; "
qt_select4 " select * from test_scan_keys_with_bool_type2 where col1 <= -100 and col2 in (1, 2) and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
qt_select5 " select * from test_scan_keys_with_bool_type2 where col1 <= -100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "


sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type3 """

sql """
CREATE TABLE `test_scan_keys_with_bool_type3` (
`col1` tinyint NOT NULL,
`col2` char NOT NULL,
`col3` tinyint NOT NULL,
`col5` boolean REPLACE NOT NULL,
`col4` datetime(2) REPLACE NOT NULL,
`col6` double REPLACE_IF_NOT_NULL NULL,
`col7` datetime(3) REPLACE_IF_NOT_NULL NULL
) ENGINE=OLAP
AGGREGATE KEY(`col1`, `col2`, `col3`)
DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""

sql """ insert into test_scan_keys_with_bool_type3 values
( -100 , 'a' , -82 , 0 , '2023-11-11 10:49:43.00' , 840968969.872149 , NULL ),
( -100 , 'b', -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ),
( -100 , 'b' , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL ),
( -100 , 'c' , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL );
"""

qt_select6 " select * from test_scan_keys_with_bool_type3 order by 1, 2, 3, 4, 5, 6, 7; "
qt_select7 " select * from test_scan_keys_with_bool_type3 where col1 <= -100 and col2 in ('a', 'b') and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
qt_select8 " select * from test_scan_keys_with_bool_type3 where col1 <= -100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
}

0 comments on commit 27394fb

Please sign in to comment.