From e09c6363fd39f19c57691fdf0741571add1d2eae Mon Sep 17 00:00:00 2001 From: airborne12 Date: Mon, 14 Oct 2024 12:16:58 +0800 Subject: [PATCH] [Fix](inverted index) fix match null for inverted index (#41746) ## Proposed changes str match null return null , not true event if str is null --- be/src/vec/exprs/vmatch_predicate.cpp | 16 ++-------- be/src/vec/functions/match.cpp | 21 ++++--------- be/src/vec/functions/match.h | 2 -- .../inverted_index_p0/test_null_index.out | 4 ++- .../inverted_index_p0/test_null_index.groovy | 30 +++++++++++++++++-- 5 files changed, 40 insertions(+), 33 deletions(-) diff --git a/be/src/vec/exprs/vmatch_predicate.cpp b/be/src/vec/exprs/vmatch_predicate.cpp index 9dea7ca870c197..699181e433d427 100644 --- a/be/src/vec/exprs/vmatch_predicate.cpp +++ b/be/src/vec/exprs/vmatch_predicate.cpp @@ -84,14 +84,9 @@ Status VMatchPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, argument_template.emplace_back(nullptr, child->data_type(), child->expr_name()); child_expr_name.emplace_back(child->expr_name()); } - // result column always not null - if (_data_type->is_nullable()) { - _function = SimpleFunctionFactory::instance().get_function( - _fn.name.function_name, argument_template, remove_nullable(_data_type)); - } else { - _function = SimpleFunctionFactory::instance().get_function(_fn.name.function_name, - argument_template, _data_type); - } + + _function = SimpleFunctionFactory::instance().get_function(_fn.name.function_name, + argument_template, _data_type); if (_function == nullptr) { std::string type_str; for (auto arg : argument_template) { @@ -174,11 +169,6 @@ Status VMatchPredicate::execute(VExprContext* context, Block* block, int* result RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments, num_columns_without_result, block->rows(), false)); *result_column_id = num_columns_without_result; - if (_data_type->is_nullable()) { - auto nested = block->get_by_position(num_columns_without_result).column; - auto nullable = ColumnNullable::create(nested, ColumnUInt8::create(block->rows(), 0)); - block->replace_by_position(num_columns_without_result, nullable); - } return Status::OK(); } diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp index b70a605e962973..71d6fd8ec6e556 100644 --- a/be/src/vec/functions/match.cpp +++ b/be/src/vec/functions/match.cpp @@ -51,6 +51,10 @@ Status FunctionMatchBase::evaluate_inverted_index( std::shared_ptr roaring = std::make_shared(); Field param_value; arguments[0].column->get(0, param_value); + if (param_value.is_null()) { + // if query value is null, skip evaluate inverted index + return Status::OK(); + } auto param_type = arguments[0].type->get_type_as_type_descriptor().type; if (!is_string_type(param_type)) { return Status::Error( @@ -102,12 +106,7 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block, const auto* values = check_and_get_column(source_col.get()); const ColumnArray* array_col = nullptr; if (source_col->is_column_array()) { - if (source_col->is_nullable()) { - auto* nullable = check_and_get_column(source_col.get()); - array_col = check_and_get_column(*nullable->get_nested_column_ptr()); - } else { - array_col = check_and_get_column(source_col.get()); - } + array_col = check_and_get_column(source_col.get()); if (array_col && !array_col->get_data().is_column_string()) { return Status::NotSupported( fmt::format("unsupported nested array of type {} for function {}", @@ -127,15 +126,7 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block, values = check_and_get_column(*(array_col->get_data_ptr())); } } else if (auto* nullable = check_and_get_column(source_col.get())) { - // match null - if (type_ptr->is_nullable()) { - if (column_ptr->only_null()) { - block.get_by_position(result).column = nullable->get_null_map_column_ptr(); - return Status::OK(); - } - } else { - values = check_and_get_column(*nullable->get_nested_column_ptr()); - } + values = check_and_get_column(*nullable->get_nested_column_ptr()); } if (!values) { diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h index 3026e4a06cf7fd..d64fc6de686803 100644 --- a/be/src/vec/functions/match.h +++ b/be/src/vec/functions/match.h @@ -59,8 +59,6 @@ const std::string MATCH_PHRASE_EDGE_FUNCTION = "match_phrase_edge"; class FunctionMatchBase : public IFunction { public: - bool use_default_implementation_for_nulls() const override { return false; } - size_t get_number_of_arguments() const override { return 2; } String get_name() const override { return "match"; } diff --git a/regression-test/data/inverted_index_p0/test_null_index.out b/regression-test/data/inverted_index_p0/test_null_index.out index e37dc33ca03d32..d0405083b994b0 100644 --- a/regression-test/data/inverted_index_p0/test_null_index.out +++ b/regression-test/data/inverted_index_p0/test_null_index.out @@ -2,6 +2,8 @@ -- !sql -- -- !sql -- -1 a \N [null] [1] -- !sql -- + +-- !sql -- + diff --git a/regression-test/suites/inverted_index_p0/test_null_index.groovy b/regression-test/suites/inverted_index_p0/test_null_index.groovy index 61b613470ee6ac..a11e3dde7c4066 100644 --- a/regression-test/suites/inverted_index_p0/test_null_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_null_index.groovy @@ -34,8 +34,8 @@ suite("test_null_index", "p0"){ `id` int(11) NOT NULL, `str` string NOT NULL, `str_null` string NULL, - `value` array NOT NULL, - `value_int` array NOT NULL + `value` array NOT NULL, + `value_int` array NOT NULL ) ENGINE=OLAP DUPLICATE KEY(`id`) COMMENT 'OLAP' @@ -48,4 +48,30 @@ suite("test_null_index", "p0"){ sql "INSERT INTO $indexTblName VALUES (1, 'a', null, [null], [1]), (2, 'b', 'b', ['b'], [2]), (3, 'c', 'c', ['c'], [3]);" qt_sql "SELECT * FROM $indexTblName WHERE str match null order by id;" qt_sql "SELECT * FROM $indexTblName WHERE str_null match null order by id;" + + def indexTblName2 = "with_index_test" + + sql "DROP TABLE IF EXISTS ${indexTblName2}" + // create 1 replica table + sql """ + CREATE TABLE IF NOT EXISTS ${indexTblName2}( + `id` int(11) NOT NULL, + `str` string NOT NULL, + `str_null` string NULL, + `value` array NOT NULL, + `value_int` array NOT NULL, + INDEX str_idx(`str`) USING INVERTED, + INDEX str_null_idx(`str_null`) USING INVERTED + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ set enable_common_expr_pushdown = true """ + sql "INSERT INTO $indexTblName2 VALUES (1, 'a', null, [null], [1]), (2, 'b', 'b', ['b'], [2]), (3, 'c', 'c', ['c'], [3]);" + qt_sql "SELECT * FROM $indexTblName2 WHERE str match null order by id;" + qt_sql "SELECT * FROM $indexTblName2 WHERE str_null match null order by id;" }