From 0ba85cb4849d1583202a8a0f99259ef84162eb9a Mon Sep 17 00:00:00 2001 From: trueeyu Date: Mon, 11 Sep 2023 19:27:50 +0800 Subject: [PATCH] [Enhancement] No need to build_slice for get data from BinaryColumn (#30702) Signed-off-by: trueeyu (cherry picked from commit 2d86dd234641429254df891102ed44db1441adbb) # Conflicts: # be/src/util/arrow/starrocks_column_to_arrow.cpp --- .../util/arrow/starrocks_column_to_arrow.cpp | 56 +++++++++++++++---- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/be/src/util/arrow/starrocks_column_to_arrow.cpp b/be/src/util/arrow/starrocks_column_to_arrow.cpp index dd7c526086ac4..c26953f1f3228 100644 --- a/be/src/util/arrow/starrocks_column_to_arrow.cpp +++ b/be/src/util/arrow/starrocks_column_to_arrow.cpp @@ -33,7 +33,6 @@ struct ColumnToArrowConverter; using StarRocksColumnType = RunTimeColumnType; using ArrowType = ArrowTypeIdToType; - using ArrowCppType = ArrowTypeIdToCppType; using ArrowBuilderType = typename arrow::TypeTraits::BuilderType; static inline arrow::Status convert(const ColumnPtr& column, arrow::MemoryPool* pool, std::shared_ptr& array) { @@ -72,7 +71,6 @@ struct ColumnToArrowConverter> { using StarRocksCppType = RunTimeCppType; using StarRocksColumnType = RunTimeColumnType; using ArrowType = ArrowTypeIdToType; - using ArrowCppType = ArrowTypeIdToCppType; using ArrowBuilderType = typename arrow::TypeTraits::BuilderType; static inline arrow::Decimal128 convert_datum(const StarRocksCppType& datum) { @@ -149,7 +147,6 @@ struct ColumnToArrowConverter> { using StarRocksCppType = RunTimeCppType; using StarRocksColumnType = RunTimeColumnType; using ArrowType = ArrowTypeIdToType; - using ArrowCppType = ArrowTypeIdToCppType; using ArrowBuilderType = typename arrow::TypeTraits::BuilderType; static inline std::string convert_datum(const StarRocksCppType& datum, [[maybe_unused]] int precision, @@ -180,6 +177,7 @@ struct ColumnToArrowConverter> { const auto* nullable_column = down_cast(column.get()); const auto* data_column = down_cast(nullable_column->data_column().get()); const auto* null_column = down_cast(nullable_column->null_column().get()); +<<<<<<< HEAD const auto& data = data_column->get_data(); [[maybe_unused]] int precision = -1; [[maybe_unused]] int scale = -1; @@ -187,18 +185,38 @@ struct ColumnToArrowConverter> { precision = data_column->precision(); scale = data_column->scale(); } +======= +>>>>>>> 2d86dd2346 ([Enhancement] No need to build_slice for get data from BinaryColumn (#30702)) const auto num_rows = null_column->size(); - for (auto i = 0; i < num_rows; ++i) { - if (nullable_column->is_null(i)) { - ARROW_RETURN_NOT_OK(builder->AppendNull()); - } else { - ARROW_RETURN_NOT_OK(builder->Append(convert_datum(data[i], precision, scale))); + if constexpr (lt_is_string) { + const auto& data = data_column->get_proxy_data(); + for (auto i = 0; i < num_rows; ++i) { + if (nullable_column->is_null(i)) { + ARROW_RETURN_NOT_OK(builder->AppendNull()); + } else { + ARROW_RETURN_NOT_OK(builder->Append(convert_datum(data[i], -1, -1))); + } + } + } else { + const auto& data = data_column->get_data(); + [[maybe_unused]] int precision = -1; + [[maybe_unused]] int scale = -1; + if constexpr (lt_is_decimal) { + precision = data_column->precision(); + scale = data_column->scale(); + } + for (auto i = 0; i < num_rows; ++i) { + if (nullable_column->is_null(i)) { + ARROW_RETURN_NOT_OK(builder->AppendNull()); + } else { + ARROW_RETURN_NOT_OK(builder->Append(convert_datum(data[i], precision, scale))); + } } } } else { const auto* data_column = down_cast(column.get()); - const auto& data = data_column->get_data(); const auto num_rows = column->size(); +<<<<<<< HEAD [[maybe_unused]] int precision = -1; [[maybe_unused]] int scale = -1; if constexpr (pt_is_decimal) { @@ -207,6 +225,24 @@ struct ColumnToArrowConverter> { } for (auto i = 0; i < num_rows; ++i) { ARROW_RETURN_NOT_OK(builder->Append(convert_datum(data[i], precision, scale))); +======= + if constexpr (lt_is_string) { + const auto& data = data_column->get_proxy_data(); + for (auto i = 0; i < num_rows; ++i) { + ARROW_RETURN_NOT_OK(builder->Append(convert_datum(data[i], -1, -1))); + } + } else { + const auto& data = data_column->get_data(); + [[maybe_unused]] int precision = -1; + [[maybe_unused]] int scale = -1; + if constexpr (lt_is_decimal) { + precision = data_column->precision(); + scale = data_column->scale(); + } + for (auto i = 0; i < num_rows; ++i) { + ARROW_RETURN_NOT_OK(builder->Append(convert_datum(data[i], precision, scale))); + } +>>>>>>> 2d86dd2346 ([Enhancement] No need to build_slice for get data from BinaryColumn (#30702)) } } return builder->Finish(&array); @@ -295,7 +331,7 @@ Status convert_chunk_to_arrow_batch(Chunk* chunk, std::vector& _ou std::vector> arrays(result_num_column); for (auto i = 0; i < result_num_column; ++i) { - ASSIGN_OR_RETURN(ColumnPtr column, _output_expr_ctxs[i]->evaluate(chunk)); + ASSIGN_OR_RETURN(ColumnPtr column, _output_expr_ctxs[i]->evaluate(chunk)) Expr* expr = _output_expr_ctxs[i]->root(); if (column->is_constant()) { column = vectorized::ColumnHelper::unfold_const_column(expr->type(), chunk->num_rows(), column);