From 4007b41a62e70074c0b1df51bc94162685ae1b0e Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Thu, 18 Jun 2026 11:28:27 +0800 Subject: [PATCH] feat: introduce type casting executors for schema evolution --- .../boolean_to_decimal_cast_executor.cpp | 98 ++++++++++ .../boolean_to_decimal_cast_executor.h | 45 +++++ .../boolean_to_numeric_cast_executor.cpp | 81 ++++++++ .../boolean_to_numeric_cast_executor.h | 63 ++++++ .../boolean_to_string_cast_executor.cpp | 61 ++++++ .../casting/boolean_to_string_cast_executor.h | 44 +++++ .../decimal_to_decimal_cast_executor.cpp | 103 ++++++++++ .../decimal_to_decimal_cast_executor.h | 47 +++++ ...mal_to_numeric_primitive_cast_executor.cpp | 98 ++++++++++ ...cimal_to_numeric_primitive_cast_executor.h | 45 +++++ ...ric_primitive_to_decimal_cast_executor.cpp | 180 ++++++++++++++++++ ...meric_primitive_to_decimal_cast_executor.h | 55 ++++++ .../numeric_to_boolean_cast_executor.cpp | 70 +++++++ .../numeric_to_boolean_cast_executor.h | 61 ++++++ .../numeric_to_string_cast_executor.cpp | 105 ++++++++++ .../casting/numeric_to_string_cast_executor.h | 56 ++++++ .../string_to_boolean_cast_executor.cpp | 88 +++++++++ .../casting/string_to_boolean_cast_executor.h | 44 +++++ .../string_to_decimal_cast_executor.cpp | 109 +++++++++++ .../casting/string_to_decimal_cast_executor.h | 54 ++++++ ...ing_to_numeric_primitive_cast_executor.cpp | 115 +++++++++++ ...tring_to_numeric_primitive_cast_executor.h | 55 ++++++ 22 files changed, 1677 insertions(+) create mode 100644 src/paimon/core/casting/boolean_to_decimal_cast_executor.cpp create mode 100644 src/paimon/core/casting/boolean_to_decimal_cast_executor.h create mode 100644 src/paimon/core/casting/boolean_to_numeric_cast_executor.cpp create mode 100644 src/paimon/core/casting/boolean_to_numeric_cast_executor.h create mode 100644 src/paimon/core/casting/boolean_to_string_cast_executor.cpp create mode 100644 src/paimon/core/casting/boolean_to_string_cast_executor.h create mode 100644 src/paimon/core/casting/decimal_to_decimal_cast_executor.cpp create mode 100644 src/paimon/core/casting/decimal_to_decimal_cast_executor.h create mode 100644 src/paimon/core/casting/decimal_to_numeric_primitive_cast_executor.cpp create mode 100644 src/paimon/core/casting/decimal_to_numeric_primitive_cast_executor.h create mode 100644 src/paimon/core/casting/numeric_primitive_to_decimal_cast_executor.cpp create mode 100644 src/paimon/core/casting/numeric_primitive_to_decimal_cast_executor.h create mode 100644 src/paimon/core/casting/numeric_to_boolean_cast_executor.cpp create mode 100644 src/paimon/core/casting/numeric_to_boolean_cast_executor.h create mode 100644 src/paimon/core/casting/numeric_to_string_cast_executor.cpp create mode 100644 src/paimon/core/casting/numeric_to_string_cast_executor.h create mode 100644 src/paimon/core/casting/string_to_boolean_cast_executor.cpp create mode 100644 src/paimon/core/casting/string_to_boolean_cast_executor.h create mode 100644 src/paimon/core/casting/string_to_decimal_cast_executor.cpp create mode 100644 src/paimon/core/casting/string_to_decimal_cast_executor.h create mode 100644 src/paimon/core/casting/string_to_numeric_primitive_cast_executor.cpp create mode 100644 src/paimon/core/casting/string_to_numeric_primitive_cast_executor.h diff --git a/src/paimon/core/casting/boolean_to_decimal_cast_executor.cpp b/src/paimon/core/casting/boolean_to_decimal_cast_executor.cpp new file mode 100644 index 0000000..81884f8 --- /dev/null +++ b/src/paimon/core/casting/boolean_to_decimal_cast_executor.cpp @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/boolean_to_decimal_cast_executor.h" + +#include +#include +#include +#include + +#include "arrow/array/array_primitive.h" +#include "arrow/array/builder_decimal.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/decimal_utils.h" +#include "paimon/data/decimal.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { + +Result BooleanToDecimalCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::BOOLEAN); + PAIMON_RETURN_NOT_OK(DecimalUtils::CheckDecimalType(*target_type)); + auto* decimal_type = arrow::internal::checked_cast(target_type.get()); + assert(decimal_type); + if (literal.IsNull()) { + return Literal(FieldType::DECIMAL); + } + bool bool_value = literal.GetValue(); + auto scaled_decimal = DecimalUtils::RescaleDecimalWithOverflowCheck( + arrow::Decimal128(bool_value), /*src_scale=*/0, decimal_type->precision(), + decimal_type->scale()); + if (scaled_decimal == std::nullopt) { + return Literal(FieldType::DECIMAL); + } + return Literal(Decimal( + decimal_type->precision(), decimal_type->scale(), + static_cast(static_cast(static_cast( + scaled_decimal.value().high_bits())) + << 64 | + scaled_decimal.value().low_bits()))); +} + +Result> BooleanToDecimalCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + PAIMON_RETURN_NOT_OK(DecimalUtils::CheckDecimalType(*target_type)); + auto* boolean_array = arrow::internal::checked_cast(array.get()); + assert(boolean_array); + auto* decimal_type = arrow::internal::checked_cast(target_type.get()); + assert(decimal_type); + auto decimal_builder = std::make_shared(target_type, pool); + for (int64_t i = 0; i < boolean_array->length(); ++i) { + if (boolean_array->IsNull(i)) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + bool bool_value = boolean_array->Value(i); + auto scaled_decimal = DecimalUtils::RescaleDecimalWithOverflowCheck( + arrow::Decimal128(bool_value), /*src_scale=*/0, decimal_type->precision(), + decimal_type->scale()); + if (scaled_decimal == std::nullopt) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->Append(scaled_decimal.value())); + } + } + } + std::shared_ptr casted_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->Finish(&casted_array)); + return casted_array; +} + +} // namespace paimon diff --git a/src/paimon/core/casting/boolean_to_decimal_cast_executor.h b/src/paimon/core/casting/boolean_to_decimal_cast_executor.h new file mode 100644 index 0000000..4752743 --- /dev/null +++ b/src/paimon/core/casting/boolean_to_decimal_cast_executor.h @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/data/decimal.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class BooleanToDecimalCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/boolean_to_numeric_cast_executor.cpp b/src/paimon/core/casting/boolean_to_numeric_cast_executor.cpp new file mode 100644 index 0000000..ff42d0d --- /dev/null +++ b/src/paimon/core/casting/boolean_to_numeric_cast_executor.cpp @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/boolean_to_numeric_cast_executor.h" + +#include +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "fmt/format.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +BooleanToNumericCastExecutor::BooleanToNumericCastExecutor() { + literal_cast_executor_map_ = { + {FieldType::TINYINT, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::TINYINT); }}, + {FieldType::SMALLINT, + [&](const Literal& literal) { + return CastLiteral(literal, FieldType::SMALLINT); + }}, + {FieldType::INT, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::INT); }}, + {FieldType::BIGINT, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::BIGINT); }}, + {FieldType::FLOAT, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::FLOAT); }}, + {FieldType::DOUBLE, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::DOUBLE); }}}; +} + +Result BooleanToNumericCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::BOOLEAN); + PAIMON_ASSIGN_OR_RAISE(FieldType target_field_type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + auto iter = literal_cast_executor_map_.find(target_field_type); + if (iter == literal_cast_executor_map_.end()) { + return Status::Invalid( + fmt::format("cast literal in BooleanToNumericCastExecutor failed: cannot find cast " + "function from boolean to {}", + target_type->ToString())); + } + return iter->second(literal); +} + +Result> BooleanToNumericCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + return CastingUtils::Cast(array, target_type, options, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/boolean_to_numeric_cast_executor.h b/src/paimon/core/casting/boolean_to_numeric_cast_executor.h new file mode 100644 index 0000000..dcc2384 --- /dev/null +++ b/src/paimon/core/casting/boolean_to_numeric_cast_executor.h @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +enum class FieldType; + +class BooleanToNumericCastExecutor : public CastExecutor { + public: + BooleanToNumericCastExecutor(); + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; + + private: + template + static Literal CastLiteral(const Literal& literal, const FieldType& target_type) { + if (literal.IsNull()) { + return Literal(target_type); + } + bool value = literal.GetValue(); + return Literal(static_cast(value)); + } + + private: + std::map> literal_cast_executor_map_; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/boolean_to_string_cast_executor.cpp b/src/paimon/core/casting/boolean_to_string_cast_executor.cpp new file mode 100644 index 0000000..144a6ad --- /dev/null +++ b/src/paimon/core/casting/boolean_to_string_cast_executor.cpp @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/boolean_to_string_cast_executor.h" + +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/defs.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +Result BooleanToStringCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::BOOLEAN); + PAIMON_ASSIGN_OR_RAISE(FieldType target_field_type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + assert(target_field_type == FieldType::STRING); + static const std::string TRUE = "true"; + static const std::string FALSE = "false"; + if (literal.IsNull()) { + return Literal(target_field_type); + } + if (literal.GetValue()) { + return Literal(target_field_type, TRUE.data(), TRUE.size()); + } + return Literal(target_field_type, FALSE.data(), FALSE.size()); +} + +Result> BooleanToStringCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + return CastingUtils::Cast(array, target_type, options, pool); +} +} // namespace paimon diff --git a/src/paimon/core/casting/boolean_to_string_cast_executor.h b/src/paimon/core/casting/boolean_to_string_cast_executor.h new file mode 100644 index 0000000..18ad8be --- /dev/null +++ b/src/paimon/core/casting/boolean_to_string_cast_executor.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class BooleanToStringCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/decimal_to_decimal_cast_executor.cpp b/src/paimon/core/casting/decimal_to_decimal_cast_executor.cpp new file mode 100644 index 0000000..34dabde --- /dev/null +++ b/src/paimon/core/casting/decimal_to_decimal_cast_executor.cpp @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/decimal_to_decimal_cast_executor.h" + +#include +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "arrow/array/array_decimal.h" +#include "arrow/array/builder_decimal.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/decimal_utils.h" +#include "paimon/data/decimal.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +} // namespace arrow + +namespace paimon { +Result DecimalToDecimalCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::DECIMAL); + PAIMON_RETURN_NOT_OK(DecimalUtils::CheckDecimalType(*target_type)); + if (literal.IsNull()) { + return Literal(FieldType::DECIMAL); + } + auto* target_decimal_type = + arrow::internal::checked_cast(target_type.get()); + assert(target_decimal_type); + auto src_value = literal.GetValue(); + arrow::Decimal128 src_decimal(src_value.HighBits(), src_value.LowBits()); + auto scaled_decimal = DecimalUtils::RescaleDecimalWithOverflowCheck( + src_decimal, src_value.Scale(), target_decimal_type->precision(), + target_decimal_type->scale()); + if (scaled_decimal == std::nullopt) { + return Literal(FieldType::DECIMAL); + } + return Literal(Decimal( + target_decimal_type->precision(), target_decimal_type->scale(), + static_cast(static_cast(static_cast( + scaled_decimal.value().high_bits())) + << 64 | + scaled_decimal.value().low_bits()))); +} + +Result> DecimalToDecimalCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + PAIMON_RETURN_NOT_OK(DecimalUtils::CheckDecimalType(*target_type)); + auto* src_array = arrow::internal::checked_cast(array.get()); + assert(src_array); + auto* src_decimal_type = + arrow::internal::checked_cast(array->type().get()); + assert(src_decimal_type); + auto* target_decimal_type = + arrow::internal::checked_cast(target_type.get()); + assert(target_decimal_type); + auto decimal_builder = std::make_shared(target_type, pool); + for (int64_t i = 0; i < src_array->length(); ++i) { + if (src_array->IsNull(i)) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + arrow::Decimal128 src_value(src_array->GetValue(i)); + auto scaled_decimal = DecimalUtils::RescaleDecimalWithOverflowCheck( + src_value, src_decimal_type->scale(), target_decimal_type->precision(), + target_decimal_type->scale()); + if (scaled_decimal == std::nullopt) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->Append(scaled_decimal.value())); + } + } + } + std::shared_ptr casted_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->Finish(&casted_array)); + return casted_array; +} + +} // namespace paimon diff --git a/src/paimon/core/casting/decimal_to_decimal_cast_executor.h b/src/paimon/core/casting/decimal_to_decimal_cast_executor.h new file mode 100644 index 0000000..a898ba1 --- /dev/null +++ b/src/paimon/core/casting/decimal_to_decimal_cast_executor.h @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" +#include "paimon/visibility.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { + +class PAIMON_EXPORT DecimalToDecimalCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/decimal_to_numeric_primitive_cast_executor.cpp b/src/paimon/core/casting/decimal_to_numeric_primitive_cast_executor.cpp new file mode 100644 index 0000000..efd0541 --- /dev/null +++ b/src/paimon/core/casting/decimal_to_numeric_primitive_cast_executor.cpp @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/decimal_to_numeric_primitive_cast_executor.h" + +#include +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/scalar.h" +#include "arrow/type.h" +#include "fmt/format.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/data/decimal.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +Result DecimalToNumericPrimitiveCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::DECIMAL); + PAIMON_ASSIGN_OR_RAISE(FieldType target_field_type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + if (literal.IsNull()) { + PAIMON_ASSIGN_OR_RAISE(FieldType type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + return Literal(type); + } + auto decimal_value = literal.GetValue(); + auto src_type = arrow::decimal128(decimal_value.Precision(), decimal_value.Scale()); + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + options.allow_decimal_truncate = true; + options.allow_int_overflow = true; + if (target_field_type == FieldType::TINYINT) { + return CastingUtils::Cast, int8_t>( + literal, src_type, target_type, options); + } else if (target_field_type == FieldType::SMALLINT) { + return CastingUtils::Cast, int16_t>( + literal, src_type, target_type, options); + } else if (target_field_type == FieldType::INT) { + return CastingUtils::Cast, int32_t>( + literal, src_type, target_type, options); + } else if (target_field_type == FieldType::BIGINT) { + return CastingUtils::Cast, int64_t>( + literal, src_type, target_type, options); + } else if (target_field_type == FieldType::FLOAT) { + return CastingUtils::Cast, float>( + literal, src_type, target_type, options); + } else if (target_field_type == FieldType::DOUBLE) { + return CastingUtils::Cast, double>( + literal, src_type, target_type, options); + } + return Status::Invalid(fmt::format( + "cast literal in DecimalToNumericPrimitiveCastExecutor failed: cannot find cast " + "function from decimal to {}", + FieldTypeUtils::FieldTypeToString(target_field_type))); +} + +Result> DecimalToNumericPrimitiveCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + options.allow_decimal_truncate = true; + options.allow_int_overflow = true; + return CastingUtils::Cast(array, target_type, options, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/decimal_to_numeric_primitive_cast_executor.h b/src/paimon/core/casting/decimal_to_numeric_primitive_cast_executor.h new file mode 100644 index 0000000..b21ed89 --- /dev/null +++ b/src/paimon/core/casting/decimal_to_numeric_primitive_cast_executor.h @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class DecimalToNumericPrimitiveCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/numeric_primitive_to_decimal_cast_executor.cpp b/src/paimon/core/casting/numeric_primitive_to_decimal_cast_executor.cpp new file mode 100644 index 0000000..b950dbc --- /dev/null +++ b/src/paimon/core/casting/numeric_primitive_to_decimal_cast_executor.cpp @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/numeric_primitive_to_decimal_cast_executor.h" + +#include +#include +#include +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "arrow/array/array_primitive.h" +#include "arrow/array/builder_decimal.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" +#include "fmt/format.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/decimal_utils.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/data/decimal.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +} // namespace arrow + +namespace paimon { + +template +Result NumericPrimitiveToDecimalCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) { + auto* decimal_type = arrow::internal::checked_cast(target_type.get()); + assert(decimal_type); + if (literal.IsNull()) { + return Literal(FieldType::DECIMAL); + } + auto src_value = literal.GetValue(); + if constexpr (std::is_same_v || std::is_same_v) { + if (src_value == INFINITY || src_value == -INFINITY || std::isnan(src_value)) { + return Status::Invalid(fmt::format("Cannot cast {} to decimal", src_value)); + } + auto decimal_result = arrow::Decimal128::FromReal(src_value, decimal_type->precision(), + decimal_type->scale()); + if (decimal_result.ok()) { + return Literal{Decimal(decimal_type->precision(), decimal_type->scale(), + static_cast( + static_cast(static_cast( + decimal_result.ValueUnsafe().high_bits())) + << 64 | + decimal_result.ValueUnsafe().low_bits()))}; + } + } else { + auto scaled_decimal = DecimalUtils::RescaleDecimalWithOverflowCheck( + arrow::Decimal128(src_value), /*src_scale=*/0, decimal_type->precision(), + decimal_type->scale()); + if (scaled_decimal != std::nullopt) { + return Literal(Decimal(decimal_type->precision(), decimal_type->scale(), + static_cast( + static_cast(static_cast( + scaled_decimal.value().high_bits())) + << 64 | + scaled_decimal.value().low_bits()))); + } + } + return Literal(FieldType::DECIMAL); +} + +Result NumericPrimitiveToDecimalCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + PAIMON_RETURN_NOT_OK(DecimalUtils::CheckDecimalType(*target_type)); + FieldType src_type = literal.GetType(); + if (src_type == FieldType::TINYINT) { + return Cast(literal, target_type); + } else if (src_type == FieldType::SMALLINT) { + return Cast(literal, target_type); + } else if (src_type == FieldType::INT) { + return Cast(literal, target_type); + } else if (src_type == FieldType::BIGINT) { + return Cast(literal, target_type); + } else if (src_type == FieldType::FLOAT) { + return Cast(literal, target_type); + } else if (src_type == FieldType::DOUBLE) { + return Cast(literal, target_type); + } + return Status::Invalid(fmt::format( + "cast literal in NumericPrimitiveToDecimalCastExecutor failed: cannot find cast " + "function from {} to {}", + FieldTypeUtils::FieldTypeToString(src_type), target_type->ToString())); +} + +template +Result> NumericPrimitiveToDecimalCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) { + using SrcValueType = typename arrow::NumericArray::value_type; + auto* typed_array = arrow::internal::checked_cast*>(array.get()); + assert(typed_array); + auto* decimal_type = arrow::internal::checked_cast(target_type.get()); + auto decimal_builder = std::make_shared(target_type, pool); + for (int64_t i = 0; i < typed_array->length(); ++i) { + if (typed_array->IsNull(i)) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + SrcValueType src_value = typed_array->Value(i); + if constexpr (std::is_same_v || + std::is_same_v) { + if (src_value == INFINITY || src_value == -INFINITY || std::isnan(src_value)) { + return Status::Invalid(fmt::format("Cannot cast {} to decimal", src_value)); + } + auto decimal_result = arrow::Decimal128::FromReal( + src_value, decimal_type->precision(), decimal_type->scale()); + if (!decimal_result.ok()) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + PAIMON_RETURN_NOT_OK_FROM_ARROW( + decimal_builder->Append(decimal_result.ValueUnsafe())); + } + } else { + auto scaled_decimal = DecimalUtils::RescaleDecimalWithOverflowCheck( + arrow::Decimal128(src_value), /*src_scale=*/0, decimal_type->precision(), + decimal_type->scale()); + if (scaled_decimal == std::nullopt) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + PAIMON_RETURN_NOT_OK_FROM_ARROW( + decimal_builder->Append(scaled_decimal.value())); + } + } + } + } + std::shared_ptr casted_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->Finish(&casted_array)); + return casted_array; +} + +Result> NumericPrimitiveToDecimalCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + PAIMON_RETURN_NOT_OK(DecimalUtils::CheckDecimalType(*target_type)); + auto src_type_id = array->type()->id(); + if (src_type_id == arrow::Type::type::INT8) { + return Cast(array, target_type, pool); + } else if (src_type_id == arrow::Type::type::INT16) { + return Cast(array, target_type, pool); + } else if (src_type_id == arrow::Type::type::INT32) { + return Cast(array, target_type, pool); + } else if (src_type_id == arrow::Type::type::INT64) { + return Cast(array, target_type, pool); + } else if (src_type_id == arrow::Type::type::FLOAT) { + return Cast(array, target_type, pool); + } else if (src_type_id == arrow::Type::type::DOUBLE) { + return Cast(array, target_type, pool); + } + return Status::Invalid( + fmt::format("cast array in NumericPrimitiveToDecimalCastExecutor failed: cannot cast " + "from {} to decimal", + array->type()->ToString())); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/numeric_primitive_to_decimal_cast_executor.h b/src/paimon/core/casting/numeric_primitive_to_decimal_cast_executor.h new file mode 100644 index 0000000..128766f --- /dev/null +++ b/src/paimon/core/casting/numeric_primitive_to_decimal_cast_executor.h @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class NumericPrimitiveToDecimalCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; + + private: + template + static Result> Cast( + const std::shared_ptr& array, + const std::shared_ptr& target_type, arrow::MemoryPool* pool); + + template + static Result Cast(const Literal& literal, + const std::shared_ptr& target_type); +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/numeric_to_boolean_cast_executor.cpp b/src/paimon/core/casting/numeric_to_boolean_cast_executor.cpp new file mode 100644 index 0000000..db23ee2 --- /dev/null +++ b/src/paimon/core/casting/numeric_to_boolean_cast_executor.cpp @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/numeric_to_boolean_cast_executor.h" + +#include +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "fmt/format.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +NumericToBooleanCastExecutor::NumericToBooleanCastExecutor() { + literal_cast_executor_map_ = { + {FieldType::TINYINT, [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::SMALLINT, + [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::INT, [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::BIGINT, [&](const Literal& literal) { return CastLiteral(literal); }}}; +} + +Result NumericToBooleanCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(target_type->id() == arrow::Type::type::BOOL); + FieldType src_type = literal.GetType(); + auto iter = literal_cast_executor_map_.find(src_type); + if (iter == literal_cast_executor_map_.end()) { + return Status::Invalid( + fmt::format("cast literal in NumericToBooleanCastExecutor failed: cannot find cast " + "function from {} to boolean", + FieldTypeUtils::FieldTypeToString(src_type))); + } + return iter->second(literal); +} + +Result> NumericToBooleanCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + return CastingUtils::Cast(array, target_type, options, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/numeric_to_boolean_cast_executor.h b/src/paimon/core/casting/numeric_to_boolean_cast_executor.h new file mode 100644 index 0000000..92e069b --- /dev/null +++ b/src/paimon/core/casting/numeric_to_boolean_cast_executor.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/defs.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class NumericToBooleanCastExecutor : public CastExecutor { + public: + NumericToBooleanCastExecutor(); + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; + + private: + template + static Literal CastLiteral(const Literal& literal) { + if (literal.IsNull()) { + return Literal(FieldType::BOOLEAN); + } + SrcType value = literal.GetValue(); + return Literal{static_cast(value)}; + } + + private: + std::map> literal_cast_executor_map_; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/numeric_to_string_cast_executor.cpp b/src/paimon/core/casting/numeric_to_string_cast_executor.cpp new file mode 100644 index 0000000..7e643a0 --- /dev/null +++ b/src/paimon/core/casting/numeric_to_string_cast_executor.cpp @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/numeric_to_string_cast_executor.h" + +#include +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/scalar.h" +#include "arrow/type.h" +#include "arrow/util/decimal.h" +#include "fmt/format.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/data/decimal.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { + +NumericToStringCastExecutor::NumericToStringCastExecutor() { + literal_cast_executor_map_ = { + {FieldType::TINYINT, [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::SMALLINT, + [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::INT, [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::BIGINT, [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::FLOAT, [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::DOUBLE, [&](const Literal& literal) { return CastLiteral(literal); }}, + {FieldType::DECIMAL, + [&](const Literal& literal) { return CastLiteral(literal); }}}; +} + +template +Literal NumericToStringCastExecutor::CastLiteral(const Literal& literal) { + if (literal.IsNull()) { + return Literal(FieldType::STRING); + } + auto value = literal.GetValue(); + if constexpr (std::is_same_v) { + arrow::FloatScalar scalar(value); + std::string string_value = scalar.ToString(); + return Literal(FieldType::STRING, string_value.data(), string_value.size()); + } else if constexpr (std::is_same_v) { + arrow::DoubleScalar scalar(value); + std::string string_value = scalar.ToString(); + return Literal(FieldType::STRING, string_value.data(), string_value.size()); + } else if constexpr (std::is_same_v) { + auto src_type = arrow::decimal128(value.Precision(), value.Scale()); + arrow::Decimal128Scalar scalar(arrow::Decimal128(value.HighBits(), value.LowBits()), + src_type); + std::string string_value = scalar.ToString(); + return Literal(FieldType::STRING, string_value.data(), string_value.size()); + } else { + std::string string_value = std::to_string(value); + return Literal(FieldType::STRING, string_value.data(), string_value.size()); + } +} + +Result NumericToStringCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(target_type->id() == arrow::Type::type::STRING); + FieldType src_type = literal.GetType(); + auto iter = literal_cast_executor_map_.find(src_type); + if (iter == literal_cast_executor_map_.end()) { + return Status::Invalid( + fmt::format("cast literal in NumericToStringCastExecutor failed: cannot find cast " + "function from {} to STRING", + FieldTypeUtils::FieldTypeToString(src_type))); + } + return iter->second(literal); +} + +Result> NumericToStringCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + return CastingUtils::Cast(array, target_type, options, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/numeric_to_string_cast_executor.h b/src/paimon/core/casting/numeric_to_string_cast_executor.h new file mode 100644 index 0000000..1f84b5a --- /dev/null +++ b/src/paimon/core/casting/numeric_to_string_cast_executor.h @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +enum class FieldType; + +class NumericToStringCastExecutor : public CastExecutor { + public: + NumericToStringCastExecutor(); + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; + + private: + template + static Literal CastLiteral(const Literal& literal); + + private: + std::map> literal_cast_executor_map_; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_boolean_cast_executor.cpp b/src/paimon/core/casting/string_to_boolean_cast_executor.cpp new file mode 100644 index 0000000..5863278 --- /dev/null +++ b/src/paimon/core/casting/string_to_boolean_cast_executor.cpp @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/string_to_boolean_cast_executor.h" + +#include +#include +#include +#include +#include + +#include "arrow/array/array_binary.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "fmt/format.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/common/utils/string_utils.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +Result StringToBooleanCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::STRING); + PAIMON_ASSIGN_OR_RAISE(FieldType target_field_type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + assert(target_field_type == FieldType::BOOLEAN); + if (literal.IsNull()) { + return Literal(target_field_type); + } + auto value = literal.GetValue(); + std::optional bool_value = StringUtils::StringToValue(value); + if (bool_value == std::nullopt) { + return Status::Invalid(fmt::format( + "StringToBooleanCastExecutor cast failed: STRING '{}' cannot cast to BOOLEAN", value)); + } + return Literal(bool_value.value()); +} + +Result> StringToBooleanCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + auto* string_array = arrow::internal::checked_cast(array.get()); + assert(string_array); + auto bool_builder = std::make_shared(pool); + for (int64_t i = 0; i < string_array->length(); ++i) { + if (string_array->IsNull(i)) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(bool_builder->AppendNull()); + } else { + std::optional bool_value = + StringUtils::StringToValue(string_array->GetString(i)); + if (bool_value == std::nullopt) { + return Status::Invalid(fmt::format( + "StringToBooleanCastExecutor cast failed: STRING '{}' cannot cast to BOOLEAN", + string_array->GetString(i))); + } + PAIMON_RETURN_NOT_OK_FROM_ARROW(bool_builder->Append(bool_value.value())); + } + } + std::shared_ptr casted_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(bool_builder->Finish(&casted_array)); + return casted_array; +} + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_boolean_cast_executor.h b/src/paimon/core/casting/string_to_boolean_cast_executor.h new file mode 100644 index 0000000..af62aa7 --- /dev/null +++ b/src/paimon/core/casting/string_to_boolean_cast_executor.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class StringToBooleanCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_decimal_cast_executor.cpp b/src/paimon/core/casting/string_to_decimal_cast_executor.cpp new file mode 100644 index 0000000..1f8af8b --- /dev/null +++ b/src/paimon/core/casting/string_to_decimal_cast_executor.cpp @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/string_to_decimal_cast_executor.h" + +#include +#include + +#include "arrow/array/array_binary.h" +#include "arrow/array/builder_decimal.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/decimal_utils.h" +#include "paimon/data/decimal.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { + +Result> StringToDecimalCastExecutor::StringToDecimal( + const std::string& str_value, int32_t target_precision, int32_t target_scale) { + int32_t src_precision = 0, src_scale = 0; + arrow::Decimal128 src_decimal; + // parse invalid string + PAIMON_RETURN_NOT_OK_FROM_ARROW( + arrow::Decimal128::FromString(str_value, &src_decimal, &src_precision, &src_scale)); + if (src_precision > Decimal::MAX_PRECISION) { + return std::optional(); + } + return DecimalUtils::RescaleDecimalWithOverflowCheck(src_decimal, src_scale, target_precision, + target_scale); +} + +Result StringToDecimalCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::STRING); + PAIMON_RETURN_NOT_OK(DecimalUtils::CheckDecimalType(*target_type)); + auto* decimal_type = arrow::internal::checked_cast(target_type.get()); + assert(decimal_type); + if (literal.IsNull()) { + return Literal(FieldType::DECIMAL); + } + auto src_value = literal.GetValue(); + PAIMON_ASSIGN_OR_RAISE( + std::optional scaled_decimal, + StringToDecimal(src_value, decimal_type->precision(), decimal_type->scale())); + if (scaled_decimal == std::nullopt) { + return Literal(FieldType::DECIMAL); + } + return Literal(Decimal( + decimal_type->precision(), decimal_type->scale(), + static_cast(static_cast(static_cast( + scaled_decimal.value().high_bits())) + << 64 | + scaled_decimal.value().low_bits()))); +} + +Result> StringToDecimalCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + PAIMON_RETURN_NOT_OK(DecimalUtils::CheckDecimalType(*target_type)); + auto* string_array = arrow::internal::checked_cast(array.get()); + assert(string_array); + auto* decimal_type = arrow::internal::checked_cast(target_type.get()); + auto decimal_builder = std::make_shared(target_type, pool); + for (int64_t i = 0; i < string_array->length(); ++i) { + if (string_array->IsNull(i)) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + std::string src_value = string_array->GetString(i); + PAIMON_ASSIGN_OR_RAISE( + std::optional scaled_decimal, + StringToDecimal(src_value, decimal_type->precision(), decimal_type->scale())); + if (scaled_decimal == std::nullopt) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->AppendNull()); + } else { + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->Append(scaled_decimal.value())); + } + } + } + std::shared_ptr casted_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(decimal_builder->Finish(&casted_array)); + return casted_array; +} + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_decimal_cast_executor.h b/src/paimon/core/casting/string_to_decimal_cast_executor.h new file mode 100644 index 0000000..39f613f --- /dev/null +++ b/src/paimon/core/casting/string_to_decimal_cast_executor.h @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "arrow/util/decimal.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class StringToDecimalCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; + + private: + static Result> StringToDecimal(const std::string& str_value, + int32_t target_precision, + int32_t target_scale); +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_numeric_primitive_cast_executor.cpp b/src/paimon/core/casting/string_to_numeric_primitive_cast_executor.cpp new file mode 100644 index 0000000..e622be3 --- /dev/null +++ b/src/paimon/core/casting/string_to_numeric_primitive_cast_executor.cpp @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/string_to_numeric_primitive_cast_executor.h" + +#include +#include +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "arrow/util/value_parsing.h" +#include "fmt/format.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/common/utils/string_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +StringToNumericPrimitiveCastExecutor::StringToNumericPrimitiveCastExecutor() { + literal_cast_executor_map_ = { + {FieldType::TINYINT, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::TINYINT); }}, + {FieldType::SMALLINT, + [&](const Literal& literal) { + return CastLiteral(literal, FieldType::SMALLINT); + }}, + {FieldType::INT, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::INT); }}, + {FieldType::BIGINT, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::BIGINT); }}, + {FieldType::FLOAT, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::FLOAT); }}, + {FieldType::DOUBLE, + [&](const Literal& literal) { return CastLiteral(literal, FieldType::DOUBLE); }}}; +} + +template +Result StringToNumericPrimitiveCastExecutor::CastLiteral(const Literal& literal, + const FieldType& target_type) { + if (literal.IsNull()) { + return Literal(target_type); + } + auto value = literal.GetValue(); + if constexpr (std::is_same_v || std::is_same_v) { + // use arrow::internal::StringToFloat Func to handle overflow, e.g., 1.7976931348623157e309 + // is supposed to return infinity + TargetType out; + bool success = arrow::internal::StringToFloat(value.data(), value.size(), '.', &out); + if (!success) { + return Status::Invalid( + fmt::format("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot " + "cast '{}' from STRING to {}", + value, FieldTypeUtils::FieldTypeToString(target_type))); + } + return Literal(out); + } else { + std::optional casted_value = StringUtils::StringToValue(value); + if (!casted_value) { + return Status::Invalid( + fmt::format("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot " + "cast '{}' from STRING to {}", + value, FieldTypeUtils::FieldTypeToString(target_type))); + } + return Literal(casted_value.value()); + } +} + +Result StringToNumericPrimitiveCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::STRING); + PAIMON_ASSIGN_OR_RAISE(FieldType target_field_type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + auto iter = literal_cast_executor_map_.find(target_field_type); + if (iter == literal_cast_executor_map_.end()) { + return Status::Invalid(fmt::format( + "cast literal in StringToNumericPrimitiveCastExecutor failed: cannot find cast " + "function from STRING to {}", + FieldTypeUtils::FieldTypeToString(target_field_type))); + } + return iter->second(literal); +} + +Result> StringToNumericPrimitiveCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + return CastingUtils::Cast(array, target_type, options, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_numeric_primitive_cast_executor.h b/src/paimon/core/casting/string_to_numeric_primitive_cast_executor.h new file mode 100644 index 0000000..c281341 --- /dev/null +++ b/src/paimon/core/casting/string_to_numeric_primitive_cast_executor.h @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +enum class FieldType; + +class StringToNumericPrimitiveCastExecutor : public CastExecutor { + public: + StringToNumericPrimitiveCastExecutor(); + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; + + private: + template + static Result CastLiteral(const Literal& literal, const FieldType& target_type); + + private: + std::map(const Literal&)>> literal_cast_executor_map_; +}; +} // namespace paimon