diff --git a/src/paimon/core/casting/binary_to_string_cast_executor.cpp b/src/paimon/core/casting/binary_to_string_cast_executor.cpp new file mode 100644 index 0000000..0975444 --- /dev/null +++ b/src/paimon/core/casting/binary_to_string_cast_executor.cpp @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "paimon/core/casting/binary_to_string_cast_executor.h" + +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/defs.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +Result BinaryToStringCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::BINARY); + PAIMON_ASSIGN_OR_RAISE(FieldType target_field_type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + assert(target_field_type == FieldType::STRING); + if (literal.IsNull()) { + return Literal(target_field_type); + } + auto value = literal.GetValue(); + return Literal(FieldType::STRING, value.data(), value.size()); +} + +Result> BinaryToStringCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + options.allow_invalid_utf8 = true; + return CastingUtils::Cast(array, target_type, options, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/binary_to_string_cast_executor.h b/src/paimon/core/casting/binary_to_string_cast_executor.h new file mode 100644 index 0000000..0040688 --- /dev/null +++ b/src/paimon/core/casting/binary_to_string_cast_executor.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class BinaryToStringCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/date_to_string_cast_executor.cpp b/src/paimon/core/casting/date_to_string_cast_executor.cpp new file mode 100644 index 0000000..afdfb8c --- /dev/null +++ b/src/paimon/core/casting/date_to_string_cast_executor.cpp @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/date_to_string_cast_executor.h" + +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/scalar.h" +#include "arrow/type.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/defs.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +Result DateToStringCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(target_type->id() == arrow::Type::type::STRING); + assert(literal.GetType() == FieldType::DATE); + return CastingUtils::Cast( + literal, arrow::date32(), target_type, arrow::compute::CastOptions::Safe()); +} + +Result> DateToStringCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + return CastingUtils::Cast(array, target_type, options, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/date_to_string_cast_executor.h b/src/paimon/core/casting/date_to_string_cast_executor.h new file mode 100644 index 0000000..4ceab59 --- /dev/null +++ b/src/paimon/core/casting/date_to_string_cast_executor.h @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class DateToStringCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/date_to_timestamp_cast_executor.cpp b/src/paimon/core/casting/date_to_timestamp_cast_executor.cpp new file mode 100644 index 0000000..33d911f --- /dev/null +++ b/src/paimon/core/casting/date_to_timestamp_cast_executor.cpp @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/date_to_timestamp_cast_executor.h" + +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { + +Result DateToTimestampCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + return Status::Invalid("do not support cast literal from date to timestamp"); +} + +Result> DateToTimestampCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + auto target_ts_type = arrow::internal::checked_pointer_cast(target_type); + assert(target_ts_type); + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + + auto target_ts_type_no_tz = arrow::timestamp(target_ts_type->unit()); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr target_array, + CastingUtils::Cast(array, target_ts_type_no_tz, options, pool)); + if (target_ts_type->timezone().empty()) { + return target_array; + } + // handle timezone + return CastingUtils::TimestampToTimestampWithTimezone(target_array, target_ts_type, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/date_to_timestamp_cast_executor.h b/src/paimon/core/casting/date_to_timestamp_cast_executor.h new file mode 100644 index 0000000..5189dbe --- /dev/null +++ b/src/paimon/core/casting/date_to_timestamp_cast_executor.h @@ -0,0 
+1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class DateToTimestampCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + // Noted that: Java Paimon supports date values ranging from 0000-01-01 to 9999-12-31, while C++ + // Paimon only supports from (min_int64/NANOS_PER_DAY) to (max_int64/NANOS_PER_DAY) while target + // type is NANO unit. 
+ Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/numeric_primitive_to_timestamp_cast_executor.cpp b/src/paimon/core/casting/numeric_primitive_to_timestamp_cast_executor.cpp new file mode 100644 index 0000000..33f5ca6 --- /dev/null +++ b/src/paimon/core/casting/numeric_primitive_to_timestamp_cast_executor.cpp @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "paimon/core/casting/numeric_primitive_to_timestamp_cast_executor.h" + +#include +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "arrow/array/builder_dict.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "fmt/format.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/date_time_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/core/casting/timestamp_to_timestamp_cast_executor.h" +#include "paimon/data/timestamp.h" +#include "paimon/defs.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +template +class NumericArray; +} // namespace arrow + +namespace paimon { +Result NumericPrimitiveToTimestampCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + return Status::Invalid("do not support cast literal from numeric primitive to timestamp"); +} + +Result> NumericPrimitiveToTimestampCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + auto src_type = array->type(); + assert(src_type->id() == arrow::Type::type::INT32 || + src_type->id() == arrow::Type::type::INT64); + auto target_array = array; + // 1. int32/int64 array to int64 array + if (src_type->id() == arrow::Type::type::INT32) { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + PAIMON_ASSIGN_OR_RAISE(target_array, + CastingUtils::Cast(target_array, arrow::int64(), options, pool)); + } + // 2. int64 array to timestamp(second, tz) array + auto timezone = DateTimeUtils::GetLocalTimezoneName(); + auto ts_second_tz = arrow::timestamp(arrow::TimeUnit::SECOND, timezone); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(target_array, target_array->View(ts_second_tz)); + // 3. 
timestamp(second, tz) array to target ts array + auto timestamp_to_timestamp_cast_executor = + std::make_shared(); + PAIMON_ASSIGN_OR_RAISE( + target_array, timestamp_to_timestamp_cast_executor->Cast(target_array, target_type, pool)); + return target_array; +} + +} // namespace paimon diff --git a/src/paimon/core/casting/numeric_primitive_to_timestamp_cast_executor.h b/src/paimon/core/casting/numeric_primitive_to_timestamp_cast_executor.h new file mode 100644 index 0000000..17f7545 --- /dev/null +++ b/src/paimon/core/casting/numeric_primitive_to_timestamp_cast_executor.h @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class NumericPrimitiveToTimestampCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + // Noted that: 1.Java Paimon can cast numeric range [min_int64/1000, max_int64/1000] + // while C++ Paimon can only cast numeric range [min_int64/1e9, max_int64/1e9] + // value while target type is nano, beyond C++ Paimon range will be undefined behavior + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_binary_cast_executor.cpp b/src/paimon/core/casting/string_to_binary_cast_executor.cpp new file mode 100644 index 0000000..0a94aa5 --- /dev/null +++ b/src/paimon/core/casting/string_to_binary_cast_executor.cpp @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "paimon/core/casting/string_to_binary_cast_executor.h" + +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/defs.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +Result StringToBinaryCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::STRING); + PAIMON_ASSIGN_OR_RAISE(FieldType target_field_type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + assert(target_field_type == FieldType::BINARY); + if (literal.IsNull()) { + return Literal(target_field_type); + } + auto value = literal.GetValue(); + return Literal(FieldType::BINARY, value.data(), value.size()); +} + +Result> StringToBinaryCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + return CastingUtils::Cast(array, target_type, options, pool); +} + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_binary_cast_executor.h b/src/paimon/core/casting/string_to_binary_cast_executor.h new file mode 100644 index 0000000..ce72f11 --- /dev/null +++ b/src/paimon/core/casting/string_to_binary_cast_executor.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class StringToBinaryCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_date_cast_executor.cpp b/src/paimon/core/casting/string_to_date_cast_executor.cpp new file mode 100644 index 0000000..0608920 --- /dev/null +++ b/src/paimon/core/casting/string_to_date_cast_executor.cpp @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/string_to_date_cast_executor.h" + +#include +#include +#include +#include + +#include "arrow/array/array_binary.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/common/utils/string_utils.h" +#include "paimon/defs.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +Result StringToDateCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + assert(literal.GetType() == FieldType::STRING); + PAIMON_ASSIGN_OR_RAISE(FieldType target_field_type, + FieldTypeUtils::ConvertToFieldType(target_type->id())); + assert(target_field_type == FieldType::DATE); + if (literal.IsNull()) { + return Literal(target_field_type); + } + auto value = literal.GetValue(); + PAIMON_ASSIGN_OR_RAISE(int32_t date_value, StringUtils::StringToDate(value)); + return Literal(FieldType::DATE, date_value); +} + +Result> StringToDateCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + auto* string_array = arrow::internal::checked_cast(array.get()); + assert(string_array); + auto date_builder = std::make_shared(pool); + for (int64_t i = 0; i < string_array->length(); ++i) { + if (string_array->IsNull(i)) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(date_builder->AppendNull()); + } else { + PAIMON_ASSIGN_OR_RAISE(int32_t 
date_value, + StringUtils::StringToDate(string_array->GetString(i))); + PAIMON_RETURN_NOT_OK_FROM_ARROW(date_builder->Append(date_value)); + } + } + std::shared_ptr casted_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(date_builder->Finish(&casted_array)); + return casted_array; +} + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_date_cast_executor.h b/src/paimon/core/casting/string_to_date_cast_executor.h new file mode 100644 index 0000000..1178db7 --- /dev/null +++ b/src/paimon/core/casting/string_to_date_cast_executor.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class StringToDateCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_timestamp_cast_executor.cpp b/src/paimon/core/casting/string_to_timestamp_cast_executor.cpp new file mode 100644 index 0000000..09b0623 --- /dev/null +++ b/src/paimon/core/casting/string_to_timestamp_cast_executor.cpp @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "paimon/core/casting/string_to_timestamp_cast_executor.h" + +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/status.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { + +Result StringToTimestampCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + return Status::Invalid("do not support cast literal from string to timestamp"); +} + +Result> StringToTimestampCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + auto timestamp_type = arrow::internal::checked_pointer_cast(target_type); + assert(timestamp_type); + auto target_type_no_tz = arrow::timestamp(timestamp_type->unit()); + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr target_array, + CastingUtils::Cast(array, target_type_no_tz, options, pool)); + if (!timestamp_type->timezone().empty()) { + PAIMON_ASSIGN_OR_RAISE(target_array, CastingUtils::TimestampToTimestampWithTimezone( + target_array, timestamp_type, pool)); + } + return target_array; +} + +} // namespace paimon diff --git a/src/paimon/core/casting/string_to_timestamp_cast_executor.h b/src/paimon/core/casting/string_to_timestamp_cast_executor.h new file mode 100644 index 0000000..fd22832 --- /dev/null +++ b/src/paimon/core/casting/string_to_timestamp_cast_executor.h @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class StringToTimestampCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + // Noted that: 1.Java Paimon can cast timestamp range [0000-01-01 00:00:00.00000000, 9999-12-31 + // 23:59:59.999999999] while C++ Paimon can cast timestamp range [min_int64, max_int64] while + // target type is nano, ns value beyond C++ Paimon range will be undefined behavior; 2.Java + // Paimon supports passing a numeric value in TimestampType while C++ Paimon does not; 3. C++ + // Paimon supports the format "1970-01-01T00:00:00" while Java paimon does not + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon diff --git a/src/paimon/core/casting/timestamp_to_date_cast_executor.cpp b/src/paimon/core/casting/timestamp_to_date_cast_executor.cpp new file mode 100644 index 0000000..a629fb0 --- /dev/null +++ b/src/paimon/core/casting/timestamp_to_date_cast_executor.cpp @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/core/casting/timestamp_to_date_cast_executor.h" + +#include +#include +#include + +#include "arrow/compute/cast.h" +#include "arrow/type.h" +#include "paimon/common/utils/date_time_utils.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/casting_utils.h" +#include "paimon/data/timestamp.h" +#include "paimon/defs.h" + +namespace arrow { +class MemoryPool; +class Array; +} // namespace arrow + +namespace paimon { +Result TimestampToDateCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + return Status::Invalid("do not support cast literal from timestamp to date"); +} + +Result> TimestampToDateCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + auto target_array = array; + auto src_ts_type = arrow::internal::checked_pointer_cast(array->type()); + if (!src_ts_type->timezone().empty()) { + auto target_type_no_tz = std::make_shared(src_ts_type->unit()); + PAIMON_ASSIGN_OR_RAISE(target_array, CastingUtils::TimestampWithTimezoneToTimestamp( + target_array, target_type_no_tz, pool)); + } + arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe(); + return CastingUtils::Cast(target_array, target_type, options, pool); +} + +} // namespace paimon diff --git 
a/src/paimon/core/casting/timestamp_to_date_cast_executor.h b/src/paimon/core/casting/timestamp_to_date_cast_executor.h new file mode 100644 index 0000000..f785c05 --- /dev/null +++ b/src/paimon/core/casting/timestamp_to_date_cast_executor.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+#pragma once
+#include
+
+#include "arrow/array/array_base.h"
+#include "paimon/core/casting/cast_executor.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace arrow {
+class DataType;  // forward declaration
+class MemoryPool;  // forward declaration
+}  // namespace arrow
+
+namespace paimon {
+class TimestampToDateCastExecutor : public CastExecutor {  // timestamp -> date executor, going by the class name
+ public:
+    Result Cast(const Literal& literal,  // overload for a single Literal value
+                const std::shared_ptr& target_type) const override;
+
+    Result> Cast(const std::shared_ptr& array,  // overload for an array; also receives a memory pool
+                 const std::shared_ptr& target_type,
+                 arrow::MemoryPool* pool) const override;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/casting/timestamp_to_numeric_primitive_cast_executor.cpp b/src/paimon/core/casting/timestamp_to_numeric_primitive_cast_executor.cpp
new file mode 100644
index 0000000..c8ba974
--- /dev/null
+++ b/src/paimon/core/casting/timestamp_to_numeric_primitive_cast_executor.cpp
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/casting/timestamp_to_numeric_primitive_cast_executor.h"
+
+#include
+#include
+#include
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/array/builder_base.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "paimon/common/utils/arrow/status_utils.h"
+#include "paimon/common/utils/date_time_utils.h"
+#include "paimon/common/utils/field_type_utils.h"
+#include "paimon/core/casting/casting_utils.h"
+#include "paimon/core/casting/timestamp_to_timestamp_cast_executor.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/defs.h"
+#include "paimon/status.h"
+
+namespace arrow {
+class MemoryPool;  // forward declaration
+template
+class NumericBuilder;  // forward declaration
+}  // namespace arrow
+
+namespace paimon {
+Result TimestampToNumericPrimitiveCastExecutor::Cast(  // literal overload
+    const Literal& literal, const std::shared_ptr& target_type) const {  // neither parameter is read
+    return Status::Invalid("do not support cast literal from timestamp to numeric primitive");  // always fails
+}
+
+Result> TimestampToNumericPrimitiveCastExecutor::Cast(  // array overload
+    const std::shared_ptr& array, const std::shared_ptr& target_type,
+    arrow::MemoryPool* pool) const {
+    auto timestamp_type =
+        arrow::internal::checked_pointer_cast(array->type());  // only used by the assert on the next line
+    assert(timestamp_type);  // assert only: no runtime error is returned from this check
+    assert(target_type->id() == arrow::Type::type::INT32 ||
+           target_type->id() == arrow::Type::type::INT64);  // only int32/int64 targets are expected
+    auto timestamp_to_timestamp_cast_executor =
+        std::make_shared();  // helper executor used for step 1; NOTE(review): concrete type not visible here - confirm
+    auto target_array = array;  // running result; starts as the unmodified input
+    auto timezone = DateTimeUtils::GetLocalTimezoneName();  // timezone name used for the intermediate type
+    auto ts_with_sec_tz = arrow::timestamp(arrow::TimeUnit::SECOND, timezone);  // intermediate type: second unit + that timezone
+    // 1. cast the timestamp array to timestamp(second, tz)
+    PAIMON_ASSIGN_OR_RAISE(target_array, timestamp_to_timestamp_cast_executor->Cast(
+                                             target_array, ts_with_sec_tz, pool));
+    // 2. turn the timestamp(second, tz) array into an int32/int64 array; the output integer is a
+    // number of seconds
+    PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(target_array, target_array->View(arrow::int64()));  // View() to int64, not a compute cast
+    if (target_type->id() == arrow::Type::type::INT32) {  // an int64 target is returned as-is; int32 needs one more cast
+        arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe();
+        options.allow_int_overflow = true;  // integer overflow is allowed for the int64 -> int32 cast
+        PAIMON_ASSIGN_OR_RAISE(target_array,
+                               CastingUtils::Cast(target_array, target_type, options, pool));
+    }
+    return target_array;
+}
+
+}  // namespace paimon
diff --git a/src/paimon/core/casting/timestamp_to_numeric_primitive_cast_executor.h b/src/paimon/core/casting/timestamp_to_numeric_primitive_cast_executor.h
new file mode 100644
index 0000000..a061576
--- /dev/null
+++ b/src/paimon/core/casting/timestamp_to_numeric_primitive_cast_executor.h
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include
+
+#include "arrow/array/array_base.h"
+#include "paimon/core/casting/cast_executor.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace arrow {
+class DataType;  // forward declaration
+class MemoryPool;  // forward declaration
+}  // namespace arrow
+
+namespace paimon {
+class TimestampToNumericPrimitiveCastExecutor : public CastExecutor {  // timestamp -> integer executor, going by the class name
+ public:
+    Result Cast(const Literal& literal,  // overload for a single Literal value
+                const std::shared_ptr& target_type) const override;
+
+    // The output integer array holds seconds. When the src array has a higher precision (e.g., > 0),
+    // the result may be inconsistent with Java Paimon.
+    Result> Cast(const std::shared_ptr& array,  // overload for an array; also receives a memory pool
+                 const std::shared_ptr& target_type,
+                 arrow::MemoryPool* pool) const override;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/casting/timestamp_to_string_cast_executor.cpp b/src/paimon/core/casting/timestamp_to_string_cast_executor.cpp
new file mode 100644
index 0000000..7fbdfe2
--- /dev/null
+++ b/src/paimon/core/casting/timestamp_to_string_cast_executor.cpp
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/casting/timestamp_to_string_cast_executor.h"
+
+#include
+
+#include "arrow/array/array_base.h"
+#include "arrow/compute/cast.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "paimon/core/casting/casting_utils.h"
+#include "paimon/status.h"
+
+namespace arrow {
+class MemoryPool;  // forward declaration
+}  // namespace arrow
+
+namespace paimon {
+Result TimestampToStringCastExecutor::Cast(  // literal overload
+    const Literal& literal, const std::shared_ptr& target_type) const {  // neither parameter is read
+    return Status::Invalid("do not support cast literal from timestamp to string");  // always fails
+}
+
+Result> TimestampToStringCastExecutor::Cast(  // array overload
+    const std::shared_ptr& array, const std::shared_ptr& target_type,
+    arrow::MemoryPool* pool) const {
+    auto target_array = array;  // running result; starts as the unmodified input
+    auto src_ts_type = arrow::internal::checked_pointer_cast(array->type());  // NOTE(review): presumably a timestamp type (timezone()/unit() are called on it) - confirm
+    if (!src_ts_type->timezone().empty()) {  // the source type carries a timezone
+        auto target_type_no_tz = std::make_shared(src_ts_type->unit());  // same unit, built without a timezone argument
+        PAIMON_ASSIGN_OR_RAISE(target_array, CastingUtils::TimestampWithTimezoneToTimestamp(  // on error, returns early
+                                                 target_array, target_type_no_tz, pool));
+    }
+    arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe();  // Safe() options, not modified
+    return CastingUtils::Cast(target_array, target_type, options, pool);  // final conversion to target_type is delegated
+}
+
+}  // namespace paimon
diff --git a/src/paimon/core/casting/timestamp_to_string_cast_executor.h b/src/paimon/core/casting/timestamp_to_string_cast_executor.h
new file mode 100644
index 0000000..8b72000
--- /dev/null
+++ b/src/paimon/core/casting/timestamp_to_string_cast_executor.h
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include
+
+#include "arrow/array/array_base.h"
+#include "paimon/core/casting/cast_executor.h"
+#include "paimon/predicate/literal.h"
+#include "paimon/result.h"
+
+namespace arrow {
+class DataType;  // forward declaration
+class MemoryPool;  // forward declaration
+}  // namespace arrow
+
+namespace paimon {
+class TimestampToStringCastExecutor : public CastExecutor {  // timestamp -> string executor, going by the class name
+ public:
+    Result Cast(const Literal& literal,  // overload for a single Literal value
+                const std::shared_ptr& target_type) const override;
+
+    Result> Cast(const std::shared_ptr& array,  // overload for an array; also receives a memory pool
+                 const std::shared_ptr& target_type,
+                 arrow::MemoryPool* pool) const override;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/casting/timestamp_to_timestamp_cast_executor.cpp b/src/paimon/core/casting/timestamp_to_timestamp_cast_executor.cpp
new file mode 100644
index 0000000..ff9bdec
--- /dev/null
+++ b/src/paimon/core/casting/timestamp_to_timestamp_cast_executor.cpp
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/casting/timestamp_to_timestamp_cast_executor.h"
+
+#include
+
+#include "paimon/common/utils/date_time_utils.h"
+#include "paimon/core/casting/casting_utils.h"
+
+namespace paimon {
+Result TimestampToTimestampCastExecutor::Cast(  // literal overload
+    const Literal& literal, const std::shared_ptr& target_type) const {  // neither parameter is read
+    return Status::Invalid("do not support cast literal from timestamp to timestamp");  // always fails
+}
+
+Result> TimestampToTimestampCastExecutor::Cast(  // array overload
+    const std::shared_ptr& array, const std::shared_ptr& target_type,
+    arrow::MemoryPool* pool) const {
+    arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe();
+    options.allow_time_truncate = true;  // only consumed by the unit cast in the second step
+    auto src_ts_type = arrow::internal::checked_pointer_cast(array->type());  // NOTE(review): presumably a timestamp type (timezone()/unit() are called on it) - confirm
+    auto target_ts_type = arrow::internal::checked_pointer_cast(target_type);  // same treatment for the requested type
+    assert(src_ts_type && target_ts_type);  // assert only: no runtime error is returned from this check
+    std::shared_ptr target_array = array;  // running result; starts as the unmodified input
+    // first, handle timezone
+    if (src_ts_type->timezone() != target_ts_type->timezone()) {  // skipped entirely when the timezones match
+        auto target_type_with_tz =
+            std::make_shared(src_ts_type->unit(), target_ts_type->timezone());  // source unit + target timezone
+        if (src_ts_type->timezone().empty() && !target_type_with_tz->timezone().empty()) {  // no timezone -> timezone
+            PAIMON_ASSIGN_OR_RAISE(target_array, CastingUtils::TimestampToTimestampWithTimezone(
+                                                     target_array, target_type_with_tz, pool));
+        } else if (!src_ts_type->timezone().empty() && target_type_with_tz->timezone().empty()) {  // timezone -> no timezone
+            PAIMON_ASSIGN_OR_RAISE(target_array, CastingUtils::TimestampWithTimezoneToTimestamp(
+                                                     target_array, target_type_with_tz, pool));
+        } else {
+            // src and target both have non-empty, different timezones
+            PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(target_array,
+                                              target_array->View(target_type_with_tz));  // View() to the new type; no CastingUtils helper is called
+        }
+    }
+    // second, handle precision
+    if (src_ts_type->unit() != target_ts_type->unit()) {  // skipped when the units already match
+        PAIMON_ASSIGN_OR_RAISE(target_array,
+                               CastingUtils::Cast(target_array, target_type, options, pool));
+    }
+    return target_array;
+}
+}  // namespace paimon
diff --git a/src/paimon/core/casting/timestamp_to_timestamp_cast_executor.h b/src/paimon/core/casting/timestamp_to_timestamp_cast_executor.h
new file mode 100644
index 0000000..4a2be64
--- /dev/null
+++ b/src/paimon/core/casting/timestamp_to_timestamp_cast_executor.h
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include
+
+#include "paimon/core/casting/cast_executor.h"
+
+namespace paimon {
+/// Supports conversion between TIMESTAMP and TIMESTAMP_WITH_LOCAL_TIME_ZONE, and also adjusts the
+/// precision if it changes at the same time.
+class PAIMON_EXPORT TimestampToTimestampCastExecutor : public CastExecutor {  // timestamp -> timestamp executor, going by the class name
+ public:
+    Result Cast(const Literal& literal,  // overload for a single Literal value
+                const std::shared_ptr& target_type) const override;
+
+    Result> Cast(const std::shared_ptr& array,  // overload for an array; also receives a memory pool
+                 const std::shared_ptr& target_type,
+                 arrow::MemoryPool* pool) const override;
+};
+
+}  // namespace paimon