Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions src/paimon/core/casting/binary_to_string_cast_executor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "paimon/core/casting/binary_to_string_cast_executor.h"

#include <cassert>
#include <string>
#include <utility>

#include "arrow/compute/cast.h"
#include "arrow/type.h"
#include "paimon/common/utils/field_type_utils.h"
#include "paimon/core/casting/casting_utils.h"
#include "paimon/defs.h"

namespace arrow {
class MemoryPool;
class Array;
} // namespace arrow

namespace paimon {
// Casts a BINARY literal to a STRING literal.
//
// `literal` is expected to be of type FieldType::BINARY and `target_type` is expected to map to
// FieldType::STRING; both expectations are only checked by assert. A null input yields a literal
// built from the resolved target field type alone; otherwise the STRING literal is built from the
// bytes of the source value. Returns the error raised by FieldTypeUtils::ConvertToFieldType if
// the Arrow type id cannot be converted.
Result<Literal> BinaryToStringCastExecutor::Cast(
    const Literal& literal, const std::shared_ptr<arrow::DataType>& target_type) const {
    assert(literal.GetType() == FieldType::BINARY);
    PAIMON_ASSIGN_OR_RAISE(FieldType resolved_type,
                           FieldTypeUtils::ConvertToFieldType(target_type->id()));
    assert(resolved_type == FieldType::STRING);
    if (!literal.IsNull()) {
        // Re-wrap the same bytes under the STRING field type.
        auto bytes = literal.GetValue<std::string>();
        return Literal(FieldType::STRING, bytes.data(), bytes.size());
    }
    return Literal(resolved_type);
}

// Casts `array` to `target_type` through CastingUtils::Cast using Arrow's safe cast options,
// with UTF-8 validation relaxed so that payloads which are not valid UTF-8 are still accepted.
// `pool` is forwarded to the underlying cast.
Result<std::shared_ptr<arrow::Array>> BinaryToStringCastExecutor::Cast(
    const std::shared_ptr<arrow::Array>& array, const std::shared_ptr<arrow::DataType>& target_type,
    arrow::MemoryPool* pool) const {
    auto cast_options = arrow::compute::CastOptions::Safe();
    // Do not reject byte sequences that are not valid UTF-8.
    cast_options.allow_invalid_utf8 = true;
    return CastingUtils::Cast(array, target_type, cast_options, pool);
}

} // namespace paimon
44 changes: 44 additions & 0 deletions src/paimon/core/casting/binary_to_string_cast_executor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once
#include <memory>

#include "arrow/array/array_base.h"
#include "paimon/core/casting/cast_executor.h"
#include "paimon/predicate/literal.h"
#include "paimon/result.h"

namespace arrow {
class DataType;
class MemoryPool;
} // namespace arrow

namespace paimon {
// Cast executor for BINARY -> STRING conversions, for both single literals and Arrow arrays.
class BinaryToStringCastExecutor : public CastExecutor {
public:
// Casts a single literal to `target_type`. The implementation asserts that the literal is
// BINARY and that `target_type` maps to STRING.
Result<Literal> Cast(const Literal& literal,
const std::shared_ptr<arrow::DataType>& target_type) const override;

// Casts `array` to `target_type`. The implementation allows invalid UTF-8 in the input;
// `pool` is forwarded to the underlying cast.
Result<std::shared_ptr<arrow::Array>> Cast(const std::shared_ptr<arrow::Array>& array,
const std::shared_ptr<arrow::DataType>& target_type,
arrow::MemoryPool* pool) const override;
};

} // namespace paimon
53 changes: 53 additions & 0 deletions src/paimon/core/casting/date_to_string_cast_executor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "paimon/core/casting/date_to_string_cast_executor.h"

#include <cassert>
#include <cstdint>
#include <string>

#include "arrow/compute/cast.h"
#include "arrow/scalar.h"
#include "arrow/type.h"
#include "paimon/core/casting/casting_utils.h"
#include "paimon/defs.h"

namespace arrow {
class MemoryPool;
class Array;
} // namespace arrow

namespace paimon {
// Casts a DATE literal to a STRING literal.
//
// The conversion is delegated to the scalar overload of CastingUtils::Cast, treating the source
// as an arrow::Date32Scalar (int32_t payload) and the result as an arrow::StringScalar
// (std::string payload), using Arrow's safe cast options. The source and target types are only
// checked by assert.
Result<Literal> DateToStringCastExecutor::Cast(
    const Literal& literal, const std::shared_ptr<arrow::DataType>& target_type) const {
    assert(literal.GetType() == FieldType::DATE);
    assert(target_type->id() == arrow::Type::type::STRING);
    using SourceScalar = arrow::Date32Scalar;
    using TargetScalar = arrow::StringScalar;
    return CastingUtils::Cast<SourceScalar, int32_t, TargetScalar, std::string>(
        literal, arrow::date32(), target_type, arrow::compute::CastOptions::Safe());
}

// Casts `array` to `target_type` through CastingUtils::Cast with Arrow's safe cast options.
// `pool` is forwarded to the underlying cast.
Result<std::shared_ptr<arrow::Array>> DateToStringCastExecutor::Cast(
    const std::shared_ptr<arrow::Array>& array, const std::shared_ptr<arrow::DataType>& target_type,
    arrow::MemoryPool* pool) const {
    auto safe_options = arrow::compute::CastOptions::Safe();
    return CastingUtils::Cast(array, target_type, safe_options, pool);
}

} // namespace paimon
45 changes: 45 additions & 0 deletions src/paimon/core/casting/date_to_string_cast_executor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <memory>

#include "arrow/array/array_base.h"
#include "paimon/core/casting/cast_executor.h"
#include "paimon/predicate/literal.h"
#include "paimon/result.h"

namespace arrow {
class DataType;
class MemoryPool;
} // namespace arrow

namespace paimon {
// Cast executor for DATE -> STRING conversions, for both single literals and Arrow arrays.
class DateToStringCastExecutor : public CastExecutor {
public:
// Casts a single literal to `target_type`. The implementation asserts that the literal is
// DATE and that `target_type` is the Arrow STRING type.
Result<Literal> Cast(const Literal& literal,
const std::shared_ptr<arrow::DataType>& target_type) const override;

// Casts `array` to `target_type` using Arrow's safe cast options; `pool` is forwarded to the
// underlying cast.
Result<std::shared_ptr<arrow::Array>> Cast(const std::shared_ptr<arrow::Array>& array,
const std::shared_ptr<arrow::DataType>& target_type,
arrow::MemoryPool* pool) const override;
};

} // namespace paimon
61 changes: 61 additions & 0 deletions src/paimon/core/casting/date_to_timestamp_cast_executor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "paimon/core/casting/date_to_timestamp_cast_executor.h"

#include <cassert>
#include <string>
#include <utility>

#include "arrow/compute/cast.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "paimon/core/casting/casting_utils.h"
#include "paimon/status.h"

namespace arrow {
class MemoryPool;
class Array;
} // namespace arrow

namespace paimon {

// Literal casts from date to timestamp are not supported: this overload ignores its arguments
// and always returns an Invalid status.
Result<Literal> DateToTimestampCastExecutor::Cast(
    const Literal& literal, const std::shared_ptr<arrow::DataType>& target_type) const {
    // Parameters are intentionally unused.
    static_cast<void>(literal);
    static_cast<void>(target_type);
    return Status::Invalid("do not support cast literal from date to timestamp");
}

// Casts a date array to a timestamp array with the unit (and optional timezone) of `target_type`.
//
// The cast runs in two steps:
//   1. cast `array` to a timezone-less timestamp of the target unit with safe cast options;
//   2. if the target type carries a timezone, convert that result through
//      CastingUtils::TimestampToTimestampWithTimezone.
//
// Returns Status::Invalid when `target_type` is null or is not a timestamp type, and otherwise
// propagates any error raised by the underlying casts. `pool` is forwarded to both steps.
Result<std::shared_ptr<arrow::Array>> DateToTimestampCastExecutor::Cast(
    const std::shared_ptr<arrow::Array>& array, const std::shared_ptr<arrow::DataType>& target_type,
    arrow::MemoryPool* pool) const {
    // Validate at runtime instead of relying on assert: with NDEBUG the assert disappears and
    // checked_pointer_cast degrades to an unchecked static cast, so a wrong target type would
    // otherwise be dereferenced as a TimestampType.
    if (target_type == nullptr || target_type->id() != arrow::Type::type::TIMESTAMP) {
        return Status::Invalid("target type of date to timestamp cast must be timestamp");
    }
    auto target_ts_type = arrow::internal::checked_pointer_cast<arrow::TimestampType>(target_type);
    arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe();

    // Step 1: date -> timestamp(unit) without timezone.
    auto target_ts_type_no_tz = arrow::timestamp(target_ts_type->unit());
    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> target_array,
                           CastingUtils::Cast(array, target_ts_type_no_tz, options, pool));
    if (target_ts_type->timezone().empty()) {
        return target_array;
    }
    // Step 2: the target type has a timezone, so convert to the timezone-aware type.
    return CastingUtils::TimestampToTimestampWithTimezone(target_array, target_ts_type, pool);
}

} // namespace paimon
48 changes: 48 additions & 0 deletions src/paimon/core/casting/date_to_timestamp_cast_executor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <memory>

#include "arrow/array/array_base.h"
#include "paimon/core/casting/cast_executor.h"
#include "paimon/predicate/literal.h"
#include "paimon/result.h"

namespace arrow {
class DataType;
class MemoryPool;
} // namespace arrow

namespace paimon {
// Cast executor for DATE -> TIMESTAMP conversions. Only the array overload performs a cast; the
// literal overload always reports an error.
class DateToTimestampCastExecutor : public CastExecutor {
public:
// Not supported: the implementation always returns an Invalid status.
Result<Literal> Cast(const Literal& literal,
const std::shared_ptr<arrow::DataType>& target_type) const override;

// Note: Java Paimon supports date values ranging from 0000-01-01 to 9999-12-31, while C++
// Paimon only supports dates between (min_int64 / NANOS_PER_DAY) and
// (max_int64 / NANOS_PER_DAY) when the target type uses the NANO unit.
Result<std::shared_ptr<arrow::Array>> Cast(const std::shared_ptr<arrow::Array>& array,
const std::shared_ptr<arrow::DataType>& target_type,
arrow::MemoryPool* pool) const override;
};

} // namespace paimon
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "paimon/core/casting/numeric_primitive_to_timestamp_cast_executor.h"

#include <cassert>
#include <cstdint>
#include <limits>
#include <utility>

#include "arrow/array/array_base.h"
#include "arrow/array/builder_dict.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "fmt/format.h"
#include "paimon/common/utils/arrow/status_utils.h"
#include "paimon/common/utils/date_time_utils.h"
#include "paimon/core/casting/casting_utils.h"
#include "paimon/core/casting/timestamp_to_timestamp_cast_executor.h"
#include "paimon/data/timestamp.h"
#include "paimon/defs.h"
#include "paimon/status.h"

namespace arrow {
class MemoryPool;
template <typename TYPE>
class NumericArray;
} // namespace arrow

namespace paimon {
// Literal casts from numeric primitives to timestamp are not supported: this overload ignores
// its arguments and always returns an Invalid status.
Result<Literal> NumericPrimitiveToTimestampCastExecutor::Cast(
    const Literal& literal, const std::shared_ptr<arrow::DataType>& target_type) const {
    // Parameters are intentionally unused.
    static_cast<void>(literal);
    static_cast<void>(target_type);
    return Status::Invalid("do not support cast literal from numeric primitive to timestamp");
}

// Casts an INT32 or INT64 array to the timestamp type given by `target_type`.
//
// The source type is only checked by assert. The conversion runs in three steps:
//   1. widen INT32 input to INT64 with safe cast options (INT64 input is used as is);
//   2. view the INT64 values as timestamp(SECOND) tagged with the local timezone name;
//   3. cast that timestamp array to `target_type` via TimestampToTimestampCastExecutor.
//
// Errors from any step are propagated to the caller. `pool` is forwarded to steps 1 and 3.
Result<std::shared_ptr<arrow::Array>> NumericPrimitiveToTimestampCastExecutor::Cast(
    const std::shared_ptr<arrow::Array>& array, const std::shared_ptr<arrow::DataType>& target_type,
    arrow::MemoryPool* pool) const {
    auto src_type = array->type();
    assert(src_type->id() == arrow::Type::type::INT32 ||
           src_type->id() == arrow::Type::type::INT64);
    auto target_array = array;
    // 1. int32/int64 array to int64 array
    if (src_type->id() == arrow::Type::type::INT32) {
        arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe();
        PAIMON_ASSIGN_OR_RAISE(target_array,
                               CastingUtils::Cast(target_array, arrow::int64(), options, pool));
    }
    // 2. int64 array to timestamp(second, tz) array
    auto timezone = DateTimeUtils::GetLocalTimezoneName();
    auto ts_second_tz = arrow::timestamp(arrow::TimeUnit::SECOND, timezone);
    PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(target_array, target_array->View(ts_second_tz));
    // 3. timestamp(second, tz) array to target ts array
    // The executor is only needed for this one call, so keep it on the stack instead of paying
    // for a shared_ptr allocation on every cast.
    TimestampToTimestampCastExecutor timestamp_to_timestamp_cast_executor;
    return timestamp_to_timestamp_cast_executor.Cast(target_array, target_type, pool);
}

} // namespace paimon
Loading