diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/tests/gtest_iceberg_path_resolver.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/tests/gtest_iceberg_path_resolver.cpp new file mode 100644 index 000000000000..9c4adc113eab --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/tests/gtest_iceberg_path_resolver.cpp @@ -0,0 +1,41 @@ +#include + +#include + +using namespace DB::Iceberg; + +TEST(GetProperFilePathFromMetadataInfo, S3SchemePreservesPercentEncodedSlash) +{ + auto result = getProperFilePathFromMetadataInfo( + "s3://bucket/warehouse/data/partition=us%2Fwest/file.parquet", + "warehouse", + "s3://bucket/warehouse"); + ASSERT_EQ(result, "warehouse/data/partition=us%2Fwest/file.parquet"); +} + +TEST(GetProperFilePathFromMetadataInfo, SimpleKeyWithoutEncoding) +{ + auto result = getProperFilePathFromMetadataInfo( + "s3://bucket/warehouse/data/file.parquet", + "warehouse", + "s3://bucket/warehouse"); + ASSERT_EQ(result, "warehouse/data/file.parquet"); +} + +TEST(GetProperFilePathFromMetadataInfo, MultiplePercentEncodedSegments) +{ + auto result = getProperFilePathFromMetadataInfo( + "s3://bucket/warehouse/data/region=us%2Fwest/city=san%20francisco/file.parquet", + "warehouse", + "s3://bucket/warehouse"); + ASSERT_EQ(result, "warehouse/data/region=us%2Fwest/city=san%20francisco/file.parquet"); +} + +TEST(GetProperFilePathFromMetadataInfo, HttpSchemePreservesPercentEncodedSlash) +{ + auto result = getProperFilePathFromMetadataInfo( + "http://minio:9000/bucket/warehouse/data/partition=us%2Fwest/file.parquet", + "warehouse", + "http://minio:9000/bucket/warehouse"); + ASSERT_EQ(result, "warehouse/data/partition=us%2Fwest/file.parquet"); +} diff --git a/tests/integration/test_database_iceberg/test.py b/tests/integration/test_database_iceberg/test.py index 167b7d22708c..4b7b2814362f 100644 --- a/tests/integration/test_database_iceberg/test.py +++ b/tests/integration/test_database_iceberg/test.py @@ -679,6 +679,50 @@ def test_table_with_slash(started_cluster): assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "\\N\tAAPL\t193.24\t193.31\t('bot')\n" +def test_partition_value_with_slash(started_cluster): + """Partition value containing '/' produces object keys with %2F; reading must preserve encoding.""" + node = started_cluster.instances["node1"] + + test_ref = f"test_partition_slash_{uuid.uuid4()}" + table_name = f"{test_ref}_table" + root_namespace = f"{test_ref}_namespace" + + partition_spec = PartitionSpec( + PartitionField( + source_id=2, field_id=1000, transform=IdentityTransform(), name="symbol" + ) + ) + schema = DEFAULT_SCHEMA + + catalog = load_catalog_impl(started_cluster) + catalog.create_namespace(root_namespace) + + table = create_table( + catalog, + root_namespace, + table_name, + schema, + partition_spec=partition_spec, + sort_order=DEFAULT_SORT_ORDER, + ) + + data = [ + { + "datetime": datetime.now(), + "symbol": "us/west", + "bid": 100.0, + "ask": 101.0, + "details": {"created_by": "test"}, + } + ] + df = pa.Table.from_pylist(data) + table.append(df) + + create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME) + assert 1 == int(node.query(f"SELECT count() FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`")) + assert "us/west" in node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`") + + def test_cluster_select(started_cluster): node1 = started_cluster.instances["node1"] node2 = started_cluster.instances["node2"]