Skip to content

Commit a4e08f9

Browse files
committed
Fix partition overwrite predicate recursion
1 parent 0826d3e commit a4e08f9

2 files changed

Lines changed: 36 additions & 8 deletions

File tree

pyiceberg/table/__init__.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -368,19 +368,29 @@ def _build_partition_predicate(
368368
"""
369369
partition_fields = [schema.find_field(field.source_id).name for field in spec.fields]
370370

371-
expr: BooleanExpression = AlwaysFalse()
371+
match_partition_expressions = []
372372
for partition_record in partition_records:
373-
match_partition_expression: BooleanExpression = AlwaysTrue()
374-
375-
for pos, partition_field in enumerate(partition_fields):
376-
predicate = (
373+
predicates = [
374+
(
377375
EqualTo(Reference(partition_field), partition_record[pos])
378376
if partition_record[pos] is not None
379377
else IsNull(Reference(partition_field))
380378
)
381-
match_partition_expression = And(match_partition_expression, predicate)
382-
expr = Or(expr, match_partition_expression)
383-
return expr
379+
for pos, partition_field in enumerate(partition_fields)
380+
]
381+
382+
if not predicates:
383+
match_partition_expressions.append(AlwaysTrue())
384+
elif len(predicates) == 1:
385+
match_partition_expressions.append(predicates[0])
386+
else:
387+
match_partition_expressions.append(And(*predicates))
388+
389+
if not match_partition_expressions:
390+
return AlwaysFalse()
391+
if len(match_partition_expressions) == 1:
392+
return match_partition_expressions[0]
393+
return Or(*match_partition_expressions)
384394

385395
def _append_snapshot_producer(
386396
self, snapshot_properties: dict[str, str], branch: str | None = MAIN_BRANCH

tests/table/test_init.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
EqualTo,
3333
In,
3434
)
35+
from pyiceberg.expressions.visitors import bind
3536
from pyiceberg.io import PY_IO_IMPL, load_file_io
3637
from pyiceberg.partitioning import PartitionField, PartitionSpec
3738
from pyiceberg.schema import Schema
@@ -90,6 +91,7 @@
9091
BucketTransform,
9192
IdentityTransform,
9293
)
94+
from pyiceberg.typedef import Record
9395
from pyiceberg.types import (
9496
BinaryType,
9597
BooleanType,
@@ -788,6 +790,22 @@ def test_apply_add_schema_update(table_v2: Table) -> None:
788790
assert test_context.is_added_schema(2)
789791

790792

793+
def test_build_partition_predicate_binds_many_partitions_without_recursion(table_v2: Table) -> None:
794+
schema = Schema(NestedField(field_id=1, name="date", field_type=StringType(), required=False))
795+
partition_spec = PartitionSpec(
796+
PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="date"),
797+
)
798+
partition_records = {Record(f"2026-02-{partition_idx:04d}T00") for partition_idx in range(512)}
799+
800+
predicate = table_v2.transaction()._build_partition_predicate( # pylint: disable=W0212
801+
partition_records=partition_records,
802+
spec=partition_spec,
803+
schema=schema,
804+
)
805+
806+
assert bind(schema, predicate, case_sensitive=True)
807+
808+
791809
def test_update_metadata_table_schema(table_v2: Table) -> None:
792810
transaction = table_v2.transaction()
793811
update = transaction.update_schema()

0 commit comments

Comments
 (0)