Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

### Internal
- **GFQL / Cypher bounded reentry ORDER BY+LIMIT preservation across MATCH re-entry (#1342)**: Relaxed bounded reentry admission so `WITH ... ORDER BY ... LIMIT <literal>` (no `SKIP`) can flow into trailing `MATCH` re-entry for multi-row prefixes when the trailing query does not define its own `ORDER BY`. Added regression coverage for single-column, multi-column, and DESC ordering with LIMIT across re-entry, plus cuDF parity, and narrowed fail-fast coverage to unsupported unbounded/SKIP shapes.
- **GFQL / Cypher bounded re-entry ORDER BY safety now resolves parameterized LIMIT values (#1349)**: Re-entry prefix order safety now treats `LIMIT $n` as bounded when query params provide an integer (including `0`), instead of rejecting all `ParameterRef` limits as unknown. This preserves fail-fast behavior for unresolved/non-integer limits and `SKIP`-based shapes while unlocking vectorized `MATCH ... WITH ... ORDER BY ... LIMIT $n MATCH ...` execution on both pandas and cuDF backends. Added targeted regressions for pandas + cuDF success and non-integer-parameter rejection in `graphistry/tests/compute/gfql/cypher/test_lowering.py`.
- **GFQL / Cypher row-carrier plan metadata hardening (#989)**: Reentry compile-time planning now records per-alias property dependencies in `ReentryPlan.aliases[*].carried_properties` (including secondary-alias demotion and free-form bridge carries), so non-source row-carry semantics are explicit in the plan contract instead of inferred from rewrites. Added compile-contract coverage for both carried-alias and free-form lanes.
- **GFQL / Cypher bounded-reentry runtime extraction (#987 Step 3)**: Moved bounded-reentry data-frame execution helpers (`_compiled_query_reentry_state`, `_compiled_query_scalar_reentry_state`, `_compiled_query_freeform_reentry_state`, `_freeform_broadcast_row_to_nodes`, `_union_scalar_reentry_results`, `_apply_optional_reentry_null_fill`, `_aligned_reentry_rows`, `_reentry_carry_payload`, `_ordered_reentry_start_nodes`, `_reentry_validation_error`, the two suggestion constants) out of `graphistry/compute/gfql_unified.py` into a new `graphistry/compute/gfql/cypher/reentry/execution.py` module so the bounded-reentry contract assembled at compile time (`ReentryPlan`) and the matching data-frame stitching live next to each other. `_entity_projection_meta_entry` moved to `graphistry/compute/gfql/cypher/result_postprocess.py` next to `WholeRowProjectionMeta` since it is shared between the connected-OPTIONAL-MATCH and bounded-reentry paths. Pure-move refactor — no semantic change; `gfql_unified.py` shrinks by ~440 LOC and now re-exports the moved private names via aliased imports so existing tests reaching into `graphistry.compute.gfql_unified._compiled_query_reentry_state` continue to work.
- **GFQL / Cypher reentry compile-time module naming cleanup (#1333)**: Renamed bounded-reentry compile-time helper ownership from `graphistry/compute/gfql/cypher/reentry/runtime.py` to `graphistry/compute/gfql/cypher/reentry/compiletime.py` to better align naming with actual responsibilities (`execution.py` remains runtime data-frame behavior). Kept `reentry/runtime.py` as a compatibility re-export shim so existing imports continue to work unchanged.
Expand Down
44 changes: 37 additions & 7 deletions graphistry/compute/gfql/cypher/reentry/carry.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING, Optional, Sequence, Tuple
from typing import TYPE_CHECKING, Mapping, Optional, Sequence, Tuple

from graphistry.compute.gfql.cypher.ast import (
CypherQuery,
ExpressionText,
LimitClause,
ParameterRef,
ProjectionStage,
Expand All @@ -29,6 +30,7 @@
"_bounded_reentry_scalar_prefix_columns",
"_bounded_reentry_prefix_order_is_safe",
"_literal_limit_value",
"_resolved_limit_value",
]


Expand Down Expand Up @@ -146,18 +148,46 @@ def _literal_limit_value(limit_clause: Optional[LimitClause]) -> Optional[int]:
return int(text)


def _resolved_limit_value(
limit_clause: Optional[LimitClause],
*,
params: Optional[Mapping[str, object]] = None,
) -> Optional[int]:
value = _literal_limit_value(limit_clause)
if value is not None:
return value
if limit_clause is None:
return None
raw = limit_clause.value
param_name: Optional[str]
if isinstance(raw, ParameterRef):
param_name = raw.name
elif isinstance(raw, ExpressionText):
match = re.fullmatch(r"\$([A-Za-z_][A-Za-z0-9_]*)", raw.text.strip())
if match is None:
return None
param_name = match.group(1)
else:
return None
if params is None or param_name not in params:
return None
param_value = params[param_name]
if isinstance(param_value, bool):
return None
if not isinstance(param_value, int):
return None
return param_value


def _bounded_reentry_prefix_order_is_safe(
*,
prefix_stage: ProjectionStage,
query: CypherQuery,
params: Optional[Mapping[str, object]] = None,
) -> bool:
if prefix_stage.order_by is None:
return True
if query.order_by is not None:
return True
if prefix_stage.skip is not None:
return False
limit_value = _literal_limit_value(prefix_stage.limit)
if limit_value is None:
return False
return limit_value >= 0
resolved_limit = _resolved_limit_value(prefix_stage.limit, params=params)
return prefix_stage.skip is None and resolved_limit is not None and resolved_limit >= 0
2 changes: 1 addition & 1 deletion graphistry/compute/gfql/cypher/reentry/compiletime.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def _compile_bounded_reentry_query(
merged = set(non_source_carried_props_map.get(alias_name, ()))
merged.update(props)
non_source_carried_props_map[alias_name] = tuple(sorted(merged))
if not _bounded_reentry_prefix_order_is_safe(prefix_stage=prefix_stage, query=query):
if not _bounded_reentry_prefix_order_is_safe(prefix_stage=prefix_stage, query=query, params=params):
raise _unsupported(
"Cypher MATCH after WITH requires bounded literal LIMIT (and no SKIP) to preserve prefix WITH row ordering across MATCH re-entry when the trailing query has no ORDER BY",
field="with.order_by",
Expand Down
59 changes: 53 additions & 6 deletions graphistry/tests/compute/gfql/cypher/test_lowering.py
Original file line number Diff line number Diff line change
Expand Up @@ -8144,8 +8144,24 @@ def test_string_cypher_executes_with_match_reentry_ordered_limit_zero_shape() ->
assert result._nodes.to_dict(orient="records") == []


def test_string_cypher_rejects_reentry_with_parameterized_limit_and_order() -> None:
"""Regression for 992f2fc1: ParameterRef in LIMIT must not crash _literal_limit_value."""
def test_string_cypher_executes_with_match_reentry_parameterized_limit_shape() -> None:
"""Parameterized LIMIT should be treated as bounded when params resolve to int."""
nodes = pd.DataFrame(
{
"id": ["a1", "a2", "b1"],
"label__A": [True, True, False],
"name": ["alpha", "beta", None],
}
)
edges = pd.DataFrame({"s": ["a1", "a2"], "d": ["b1", "b1"]})
result = _mk_graph(nodes, edges).gfql(
"MATCH (a:A) WITH a ORDER BY a.name LIMIT $n MATCH (a)-->(b) RETURN a",
params={"n": 1},
)
assert result._nodes.to_dict(orient="records") == [{"a": "(:A {name: 'alpha'})"}]


def test_string_cypher_rejects_reentry_with_parameterized_non_int_limit_and_order() -> None:
nodes = pd.DataFrame(
{
"id": ["a1", "a2", "b1"],
Expand All @@ -8157,13 +8173,13 @@ def test_string_cypher_rejects_reentry_with_parameterized_limit_and_order() -> N
with pytest.raises(GFQLValidationError) as exc_info:
_mk_graph(nodes, edges).gfql(
"MATCH (a:A) WITH a ORDER BY a.name LIMIT $n MATCH (a)-->(b) RETURN a",
params={"n": 1},
params={"n": "1"},
)
assert "order" in exc_info.value.message.lower()
assert "integer" in exc_info.value.message.lower()


def test_string_cypher_executes_with_match_reentry_limit_shape_on_cudf() -> None:
cudf = pytest.importorskip("cudf")
cudf = _require_cudf_runtime()

nodes = cudf.from_pandas(
pd.DataFrame(
Expand Down Expand Up @@ -8193,7 +8209,7 @@ def test_string_cypher_executes_with_match_reentry_limit_shape_on_cudf() -> None


def test_string_cypher_executes_with_match_reentry_ordered_topk_multi_row_shape_on_cudf() -> None:
cudf = pytest.importorskip("cudf")
cudf = _require_cudf_runtime()

nodes = cudf.from_pandas(
pd.DataFrame(
Expand Down Expand Up @@ -8232,6 +8248,37 @@ def test_string_cypher_executes_with_match_reentry_ordered_topk_multi_row_shape_
]


def test_string_cypher_executes_with_match_reentry_parameterized_limit_shape_on_cudf() -> None:
cudf = _require_cudf_runtime()

nodes = cudf.from_pandas(
pd.DataFrame(
{
"id": ["a1", "a2", "b1"],
"label__A": [True, True, False],
"name": ["alpha", "beta", None],
}
)
)
edges = cudf.from_pandas(
pd.DataFrame(
{
"s": ["a1", "a2"],
"d": ["b1", "b1"],
}
)
)

result = _mk_graph(nodes, edges).gfql(
"MATCH (a:A) WITH a ORDER BY a.name LIMIT $n MATCH (a)-->(b) RETURN a",
params={"n": 1},
engine="cudf",
)

assert type(result._nodes).__module__.startswith("cudf")
assert result._nodes.to_pandas().to_dict(orient="records") == [{"a": "(:A {name: 'alpha'})"}]


def test_string_cypher_failfast_rejects_with_match_reentry_ordered_skip_shape() -> None:
with pytest.raises(GFQLValidationError, match="preserve prefix WITH row ordering"):
_mk_reentry_carried_scalar_graph().gfql(
Expand Down
Loading