Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- **Compute / DataFrame join helper ownership moved from GFQL-local staging to shared dataframe namespace (#1380)**: Connected-join and same-path semijoin helper families now live under `graphistry/compute/dataframe/join.py` (exported via `graphistry/compute/dataframe/__init__.py`) rather than GFQL-local `dfops`/`same_path` helper ownership. Runtime call-sites were repointed (including `gfql_unified` connected join and same-path consumers) while preserving pandas/cuDF behavior.

### Tests
- **GFQL / native chain reply-author row-shaping locks (#1412, #880)**: Added native GFQL `rows()` + explicit `rows(binding_ops=...)` regression coverage for the SNB IC8 `recent-replies` and IS7 `message-replies` reply-author projection shapes, locking the pygraphistry-side behavior needed to retire adapter-local reply-author joins in benchmark coverage.
- **GFQL / Cypher two-MATCH reentry varlen regression hardening (#1001)**: Strengthened reentry varlen acceptance assertions from shape-only checks to exact expected rows, and added forward/reverse split-vs-connected query equivalence regressions to guard against wrong-row drift in the `match5-25/26` query family.
- **GFQL / Cypher reentry ordered-top-k amplification (#1342, #880 partial)**: Added lowering regressions for MATCH-after-WITH re-entry with single-column and multi-column ordered top-k prefixes, carried-scalar top-k alignment, `LIMIT 0` empty-prefix behavior, `SKIP` failfast retention, plus cuDF parity coverage for the multi-row top-k lane.
- **GFQL / Cypher tag-cooccurrence join+aggregation cardinality amplification (#1396, #880 residual lane)**: Added focused IC6-shape regression coverage for `collect(distinct friend) -> UNWIND -> connected comma MATCH -> WITH tag.name, count(post)` with non-trivial grouped counts (`Alpha=2`, `Beta=1`) plus cuDF parity guard, so the residual tag-cooccurrence join-aggregation lane is pinned without adapter-side workaround assumptions.
Expand Down
129 changes: 129 additions & 0 deletions graphistry/tests/test_compute_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,48 @@ def _mk_recent_message_reentry_graph(self):
),
)

def _mk_issue_1412_reply_author_graph(self):
    """Build the small reply-author fixture graph used by the issue #1412 tests.

    Nodes: four Person nodes and five Message nodes (two posts ``m1``/``m2``
    and three comments ``c1``..``c3``; comments carry both the Message and
    Comment labels).  Edges: ``HAS_CREATOR`` from each message/comment to a
    person and ``REPLY_OF`` from each comment to the post it answers.

    Returns whatever ``self._mk_graph(nodes_df, edges_df)`` returns.
    """
    # Node order is part of the fixture; keep it stable.
    node_ids = [
        "viewer",
        "m1",
        "m2",
        "c1",
        "c2",
        "c3",
        "message_author",
        "reply_author",
        "author2",
    ]
    # Person id -> (firstName, lastName)
    people = {
        "viewer": ("View", "Er"),
        "message_author": ("Main", "Author"),
        "reply_author": ("Peer", "One"),
        "author2": ("Bob", "Two"),
    }
    # Message id -> (creationDate, content); comments are messages too.
    messages = {
        "m1": (100, "post-1"),
        "m2": (90, "post-2"),
        "c1": (20, "reply-from-peer"),
        "c2": (10, "reply-from-main"),
        "c3": (80, "old-reply"),
    }
    comment_ids = {"c1", "c2", "c3"}
    blank = (None, None)  # attributes that do not apply to a node stay None

    nodes_df = pd.DataFrame(
        {
            "id": node_ids,
            "label__Person": [nid in people for nid in node_ids],
            "label__Message": [nid in messages for nid in node_ids],
            "label__Comment": [nid in comment_ids for nid in node_ids],
            "firstName": [people.get(nid, blank)[0] for nid in node_ids],
            "lastName": [people.get(nid, blank)[1] for nid in node_ids],
            "creationDate": [messages.get(nid, blank)[0] for nid in node_ids],
            "content": [messages.get(nid, blank)[1] for nid in node_ids],
        }
    )

    # (source, relationship, destination), listed in fixture row order.
    triples = [
        ("m1", "HAS_CREATOR", "viewer"),
        ("m2", "HAS_CREATOR", "viewer"),
        ("c1", "REPLY_OF", "m1"),
        ("c1", "HAS_CREATOR", "reply_author"),
        ("c2", "REPLY_OF", "m1"),
        ("c2", "HAS_CREATOR", "message_author"),
        ("c3", "REPLY_OF", "m2"),
        ("c3", "HAS_CREATOR", "author2"),
    ]
    edges_df = pd.DataFrame(
        {
            "s": [src for src, _, _ in triples],
            "d": [dst for _, _, dst in triples],
            "type": [rel for _, rel, _ in triples],
        }
    )
    return self._mk_graph(nodes_df, edges_df)

def _recent_message_zero_hop_match_ops(self):
return [
n({"id": is_in(["post2", "comment1"]), "label__Message": True}, name="message"),
Expand Down Expand Up @@ -1030,6 +1072,93 @@ def test_native_chain_rows_bindings_with_select(self):
assert records[0]["x_val"] == 1
assert records[0]["y_val"] == 2

def test_issue_1412_native_chain_recent_replies_row_shaping_ic8(self):
    """IC8: direct native GFQL rows() replaces the adapter reply-author join.

    Pattern walked over the issue #1412 fixture graph:
    start(viewer) <-HAS_CREATOR- message <-REPLY_OF- comment -HAS_CREATOR-> commentAuthor.
    Both the plain ``rows()`` path and the explicit ``binding_ops`` path must
    yield the same projected records, ordered by ``sort_by``.
    """
    graph = self._mk_issue_1412_reply_author_graph()
    pattern = [
        n({"id": "viewer", "label__Person": True}, name="start"),
        e_reverse({"type": "HAS_CREATOR"}),
        n({"label__Message": True}, name="message"),
        e_reverse({"type": "REPLY_OF"}),
        n({"label__Comment": True}, name="comment"),
        e_forward({"type": "HAS_CREATOR"}),
        n({"label__Person": True}, name="commentAuthor"),
    ]
    # (output alias, binding.attribute) projection pairs.
    items = [
        ("personId", "commentAuthor.id"),
        ("personFirstName", "commentAuthor.firstName"),
        ("personLastName", "commentAuthor.lastName"),
        ("commentCreationDate", "comment.creationDate"),
        ("commentId", "comment.id"),
        ("commentContent", "comment.content"),
    ]
    aliases = [alias for alias, _ in items]
    # One tuple per expected row, values in the same order as ``items``.
    # Dates are written as floats -- presumably the fixture column is
    # float-typed because non-message nodes hold None there; confirm if changed.
    expected = [
        dict(zip(aliases, values))
        for values in (
            ("reply_author", "Peer", "One", 20.0, "c1", "reply-from-peer"),
            ("message_author", "Main", "Author", 10.0, "c2", "reply-from-main"),
            ("author2", "Bob", "Two", 80.0, "c3", "old-reply"),
        )
    ]
    sort_by = ["commentCreationDate", "commentId"]
    # Order the expectation with the same keys the query sorts by.
    expected_by_sort = sorted(expected, key=lambda row: tuple(row[col] for col in sort_by))

    native_records = self._rows_records(graph, pattern, items=items, sort_by=sort_by)
    assert native_records == expected_by_sort

    binding_records = self._binding_rows_records(
        graph, self._to_binding_ops(pattern), items, sort_by=sort_by
    )
    assert binding_records == expected_by_sort

def test_issue_1412_native_chain_message_replies_row_shaping_is7(self):
    """IS7: direct native GFQL rows() keeps reply and message authors aligned.

    Pattern walked over the issue #1412 fixture graph:
    replyAuthor(reply_author) <-HAS_CREATOR- comment -REPLY_OF-> message(m1)
    -HAS_CREATOR-> messageAuthor.
    In the fixture only comment ``c1`` is created by ``reply_author``; it
    replies to ``m1``, whose creator is ``viewer``, so exactly one row is
    expected from both the plain ``rows()`` path and the ``binding_ops`` path.
    """
    graph = self._mk_issue_1412_reply_author_graph()
    pattern = [
        n({"id": "reply_author", "label__Person": True}, name="replyAuthor"),
        e_reverse({"type": "HAS_CREATOR"}),
        n({"label__Comment": True}, name="comment"),
        e_forward({"type": "REPLY_OF"}),
        n({"id": "m1", "label__Message": True}, name="message"),
        e_forward({"type": "HAS_CREATOR"}),
        n({"label__Person": True}, name="messageAuthor"),
    ]
    # (output alias, binding.attribute) projection pairs.
    items = [
        ("commentId", "comment.id"),
        ("commentContent", "comment.content"),
        ("commentCreationDate", "comment.creationDate"),
        ("replyAuthorId", "replyAuthor.id"),
        ("replyAuthorFirstName", "replyAuthor.firstName"),
        ("replyAuthorLastName", "replyAuthor.lastName"),
        ("messageAuthorId", "messageAuthor.id"),
    ]
    only_row = dict(
        commentId="c1",
        commentContent="reply-from-peer",
        commentCreationDate=20.0,
        replyAuthorId="reply_author",
        replyAuthorFirstName="Peer",
        replyAuthorLastName="One",
        messageAuthorId="viewer",
    )
    expected = [only_row]
    sort_by = ["commentCreationDate", "replyAuthorId"]

    native_records = self._rows_records(graph, pattern, items=items, sort_by=sort_by)
    assert native_records == expected

    binding_records = self._binding_rows_records(
        graph, self._to_binding_ops(pattern), items, sort_by=sort_by
    )
    assert binding_records == expected

def test_native_chain_rows_bindings_star_graph(self):
"""Star graph: 1 hub -> 3 leaves produces 3 binding rows."""
g = self._mk_graph(
Expand Down
Loading