diff --git a/CHANGELOG.md b/CHANGELOG.md index 277f98af21..cb094d12c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - **Compute / DataFrame join helper ownership moved from GFQL-local staging to shared dataframe namespace (#1380)**: Connected-join and same-path semijoin helper families now live under `graphistry/compute/dataframe/join.py` (exported via `graphistry/compute/dataframe/__init__.py`) rather than GFQL-local `dfops`/`same_path` helper ownership. Runtime call-sites were repointed (including `gfql_unified` connected join and same-path consumers) while preserving pandas/cuDF behavior. ### Tests +- **GFQL / native chain reply-author row-shaping locks (#1412, #880)**: Added native GFQL `rows()` and explicit `rows(binding_ops=...)` regression coverage for the SNB IC8 `recent-replies` and IS7 `message-replies` reply-author projection shapes, locking the pygraphistry-side behavior needed to retire adapter-local reply-author joins in benchmark coverage. - **GFQL / Cypher two-MATCH reentry varlen regression hardening (#1001)**: Strengthened reentry varlen acceptance assertions from shape-only checks to exact expected rows, and added forward/reverse split-vs-connected query equivalence regressions to guard against wrong-row drift in the `match5-25/26` query family. - **GFQL / Cypher reentry ordered-top-k amplification (#1342, #880 partial)**: Added lowering regressions for MATCH-after-WITH re-entry with single-column and multi-column ordered top-k prefixes, carried-scalar top-k alignment, `LIMIT 0` empty-prefix behavior, `SKIP` failfast retention, plus cuDF parity coverage for the multi-row top-k lane. 
- **GFQL / Cypher tag-cooccurrence join+aggregation cardinality amplification (#1396, #880 residual lane)**: Added focused IC6-shape regression coverage for `collect(distinct friend) -> UNWIND -> connected comma MATCH -> WITH tag.name, count(post)` with non-trivial grouped counts (`Alpha=2`, `Beta=1`) plus cuDF parity guard, so the residual tag-cooccurrence join-aggregation lane is pinned without adapter-side workaround assumptions. diff --git a/graphistry/tests/test_compute_chain.py b/graphistry/tests/test_compute_chain.py index 58ea85c1b4..e4c719a8c8 100644 --- a/graphistry/tests/test_compute_chain.py +++ b/graphistry/tests/test_compute_chain.py @@ -974,6 +974,48 @@ def _mk_recent_message_reentry_graph(self): ), ) + def _mk_issue_1412_reply_author_graph(self): + return self._mk_graph( + pd.DataFrame( + { + "id": [ + "viewer", + "m1", + "m2", + "c1", + "c2", + "c3", + "message_author", + "reply_author", + "author2", + ], + "label__Person": [True, False, False, False, False, False, True, True, True], + "label__Message": [False, True, True, True, True, True, False, False, False], + "label__Comment": [False, False, False, True, True, True, False, False, False], + "firstName": ["View", None, None, None, None, None, "Main", "Peer", "Bob"], + "lastName": ["Er", None, None, None, None, None, "Author", "One", "Two"], + "creationDate": [None, 100, 90, 20, 10, 80, None, None, None], + "content": [None, "post-1", "post-2", "reply-from-peer", "reply-from-main", "old-reply", None, None, None], + } + ), + pd.DataFrame( + { + "s": ["m1", "m2", "c1", "c1", "c2", "c2", "c3", "c3"], + "d": ["viewer", "viewer", "m1", "reply_author", "m1", "message_author", "m2", "author2"], + "type": [ + "HAS_CREATOR", + "HAS_CREATOR", + "REPLY_OF", + "HAS_CREATOR", + "REPLY_OF", + "HAS_CREATOR", + "REPLY_OF", + "HAS_CREATOR", + ], + } + ), + ) + def _recent_message_zero_hop_match_ops(self): return [ n({"id": is_in(["post2", "comment1"]), "label__Message": True}, name="message"), @@ -1030,6 +1072,93 
@@ def test_native_chain_rows_bindings_with_select(self): assert records[0]["x_val"] == 1 assert records[0]["y_val"] == 2 + def test_issue_1412_native_chain_recent_replies_row_shaping_ic8(self): + """IC8: direct native GFQL rows() replaces the adapter reply-author join.""" + g = self._mk_issue_1412_reply_author_graph() + match_ops = [ + n({"id": "viewer", "label__Person": True}, name="start"), + e_reverse({"type": "HAS_CREATOR"}), + n({"label__Message": True}, name="message"), + e_reverse({"type": "REPLY_OF"}), + n({"label__Comment": True}, name="comment"), + e_forward({"type": "HAS_CREATOR"}), + n({"label__Person": True}, name="commentAuthor"), + ] + items = [ + ("personId", "commentAuthor.id"), + ("personFirstName", "commentAuthor.firstName"), + ("personLastName", "commentAuthor.lastName"), + ("commentCreationDate", "comment.creationDate"), + ("commentId", "comment.id"), + ("commentContent", "comment.content"), + ] + expected = [ + { + "personId": "reply_author", + "personFirstName": "Peer", + "personLastName": "One", + "commentCreationDate": 20.0, + "commentId": "c1", + "commentContent": "reply-from-peer", + }, + { + "personId": "message_author", + "personFirstName": "Main", + "personLastName": "Author", + "commentCreationDate": 10.0, + "commentId": "c2", + "commentContent": "reply-from-main", + }, + { + "personId": "author2", + "personFirstName": "Bob", + "personLastName": "Two", + "commentCreationDate": 80.0, + "commentId": "c3", + "commentContent": "old-reply", + }, + ] + sort_by = ["commentCreationDate", "commentId"] + expected_by_sort = sorted(expected, key=lambda row: (row["commentCreationDate"], row["commentId"])) + assert self._rows_records(g, match_ops, items=items, sort_by=sort_by) == expected_by_sort + assert self._binding_rows_records(g, self._to_binding_ops(match_ops), items, sort_by=sort_by) == expected_by_sort + + def test_issue_1412_native_chain_message_replies_row_shaping_is7(self): + """IS7: direct native GFQL rows() keeps reply and message 
authors aligned.""" + g = self._mk_issue_1412_reply_author_graph() + match_ops = [ + n({"id": "reply_author", "label__Person": True}, name="replyAuthor"), + e_reverse({"type": "HAS_CREATOR"}), + n({"label__Comment": True}, name="comment"), + e_forward({"type": "REPLY_OF"}), + n({"id": "m1", "label__Message": True}, name="message"), + e_forward({"type": "HAS_CREATOR"}), + n({"label__Person": True}, name="messageAuthor"), + ] + items = [ + ("commentId", "comment.id"), + ("commentContent", "comment.content"), + ("commentCreationDate", "comment.creationDate"), + ("replyAuthorId", "replyAuthor.id"), + ("replyAuthorFirstName", "replyAuthor.firstName"), + ("replyAuthorLastName", "replyAuthor.lastName"), + ("messageAuthorId", "messageAuthor.id"), + ] + expected = [ + { + "commentId": "c1", + "commentContent": "reply-from-peer", + "commentCreationDate": 20.0, + "replyAuthorId": "reply_author", + "replyAuthorFirstName": "Peer", + "replyAuthorLastName": "One", + "messageAuthorId": "viewer", + } + ] + sort_by = ["commentCreationDate", "replyAuthorId"] + assert self._rows_records(g, match_ops, items=items, sort_by=sort_by) == expected + assert self._binding_rows_records(g, self._to_binding_ops(match_ops), items, sort_by=sort_by) == expected + def test_native_chain_rows_bindings_star_graph(self): """Star graph: 1 hub -> 3 leaves produces 3 binding rows.""" g = self._mk_graph(