diff --git a/js/.changeset/issue-27-builtin-references.md b/js/.changeset/issue-27-builtin-references.md new file mode 100644 index 0000000..87dc9f9 --- /dev/null +++ b/js/.changeset/issue-27-builtin-references.md @@ -0,0 +1,9 @@ +--- +'lino-objects-codec': patch +--- + +Document the built-in references format for circular references and add +regression tests that lock it in. The encoder already emits cycles as bare +`obj_N` links inside an `(obj_N: type ...)` self-reference definition, but the +README still showed the legacy `(ref obj_N)` marker. README, regression tests, +and the format-invariant assertions are now consistent. See issue #27. diff --git a/js/README.md b/js/README.md index 3282d63..77ea9dc 100644 --- a/js/README.md +++ b/js/README.md @@ -251,8 +251,15 @@ The library uses the [links-notation](https://github.com/link-foundation/links-n - Basic types are encoded with type markers: `(int 42)`, `(str "hello")`, `(bool true)` - Strings are base64-encoded to handle special characters and newlines -- Collections include object IDs for reference tracking: `(array obj_0 item1 item2 ...)` -- Circular references use special `ref` links: `(ref obj_0)` +- Shared / cyclic collections are defined inline with a self-reference id using + the built-in links-notation `(self-ref: first-ref second-ref ...)` form, e.g. + `(obj_0: array (int 1) (int 2) ...)` or `(obj_0: object (key val) ...)` +- Circular references use built-in links-notation references — the bare object + id link `obj_0` — instead of a dedicated keyword. For example, a + self-referencing object `{ self: obj }` encodes as + `(obj_0: object ((str c2VsZg==) obj_0))` (no `(ref obj_0)` marker). See + [issue #27](https://github.com/link-foundation/lino-objects-codec/issues/27) + for the rationale. 
This approach allows for: diff --git a/js/tests/test_circular_references.test.js b/js/tests/test_circular_references.test.js index 36f81d0..36fe3dd 100644 --- a/js/tests/test_circular_references.test.js +++ b/js/tests/test_circular_references.test.js @@ -240,3 +240,42 @@ test('array and object circular reference', () => { assert.equal(decoded.arr[0], 1); assert.equal(decoded.arr[1], decoded); // Circular reference }); + +// Tests for the encoded format itself: circular references must use built-in +// links-notation references (a bare `obj_N` link) rather than the legacy +// `(ref obj_N)` marker. See issue #27. +test('encoded format uses built-in references, not (ref X) marker', () => { + const obj = {}; + obj.self = obj; + const encoded = encode({ obj }); + + // Self-reference should appear as a bare `obj_0` link, not wrapped in (ref ...) + assert.match( + encoded, + /obj_0/, + `expected encoded output to contain bare obj_0 reference, got: ${encoded}` + ); + assert.doesNotMatch( + encoded, + /\(ref\b/, + `expected encoded output to NOT contain (ref ...) marker, got: ${encoded}` + ); + + // The self-referenced object must be defined inline using the + // `(obj_id: type ...)` form, not `(type obj_id ...)`. + assert.match( + encoded, + /\(obj_0:\s*object\b/, + `expected (obj_0: object ...) self-reference definition, got: ${encoded}` + ); +}); + +test('decoder rejects legacy (ref X) marker as unknown type', () => { + // The legacy form must no longer be supported as a type marker. Decoding + // should fail loudly so it cannot silently masquerade as a real type. 
+ const legacy = '(object obj_0 ((str c2VsZg==) (ref obj_0)))'; + assert.throws( + () => decode({ notation: legacy }), + /Unknown type marker:\s*ref/ + ); +}); diff --git a/python/README.md b/python/README.md index 5340fb7..93cb2c2 100644 --- a/python/README.md +++ b/python/README.md @@ -152,8 +152,15 @@ The library uses the [links-notation](https://github.com/link-foundation/links-n - Basic types are encoded with type markers: `(int 42)`, `(str "hello")`, `(bool True)` - Strings are base64-encoded to handle special characters and newlines -- Collections include object IDs for reference tracking: `(list obj_0 item1 item2 ...)` -- Circular references use special `ref` links: `(ref obj_0)` +- Shared / cyclic collections are defined inline with a self-reference id using + the built-in links-notation `(self-ref: first-ref second-ref ...)` form, e.g. + `(obj_0: list (int 1) (int 2) ...)` or `(obj_0: dict (key val) ...)` +- Circular references use built-in links-notation references — the bare object + id link `obj_0` — instead of a dedicated keyword. For example, a + self-referencing dict `{"self": obj}` encodes as + `(obj_0: dict ((str c2VsZg==) obj_0))` (no `(ref obj_0)` marker). See + [issue #27](https://github.com/link-foundation/lino-objects-codec/issues/27) + for the rationale. This approach allows for: - Universal representation of object graphs diff --git a/python/changelog.d/20260503_issue_27_builtin_references.md b/python/changelog.d/20260503_issue_27_builtin_references.md new file mode 100644 index 0000000..1544372 --- /dev/null +++ b/python/changelog.d/20260503_issue_27_builtin_references.md @@ -0,0 +1,8 @@ +### Changed + +- Document the built-in references format for circular references in the Python + README. The encoder already produces `(obj_N: dict ...)` / `(obj_N: list ...)` + self-reference definitions and bare `obj_N` back-references; the README now + matches and explicitly notes that the legacy `(ref obj_N)` marker is no + longer recognized. 
Regression tests lock in the new format. See + [issue #27](https://github.com/link-foundation/lino-objects-codec/issues/27). diff --git a/python/tests/test_circular_references.py b/python/tests/test_circular_references.py index b29d7af..428d6e3 100644 --- a/python/tests/test_circular_references.py +++ b/python/tests/test_circular_references.py @@ -1,5 +1,7 @@ """Tests for encoding/decoding objects with circular references.""" +import pytest + from link_notation_objects_codec import decode, encode @@ -150,3 +152,21 @@ def test_deeply_nested_circular_reference(self): assert decoded["child"]["child"]["child"]["level"] == 4 # Check circular reference back to root assert decoded["child"]["child"]["child"]["root"] is decoded + + def test_encoded_format_uses_builtin_references_not_ref_marker(self): + """Cycles must encode as bare `obj_N` links, not `(ref obj_N)`. See issue #27.""" + d = {} + d["self"] = d + encoded = encode(d) + + # Self-reference must be a bare obj_0 link, not a (ref ...) wrapper. + assert "obj_0" in encoded, encoded + assert "(ref " not in encoded, encoded + # The owner must be defined inline using the (obj_id: type ...) form. + assert "(obj_0: dict" in encoded, encoded + + def test_decoder_rejects_legacy_ref_marker(self): + """Legacy (ref X) form must be rejected as an unknown type marker.""" + legacy = "(dict obj_0 ((str c2VsZg==) (ref obj_0)))" + with pytest.raises(ValueError, match=r"Unknown type marker:\s*ref"): + decode(legacy)