From af703dad6dde27d22b94021628828831952e994a Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 3 May 2026 13:00:30 +0000 Subject: [PATCH 1/2] Initial commit with task details Adding .gitkeep for PR creation (default mode). This file will be removed when the task is complete. Issue: https://github.com/link-foundation/lino-objects-codec/issues/27 --- .gitkeep | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitkeep diff --git a/.gitkeep b/.gitkeep new file mode 100644 index 0000000..7809763 --- /dev/null +++ b/.gitkeep @@ -0,0 +1 @@ +# .gitkeep file auto-generated at 2026-05-03T13:00:30.434Z for PR creation at branch issue-27-eb86cc75f92a for issue https://github.com/link-foundation/lino-objects-codec/issues/27 \ No newline at end of file From 1c69cfbbc30cad5f68e640661c922384e0ec55c4 Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 3 May 2026 13:10:30 +0000 Subject: [PATCH 2/2] docs(readme): document built-in references for cycles, drop legacy (ref X) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The encoder in both JS and Python already emits cycles using the built-in links-notation `(self-ref: first-ref second-ref ...)` form — a self- referencing dict is `(obj_0: dict ((str c2VsZg==) obj_0))`, with bare `obj_N` back-references rather than a `(ref obj_N)` marker. Decoders in both languages reject `(ref X)` as an unknown type marker. The per-language READMEs (`js/README.md`, `python/README.md`) still documented the legacy `(ref obj_0)` syntax in their "How It Works" sections, which contradicted the actual emitted format and the main README. This commit makes the per-language docs match reality and links to issue #27. Regression tests in both languages now assert two invariants: 1. cycles encode as bare `obj_N` links inside an `(obj_N: type ...)` self-reference definition (no `(ref ` substring in the output); 2. 
the legacy `(ref X)` form must fail to decode with "Unknown type marker: ref", so it cannot silently masquerade as a real type. Closes #27. --- .gitkeep | 1 - js/.changeset/issue-27-builtin-references.md | 9 +++++ js/README.md | 11 +++++- js/tests/test_circular_references.test.js | 39 +++++++++++++++++++ python/README.md | 11 +++++- .../20260503_issue_27_builtin_references.md | 8 ++++ python/tests/test_circular_references.py | 20 ++++++++++ 7 files changed, 94 insertions(+), 5 deletions(-) delete mode 100644 .gitkeep create mode 100644 js/.changeset/issue-27-builtin-references.md create mode 100644 python/changelog.d/20260503_issue_27_builtin_references.md diff --git a/.gitkeep b/.gitkeep deleted file mode 100644 index 7809763..0000000 --- a/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -# .gitkeep file auto-generated at 2026-05-03T13:00:30.434Z for PR creation at branch issue-27-eb86cc75f92a for issue https://github.com/link-foundation/lino-objects-codec/issues/27 \ No newline at end of file diff --git a/js/.changeset/issue-27-builtin-references.md b/js/.changeset/issue-27-builtin-references.md new file mode 100644 index 0000000..87dc9f9 --- /dev/null +++ b/js/.changeset/issue-27-builtin-references.md @@ -0,0 +1,9 @@ +--- +'lino-objects-codec': patch +--- + +Document the built-in references format for circular references and add +regression tests that lock it in. The encoder already emits cycles as bare +`obj_N` links inside an `(obj_N: type ...)` self-reference definition, but the +README still showed the legacy `(ref obj_N)` marker. README, regression tests, +and the format-invariant assertions are now consistent. See issue #27. 
diff --git a/js/README.md b/js/README.md index 3282d63..77ea9dc 100644 --- a/js/README.md +++ b/js/README.md @@ -251,8 +251,15 @@ The library uses the [links-notation](https://github.com/link-foundation/links-n - Basic types are encoded with type markers: `(int 42)`, `(str "hello")`, `(bool true)` - Strings are base64-encoded to handle special characters and newlines -- Collections include object IDs for reference tracking: `(array obj_0 item1 item2 ...)` -- Circular references use special `ref` links: `(ref obj_0)` +- Shared / cyclic collections are defined inline with a self-reference id using + the built-in links-notation `(self-ref: first-ref second-ref ...)` form, e.g. + `(obj_0: array (int 1) (int 2) ...)` or `(obj_0: object (key val) ...)` +- Circular references use built-in links-notation references — the bare object + id link `obj_0` — instead of a dedicated keyword. For example, a + self-referencing object `{ self: obj }` encodes as + `(obj_0: object ((str c2VsZg==) obj_0))` (no `(ref obj_0)` marker). See + [issue #27](https://github.com/link-foundation/lino-objects-codec/issues/27) + for the rationale. This approach allows for: diff --git a/js/tests/test_circular_references.test.js b/js/tests/test_circular_references.test.js index 36f81d0..36fe3dd 100644 --- a/js/tests/test_circular_references.test.js +++ b/js/tests/test_circular_references.test.js @@ -240,3 +240,42 @@ test('array and object circular reference', () => { assert.equal(decoded.arr[0], 1); assert.equal(decoded.arr[1], decoded); // Circular reference }); + +// Tests for the encoded format itself: circular references must use built-in +// links-notation references (a bare `obj_N` link) rather than the legacy +// `(ref obj_N)` marker. See issue #27. +test('encoded format uses built-in references, not (ref X) marker', () => { + const obj = {}; + obj.self = obj; + const encoded = encode({ obj }); + + // Self-reference should appear as a bare `obj_0` link, not wrapped in (ref ...) 
+ assert.match( + encoded, + /obj_0/, + `expected encoded output to contain bare obj_0 reference, got: ${encoded}` + ); + assert.doesNotMatch( + encoded, + /\(ref\b/, + `expected encoded output to NOT contain (ref ...) marker, got: ${encoded}` + ); + + // The self-referenced object must be defined inline using the + // `(obj_id: type ...)` form, not `(type obj_id ...)`. + assert.match( + encoded, + /\(obj_0:\s*object\b/, + `expected (obj_0: object ...) self-reference definition, got: ${encoded}` + ); +}); + +test('decoder rejects legacy (ref X) marker as unknown type', () => { + // The legacy form must no longer be supported as a type marker. Decoding + // should fail loudly so it cannot silently masquerade as a real type. + const legacy = '(object obj_0 ((str c2VsZg==) (ref obj_0)))'; + assert.throws( + () => decode({ notation: legacy }), + /Unknown type marker:\s*ref/ + ); +}); diff --git a/python/README.md b/python/README.md index 5340fb7..93cb2c2 100644 --- a/python/README.md +++ b/python/README.md @@ -152,8 +152,15 @@ The library uses the [links-notation](https://github.com/link-foundation/links-n - Basic types are encoded with type markers: `(int 42)`, `(str "hello")`, `(bool True)` - Strings are base64-encoded to handle special characters and newlines -- Collections include object IDs for reference tracking: `(list obj_0 item1 item2 ...)` -- Circular references use special `ref` links: `(ref obj_0)` +- Shared / cyclic collections are defined inline with a self-reference id using + the built-in links-notation `(self-ref: first-ref second-ref ...)` form, e.g. + `(obj_0: list (int 1) (int 2) ...)` or `(obj_0: dict (key val) ...)` +- Circular references use built-in links-notation references — the bare object + id link `obj_0` — instead of a dedicated keyword. For example, a + self-referencing dict `{"self": obj}` encodes as + `(obj_0: dict ((str c2VsZg==) obj_0))` (no `(ref obj_0)` marker). 
See + [issue #27](https://github.com/link-foundation/lino-objects-codec/issues/27) + for the rationale. This approach allows for: - Universal representation of object graphs diff --git a/python/changelog.d/20260503_issue_27_builtin_references.md b/python/changelog.d/20260503_issue_27_builtin_references.md new file mode 100644 index 0000000..1544372 --- /dev/null +++ b/python/changelog.d/20260503_issue_27_builtin_references.md @@ -0,0 +1,8 @@ +### Changed + +- Document the built-in references format for circular references in the Python + README. The encoder already produces `(obj_N: dict ...)` / `(obj_N: list ...)` + self-reference definitions and bare `obj_N` back-references; the README now + matches and explicitly notes that the legacy `(ref obj_N)` marker is no + longer recognized. Regression tests lock in the new format. See + [issue #27](https://github.com/link-foundation/lino-objects-codec/issues/27). diff --git a/python/tests/test_circular_references.py b/python/tests/test_circular_references.py index b29d7af..428d6e3 100644 --- a/python/tests/test_circular_references.py +++ b/python/tests/test_circular_references.py @@ -1,5 +1,7 @@ """Tests for encoding/decoding objects with circular references.""" +import pytest + from link_notation_objects_codec import decode, encode @@ -150,3 +152,21 @@ def test_deeply_nested_circular_reference(self): assert decoded["child"]["child"]["child"]["level"] == 4 # Check circular reference back to root assert decoded["child"]["child"]["child"]["root"] is decoded + + def test_encoded_format_uses_builtin_references_not_ref_marker(self): + """Cycles must encode as bare `obj_N` links, not `(ref obj_N)`. See issue #27.""" + d = {} + d["self"] = d + encoded = encode(d) + + # Self-reference must be a bare obj_0 link, not a (ref ...) wrapper. + assert "obj_0" in encoded, encoded + assert "(ref " not in encoded, encoded + # The owner must be defined inline using the (obj_id: type ...) form. 
+ assert "(obj_0: dict" in encoded, encoded + + def test_decoder_rejects_legacy_ref_marker(self): + """Legacy (ref X) form must be rejected as an unknown type marker.""" + legacy = "(dict obj_0 ((str c2VsZg==) (ref obj_0)))" + with pytest.raises(ValueError, match=r"Unknown type marker:\s*ref"): + decode(legacy)