diff --git a/b64.test.ts b/b64.test.ts deleted file mode 100644 index 111dbd4..0000000 --- a/b64.test.ts +++ /dev/null @@ -1,289 +0,0 @@ -import { describe, expect, test } from "vitest"; -import { - b64Parse, b64Stringify, - isB64, b64Sizeof, - b64Read, b64Write, - toZigZag, fromZigZag -} from "./rx"; - -describe('b64 stringify', () => { - test('encoding b64 digits in correct order', () => { - expect(b64Stringify(0)).toBe(''); - expect(b64Stringify(1)).toBe('1'); - expect(b64Stringify(9)).toBe('9'); - expect(b64Stringify(10)).toBe('a'); - expect(b64Stringify(35)).toBe('z'); - expect(b64Stringify(36)).toBe('A'); - expect(b64Stringify(61)).toBe('Z'); - expect(b64Stringify(62)).toBe('-'); - expect(b64Stringify(63)).toBe('_'); - expect(b64Stringify(64)).toBe('10'); - }); - test('encoding b64 as powers of 16)', () => { - expect(b64Stringify(0x1)).toBe('1'); - expect(b64Stringify(0x10)).toBe('g'); - expect(b64Stringify(0x100)).toBe('40'); - expect(b64Stringify(0x1000)).toBe('100'); - expect(b64Stringify(0x10000)).toBe('g00'); - expect(b64Stringify(0x100000)).toBe('4000'); - expect(b64Stringify(0x1000000)).toBe('10000'); - expect(b64Stringify(0x10000000)).toBe('g0000'); - expect(b64Stringify(0x100000000)).toBe('400000'); - expect(b64Stringify(0x1000000000)).toBe('1000000'); - expect(b64Stringify(0x10000000000)).toBe('g000000'); - expect(b64Stringify(0x100000000000)).toBe('40000000'); - expect(b64Stringify(0x1000000000000)).toBe('100000000'); - expect(b64Stringify(0x10000000000000)).toBe('g00000000'); - }); - test('encoding b64 near 12, 32 and 53 bit precision limits)', () => { - expect(b64Stringify(2 ** 16 - 5)).toBe('f_X'); - expect(b64Stringify(2 ** 16 - 4)).toBe('f_Y'); - expect(b64Stringify(2 ** 16 - 3)).toBe('f_Z'); - expect(b64Stringify(2 ** 16 - 2)).toBe('f_-'); - expect(b64Stringify(2 ** 16 - 1)).toBe('f__'); - expect(b64Stringify(2 ** 16)).toBe('g00'); - expect(b64Stringify(2 ** 16 + 1)).toBe('g01'); - expect(b64Stringify(2 ** 16 + 2)).toBe('g02'); - 
expect(b64Stringify(2 ** 16 + 3)).toBe('g03'); - expect(b64Stringify(2 ** 16 + 4)).toBe('g04'); - expect(b64Stringify(2 ** 32 - 5)).toBe('3____X'); - expect(b64Stringify(2 ** 32 - 4)).toBe('3____Y'); - expect(b64Stringify(2 ** 32 - 3)).toBe('3____Z'); - expect(b64Stringify(2 ** 32 - 2)).toBe('3____-'); - expect(b64Stringify(2 ** 32 - 1)).toBe('3_____'); - expect(b64Stringify(2 ** 32)).toBe('400000'); - expect(b64Stringify(2 ** 32 + 1)).toBe('400001'); - expect(b64Stringify(2 ** 32 + 2)).toBe('400002'); - expect(b64Stringify(2 ** 32 + 3)).toBe('400003'); - expect(b64Stringify(2 ** 32 + 4)).toBe('400004'); - expect(b64Stringify(2 ** 53 - 1)).toBe('v________'); - expect(b64Stringify(2 ** 53 - 2)).toBe('v_______-'); - expect(b64Stringify(2 ** 53 - 3)).toBe('v_______Z'); - expect(b64Stringify(2 ** 53 - 4)).toBe('v_______Y'); - expect(b64Stringify(2 ** 53 - 5)).toBe('v_______X'); - }); - test('fails on invalid inputs', () => { - expect(() => b64Stringify(-1)).toThrow(); - expect(() => b64Stringify(1.5)).toThrow(); - expect(() => b64Stringify(NaN)).toThrow(); - expect(() => b64Stringify(Infinity)).toThrow(); - }); -}); - -describe('b64 parse', () => { - test('decoding b64 digits in correct order', () => { - expect(b64Parse('')).toBe(0); - expect(b64Parse('1')).toBe(1); - expect(b64Parse('9')).toBe(9); - expect(b64Parse('a')).toBe(10); - expect(b64Parse('z')).toBe(35); - expect(b64Parse('A')).toBe(36); - expect(b64Parse('Z')).toBe(61); - expect(b64Parse('-')).toBe(62); - expect(b64Parse('_')).toBe(63); - expect(b64Parse('10')).toBe(64); - }) - test('decoding b64 as powers of 16)', () => { - expect(b64Parse('1')).toBe(0x1); - expect(b64Parse('g')).toBe(0x10); - expect(b64Parse('40')).toBe(0x100); - expect(b64Parse('100')).toBe(0x1000); - expect(b64Parse('g00')).toBe(0x10000); - expect(b64Parse('4000')).toBe(0x100000); - expect(b64Parse('10000')).toBe(0x1000000); - expect(b64Parse('g0000')).toBe(0x10000000); - expect(b64Parse('400000')).toBe(0x100000000); - 
expect(b64Parse('1000000')).toBe(0x1000000000); - expect(b64Parse('g000000')).toBe(0x10000000000); - expect(b64Parse('40000000')).toBe(0x100000000000); - expect(b64Parse('100000000')).toBe(0x1000000000000); - expect(b64Parse('g00000000')).toBe(0x10000000000000); - }); - test('decoding b64 near 12, 32 and 53 bit precision limits)', () => { - expect(b64Parse('f_X')).toBe(2 ** 16 - 5); - expect(b64Parse('f_Y')).toBe(2 ** 16 - 4); - expect(b64Parse('f_Z')).toBe(2 ** 16 - 3); - expect(b64Parse('f_-')).toBe(2 ** 16 - 2); - expect(b64Parse('f__')).toBe(2 ** 16 - 1); - expect(b64Parse('g00')).toBe(2 ** 16); - expect(b64Parse('g01')).toBe(2 ** 16 + 1); - expect(b64Parse('g02')).toBe(2 ** 16 + 2); - expect(b64Parse('g03')).toBe(2 ** 16 + 3); - expect(b64Parse('g04')).toBe(2 ** 16 + 4); - expect(b64Parse('3____X')).toBe(2 ** 32 - 5); - expect(b64Parse('3____Y')).toBe(2 ** 32 - 4); - expect(b64Parse('3____Z')).toBe(2 ** 32 - 3); - expect(b64Parse('3____-')).toBe(2 ** 32 - 2); - expect(b64Parse('3_____')).toBe(2 ** 32 - 1); - expect(b64Parse('400000')).toBe(2 ** 32); - expect(b64Parse('400001')).toBe(2 ** 32 + 1); - expect(b64Parse('400002')).toBe(2 ** 32 + 2); - expect(b64Parse('400003')).toBe(2 ** 32 + 3); - expect(b64Parse('400004')).toBe(2 ** 32 + 4); - expect(b64Parse('w00000000')).toBe(2 ** 53); - expect(b64Parse('v________')).toBe(2 ** 53 - 1); - expect(b64Parse('v_______-')).toBe(2 ** 53 - 2); - expect(b64Parse('v_______Z')).toBe(2 ** 53 - 3); - expect(b64Parse('v_______Y')).toBe(2 ** 53 - 4); - expect(b64Parse('v_______X')).toBe(2 ** 53 - 5); - }); -}); - -describe('b64 parse/stringify', () => { - test('random fuzzing', () => { - for (let i = 0; i < 100000; i++) { - const n = Math.floor(Math.random() * (Number.MAX_SAFE_INTEGER + 2)); - expect(b64Parse(b64Stringify(n))).toBe(n); - } - }); -}); - -describe('b64 is', () => { - test('valid characters', () => { - for (let i = 0; i < 256; i++) { - const char = String.fromCharCode(i); - if ( - (i >= 48 && i <= 57) || // 0-9 - 
(i >= 65 && i <= 90) || // A-Z - (i >= 97 && i <= 122) || // a-z - char === '-' || - char === '_' - ) { - expect(isB64(i)).toBe(true); - } else { - expect(isB64(i)).toBe(false); - } - } - }); -}); - -describe('b64 sizeof', () => { - test('size of b64 encoding', () => { - expect(() => b64Sizeof(-1)).toThrow(); - expect(b64Sizeof(0)).toBe(0); - expect(b64Sizeof(1)).toBe(1); - expect(b64Sizeof(63)).toBe(1); - expect(b64Sizeof(64)).toBe(2); - expect(b64Sizeof(4095)).toBe(2); - expect(b64Sizeof(4096)).toBe(3); - expect(b64Sizeof(262143)).toBe(3); - expect(b64Sizeof(262144)).toBe(4); - expect(b64Sizeof(2 ** 53 - 1)).toBe(9); - expect(() => b64Sizeof(2 ** 53)).toThrow(); - }); -}); - -describe('b64 read', () => { - test('decoding b64 digits in correct order', () => { - const data = new Uint8Array([45, 95, 48, 49]); // '-_01' - expect(b64Read(data, 0, 1)).toBe(62); - expect(b64Read(data, 1, 2)).toBe(63); - expect(b64Read(data, 2, 3)).toBe(0); - expect(b64Read(data, 3, 4)).toBe(1); - expect(b64Read(data, 2, 4)).toBe(0 * 64 + 1); - expect(b64Read(data, 0, 2)).toBe(62 * 64 + 63); - expect(b64Read(data, 0, 3)).toBe(62 * 64 * 64 + 63 * 64 + 0); - expect(b64Read(data, 0, 4)).toBe(62 * 64 * 64 * 64 + 63 * 64 * 64 + 0 * 64 + 1); - expect(b64Read(data, 1, 4)).toBe(63 * 64 * 64 + 0 * 64 + 1); - }); - - test('fails on invalid characters', () => { - const data = new Uint8Array([45, 95, 48, 49, 64]); // '-_01@' - expect(() => b64Read(data, 0, 5)).toThrow(); - expect(() => b64Read(data, 4, 5)).toThrow(); - expect(() => b64Read(data, 0, 4)).not.toThrow(); - }); -}); - -describe('b64 write', () => { - test('writing b64 digits to data', () => { - const data = new Uint8Array(10); - b64Write(data, 0, 10, 0); - expect(data.slice(0, 10)).toEqual(new Uint8Array([48, 48, 48, 48, 48, 48, 48, 48, 48, 48])); - b64Write(data, 0, 2, 62 * 64 + 63); // '-_' - expect(data.slice(0, 2)).toEqual(new Uint8Array([45, 95])); - b64Write(data, 2, 5, 62 * 64 * 64 + 63 * 64 + 1); // '-_01' - expect(data.slice(2, 
5)).toEqual(new Uint8Array([45, 95, 49])); - b64Write(data, 0, 10, Number.MAX_SAFE_INTEGER); // '_v________' - expect(data.slice(0, 10)).toEqual(new Uint8Array([48, 118, 95, 95, 95, 95, 95, 95, 95, 95])); - b64Write(data, 0, 10, 2 ** 53); // '0w00000000' - expect(data.slice(0, 10)).toEqual(new Uint8Array([48, 119, 48, 48, 48, 48, 48, 48, 48, 48])); - }); - test('fails on write overflow', () => { - const data = new Uint8Array(5); - expect(() => b64Write(data, 0, 5, 2 ** 40)).toThrow(); - }); -}); - -describe('b64 sizeof+write+read', () => { - test('random fuzzing', () => { - for (let i = 0; i < 100000; i++) { - const n = Math.floor(Math.random() * (Number.MAX_SAFE_INTEGER + 2)); - const size = b64Sizeof(n); - const data = new Uint8Array(size); - b64Write(data, 0, size, n); - expect(b64Read(data, 0, size)).toBe(n); - } - }); -}); - -describe('zigzag toZigZag', () => { - test('encodes small values', () => { - expect(toZigZag(0)).toBe(0); - expect(toZigZag(-1)).toBe(1); - expect(toZigZag(1)).toBe(2); - expect(toZigZag(-2)).toBe(3); - expect(toZigZag(2)).toBe(4); - }); - test('encodes 31-bit boundary values', () => { - expect(toZigZag(0x3fffffff)).toBe(0x7ffffffe); - expect(toZigZag(-0x40000000)).toBe(0x7fffffff); - expect(toZigZag(0x40000000)).toBe(0x80000000); - expect(toZigZag(-0x40000001)).toBe(0x80000001); - }); - test('encodes at bitwise/arithmetic boundary (int32)', () => { - expect(toZigZag(0x7fffffff)).toBe(0xfffffffe); - expect(toZigZag(-0x80000000)).toBe(0xffffffff); - expect(toZigZag(0x80000000)).toBe(0x100000000); - expect(toZigZag(-0x80000001)).toBe(0x100000001); - }); -}); - -describe('zigzag fromZigZag', () => { - test('decodes small values', () => { - expect(fromZigZag(0)).toBe(0); - expect(fromZigZag(1)).toBe(-1); - expect(fromZigZag(2)).toBe(1); - expect(fromZigZag(3)).toBe(-2); - expect(fromZigZag(4)).toBe(2); - }); - test('decodes 31-bit boundary values', () => { - expect(fromZigZag(0x7ffffffe)).toBe(0x3fffffff); - 
expect(fromZigZag(0x7fffffff)).toBe(-0x40000000); - expect(fromZigZag(0x80000000)).toBe(0x40000000); - expect(fromZigZag(0x80000001)).toBe(-0x40000001); - }); - test('decodes at bitwise/arithmetic boundary (uint32)', () => { - expect(fromZigZag(0xfffffffe)).toBe(0x7fffffff); - expect(fromZigZag(0xffffffff)).toBe(-0x80000000); - expect(fromZigZag(0x100000000)).toBe(0x80000000); - expect(fromZigZag(0x100000001)).toBe(-0x80000001); - }); -}); - -describe('zigzag roundtrip', () => { - test('small values', () => { - for (let i = -1000; i <= 1000; i++) { - expect(fromZigZag(toZigZag(i))).toBe(i); - } - }); - test('random fuzzing', () => { - // Zigzag doubles the magnitude, so limit to half MAX_SAFE_INTEGER - const half = Math.floor(Number.MAX_SAFE_INTEGER / 2); - for (let i = 0; i < 100000; i++) { - const n = Math.floor(Math.random() * half) * (Math.random() < 0.5 ? 1 : -1); - expect(fromZigZag(toZigZag(n))).toBe(n); - } - }); -}); - diff --git a/bench-encode.ts b/bench-encode.ts deleted file mode 100644 index e82c1e3..0000000 --- a/bench-encode.ts +++ /dev/null @@ -1,153 +0,0 @@ -import { encode } from "./rx"; -import { encode as rxbEncode } from "./rxb"; -import { readFileSync, readdirSync, existsSync } from "fs"; -import { join } from "path"; -import { createRequire } from "module"; - -// Try to load native module -let nativeEncodeRxb: ((json: Buffer) => Buffer) | undefined; -try { - const require = createRequire(import.meta.url); - const native = require("./native/rx-native.node"); - nativeEncodeRxb = native.encodeRxb; - console.log("Native module loaded.\n"); -} catch (e) { - console.log(`Native module not available: ${e instanceof Error ? 
e.message : e}\n`); -} - -// ── Benchmark harness ── - -function bench(name: string, fn: () => unknown, iterations = 100) { - // Warmup - for (let i = 0; i < 5; i++) fn(); - - const times: number[] = []; - for (let i = 0; i < iterations; i++) { - const start = performance.now(); - fn(); - times.push(performance.now() - start); - } - times.sort((a, b) => a - b); - const median = times[Math.floor(times.length / 2)]!; - const p95 = times[Math.floor(times.length * 0.95)]!; - const mean = times.reduce((a, b) => a + b, 0) / times.length; - console.log( - ` ${name.padEnd(30)} median=${median.toFixed(3)}ms mean=${mean.toFixed(3)}ms p95=${p95.toFixed(3)}ms` - ); - return median; -} - -// ── Generate synthetic datasets ── - -function makeFlatObject(n: number): Record { - const obj: Record = {}; - for (let i = 0; i < n; i++) { - obj[`key-${i.toString(36)}-${Math.random().toString(36).slice(2, 10)}`] = - i % 3 === 0 - ? `value-${i}-${"x".repeat(20 + (i % 50))}` - : i % 3 === 1 - ? i * 1.1 - : i % 2 === 0; - } - return obj; -} - -function makeRecordArray(n: number): unknown[] { - const arr: unknown[] = []; - for (let i = 0; i < n; i++) { - arr.push({ - id: i, - name: `User ${i}`, - email: `user${i}@example.com`, - active: i % 3 !== 0, - score: Math.round(Math.random() * 10000) / 100, - tags: ["alpha", "beta", "gamma"].slice(0, (i % 3) + 1), - meta: { created: `2025-01-${(i % 28 + 1).toString().padStart(2, "0")}`, version: i % 5 }, - }); - } - return arr; -} - -function makeDeepNested(depth: number, breadth: number): unknown { - if (depth === 0) return `leaf-${Math.random().toString(36).slice(2, 8)}`; - const obj: Record = {}; - for (let i = 0; i < breadth; i++) { - obj[`d${depth}-b${i}`] = makeDeepNested(depth - 1, breadth); - } - return obj; -} - -function makePathObject(n: number): Record { - const segments = ["api", "v1", "v2", "users", "posts", "comments", "auth", "static", "assets", "img", "css", "js", "chunks", "media"]; - const obj: Record = {}; - for (let i = 0; i < n; 
i++) { - const parts: string[] = []; - const len = 3 + (i % 5); - for (let j = 0; j < len; j++) { - parts.push(segments[(i * 7 + j * 13) % segments.length]!); - } - parts.push(`file-${i.toString(36)}.${i % 2 === 0 ? "js" : "css"}`); - obj["/" + parts.join("/")] = `content-${i}-${"z".repeat(10 + (i % 30))}`; - } - return obj; -} - -// ── Run benchmarks ── - -interface Dataset { - name: string; - data: unknown; -} - -const datasets: Dataset[] = [ - { name: "flat-1k", data: makeFlatObject(1000) }, - { name: "flat-10k", data: makeFlatObject(10_000) }, - { name: "records-1k", data: makeRecordArray(1000) }, - { name: "records-10k", data: makeRecordArray(10_000) }, - { name: "deep-6x4", data: makeDeepNested(6, 4) }, - { name: "paths-5k", data: makePathObject(5000) }, -]; - -// Load real sample files -const samplesDir = join(import.meta.dirname!, "samples"); -for (const file of readdirSync(samplesDir).filter((f) => f.endsWith(".json"))) { - const raw = readFileSync(join(samplesDir, file), "utf-8"); - datasets.push({ name: `file:${file}`, data: JSON.parse(raw) }); -} - -// Load large JSON samples if present -for (const file of ["large-sample.json", "large-sample-2.json"]) { - const path = join(import.meta.dirname!, file); - if (existsSync(path)) { - console.log(`Loading ${file}...`); - const data = JSON.parse(readFileSync(path, "utf-8")); - console.log("Done."); - datasets.push({ name: file.replace(".json", ""), data }); - } -} - -console.log("=== RX / RXB Encode Benchmark ===\n"); - -for (const { name, data } of datasets) { - const jsonStr = JSON.stringify(data); - const jsonBytes = Buffer.byteLength(jsonStr, "utf-8"); - const iters = jsonBytes > 10_000_000 ? 5 : jsonBytes > 500_000 ? 
50 : 200; - - console.log(`\n── ${name} (JSON: ${(jsonBytes / 1024).toFixed(1)} KB) ──`); - bench("JSON.stringify", () => JSON.stringify(data), iters); - - const rxBytes = encode(data); - const rxSize = `${(rxBytes.length / 1024).toFixed(1)} KB`; - bench(`rx encode`.padEnd(24) + ` [${rxSize}]`, () => encode(data), iters); - - const rxbBytes = rxbEncode(data); - const rxbSize = `${(rxbBytes.length / 1024).toFixed(1)} KB`; - bench(`rxb encode`.padEnd(24) + `[${rxbSize}]`, () => rxbEncode(data), iters); - - if (nativeEncodeRxb) { - const jsonBuf = Buffer.from(jsonStr); - const nativeBytes = nativeEncodeRxb(jsonBuf); - const nativeSize = `${(nativeBytes.length / 1024).toFixed(1)} KB`; - bench(`rxb native`.padEnd(24) + `[${nativeSize}]`, () => nativeEncodeRxb!(Buffer.from(JSON.stringify(data))), iters); - } -} diff --git a/count-types.ts b/count-types.ts new file mode 100644 index 0000000..50670ee --- /dev/null +++ b/count-types.ts @@ -0,0 +1,33 @@ +const path = process.argv[2] ?? "large-sample.json" + +const counts = { + string: 0, + number: 0, + boolean: 0, + null: 0, + object: 0, + array: 0, +} + +function walk(value: unknown) { + if (value === null) { + counts.null++ + } else if (Array.isArray(value)) { + counts.array++ + for (const v of value) walk(v) + } else if (typeof value === "object") { + counts.object++ + for (const v of Object.values(value as Record)) walk(v) + } else if (typeof value === "string") { + counts.string++ + } else if (typeof value === "number") { + counts.number++ + } else if (typeof value === "boolean") { + counts.boolean++ + } +} + +const raw = await Bun.file(path).text() +walk(JSON.parse(raw)) + +console.log(counts) diff --git a/docs/rx-format.md b/docs/rx-format.md index 3f1974b..79c5be0 100644 --- a/docs/rx-format.md +++ b/docs/rx-format.md @@ -1,148 +1,131 @@ # RX Format Spec -This document is the formal grammar and encoding reference for the `.rx` text format used by `@creationix/rx`. 
It is intended to make the format understandable **without reading the source code**. +RX is a compact text encoding for JSON-shaped data — objects, arrays, strings, numbers, booleans, `null`. Pointers, chains, refs, and indexes add structural sharing and random access without changing what values can be represented. -RX covers the same data model as JSON: maps, lists, strings, numbers, booleans, and `null`. Pointers, chains, refs, and indexes are encoding features that make large documents smaller and faster to query. +RX is the data-layer subset of [REXC bytecode](https://github.com/creationix/rex/blob/rusty/docs/rexc-bytecode.md). REXC adds program-execution tags (variables, opcodes, calls, control flow) on top; every valid RX document is a valid REXC document. -> For interactive inspection, paste any RX or JSON into the live viewer at **[rx.run](https://rx.run/)**. +> Paste RX or JSON into **[rx.run](https://rx.run/)** for interactive inspection. --- -## Reading direction +## Parsing model -RX is parsed **right-to-left**. Every value has a **tag** character with a **base64 varint** to its right, and may have a **body** to its left: +RX is parsed **right-to-left**. Every value has a **tag** character with a **b64 varint** to its right, and optionally a **body** to its left: ```text [body][tag][b64 varint] - ◄── read this way ── + ◄── read this way ── ``` -The parser starts at the rightmost byte and scans left past base64 digits until it hits a non-b64 byte — that byte is the tag. The b64 digits to its right are the varint. The tag then determines whether there is a body to the left and how to interpret it. +The parser scans left past b64 digits until it hits a non-b64 byte — that byte is the tag. The tag determines how to interpret the varint and whether a body sits to its left. -**Worked example** — parsing `hi,2`: +The three container tags (`]` `}` `>`) are paired: the parser reads child values right-to-left until it hits the matching opener (`[` `{` `<`). -1. 
Start at the right: `2` is a b64 digit → varint = 2 -2. Next byte left: `,` is not a b64 digit → **tag** (string) -3. The tag says there are 2 bytes of body to the left → `hi` +### Tags ---- +| Tag | Name | Layout | Varint meaning | +|---------|---------|----------------------|-------------------------| +| `+` | Integer | `+[varint]` | zigzag signed | +| `*` | Decimal | `[+base]*[varint]` | zigzag exponent | +| `,` | String | `[utf8],[varint]` | byte length | +| `'` | Ref | `'[name]` | b64 name (not a number) | +| `^` | Pointer | `^[varint]` | byte offset delta | +| `[` `]` | Array | `[children]` | — (paired) | +| `{` `}` | Object | `{children}` | — (paired) | +| `<` `>` | Chain | `<segments>` | — (paired) | +| `.` | Schema | `[keys].[varint]` | byte length | +| `#` | Index | `[entries]#[varint]` | packed count+width | +| `@` | Bytes | `[b64body]@[varint]` | byte length of body | -## Grammar overview +Eleven tag characters total (`]` `}` `>` are the tags; `[` `{` `<` are end markers for the container scan). -A **value** is one of: +### Worked example -```ebnf -value = number | string | ref | list | map | pointer | chain ; -``` + +Parse `[world,5hi,2]` right-to-left: + +| Scan | Tag | Varint | Action | +|------------|-----|--------|---------------------------------| +| `…]` | `]` | — | array open; read children | +| `…hi,2` | `,` | `2` | read 2 bytes left → `"hi"` | +| `…world,5` | `,` | `5` | read 5 bytes left → `"world"` | +| `[…` | `[` | — | array close → `["hi", "world"]` | + +Children are written in reverse byte order so that R-to-L parsing yields them in natural forward order. 
-### Tags at a glance +### Why right-to-left -| Tag | Name | Layout | Description | -|---------|---------|---------------------------------|------------------------------------------| -| **`+`** | Number | `+[b64 zigzag]` | Zigzag-decoded signed integer | -| **`*`** | Decimal | `[base]+[b64 base]*[b64 exp]` | `base × 10^exp` | -| **`,`** | String | `[UTF-8 bytes],[b64 length]` | Raw UTF-8, length in bytes | -| **`'`** | Ref | `'[name]` | Built-in literal or external ref name | -| **`;`** | List | `[children];[b64 content-size]` | Ordered child values | -| **`:`** | Map | `[children]:[b64 content-size]` | Key/value pairs | -| **`^`** | Pointer | `^[b64 delta]` | Backward delta to an earlier byte offset | -| **`.`** | Chain | `[segments].[b64 content-size]` | Concatenated string segments | -| **`#`** | Index | `[entries]#[b64 compound]` | Sorted lookup table for a container | +The root of a document is its rightmost byte. Appending bytes to the right produces a new valid document whose root is the new content — and pointers in the appended bytes can reference any earlier byte via a backward delta. This makes RX naturally append-only: revisions deduplicate against all prior bytes for free, with no rewriting. + +It also keeps encoding simple: the encoder writes left-to-right via depth-first, post-order traversal. --- ## Building blocks -### B64 - -```ebnf -b64 - = "0" | "1" | "2" | "3" | "4" | "5" | "6" - | "7" | "8" | "9" | "a" | "b" | "c" | "d" - | "e" | "f" | "g" | "h" | "i" | "j" | "k" - | "l" | "m" | "n" | "o" | "p" | "q" | "r" - | "s" | "t" | "u" | "v" | "w" | "x" | "y" - | "z" | "A" | "B" | "C" | "D" | "E" | "F" - | "G" | "H" | "I" | "J" | "K" | "L" | "M" - | "N" | "O" | "P" | "Q" | "R" | "S" | "T" - | "U" | "V" | "W" | "X" | "Y" | "Z" | "-" - | "_" - ; +### B64 alphabet + +``` +0-9 a-z A-Z - _ ``` -RX uses the alphabet **`0-9 a-z A-Z - _`** (64 characters, URL-safe, no padding) for variable-length unsigned integers. 
+64 URL-safe characters, ordering extends hexadecimal. ### Varint -```ebnf -varint = { b64 } ; -``` - -A `varint` is zero or more `b64` digits in big-endian order. These are used for unsigned integers, signed integers, and sometimes as string identifiers. - -- **Zero** is encoded as an empty string (zero digits) -- **Signed integers** use zigzag encoding: 0 → 0, -1 → 1, 1 → 2, -2 → 3, ... +Zero or more b64 digits, big-endian. **Zero is the empty string.** Signed values use zigzag on top: `0 → 0, -1 → 1, 1 → 2, -2 → 3, …`. -| Decimal | Zigzag | B64 digits | +| Decimal | Zigzag | B64 | |---------|--------|------------| | 0 | 0 | *(empty)* | | 1 | 2 | `2` | | -1 | 1 | `1` | | 42 | 84 | `1k` | -| 255 | 510 | `7-` | +| -256 | 511 | `7_` | --- ## Primitives -### Number — `+` `*` +### Integer `+` -```ebnf -number = "+" , varint , [ "*" , varint ] ; -``` +Zigzag-encoded signed integer. -Numbers are encoded as a zigzag signed integer base optionally combined with a zigzag signed power of 10 exponent. When the exponent is small and non-negative, the encoder folds it into the base and omits the `*` suffix. +| JSON | RX | +|--------|-------| +| `0` | `+` | +| `1` | `+2` | +| `-1` | `+1` | +| `42` | `+1k` | +| `-256` | `+7_` | -Special float values use refs instead: **`'inf`** (+Infinity), **`'nif`** (-Infinity), **`'nan`** (NaN). +### Decimal `*` -| JSON | RX | Base | Exp | Notes | -|-----------|---------|------|-----|--------------------------------| -| `0` | `+` | 0 | — | zigzag(0) = empty | -| `1` | `+2` | 1 | — | zigzag(1) = 2 | -| `-1` | `+1` | -1 | — | zigzag(-1) = 1 | -| `42` | `+1k` | 42 | — | zigzag(42) = 84 = `1k` | -| `255` | `+7-` | 255 | — | zigzag(255) = 510 = `7-` | -| `1000` | `+vg` | 1000 | — | small exp, folded into integer | -| `3.14` | `+9Q*3` | 314 | -2 | 314 × 10⁻² | -| `-0.5` | `+9*1` | -5 | -1 | -5 × 10⁻¹ | -| `99.9` | `+ve*1` | 999 | -1 | 999 × 10⁻¹ | -| `1000000` | `+2*c` | 1 | 6 | 1 × 10⁶ | +A decimal requires an adjacent `+` value to its left. 
The varint to the right of `*` is the zigzag exponent; the `+` value to the left is the base. Value is `base × 10^exp`. -### String — `,` +Special floats use refs: `'inf` (+∞), `'nif` (−∞), `'nan` (NaN). -```ebnf -string = utf8_body , "," , varint ; -``` +| JSON | RX | Base | Exp | +|--------|---------|------|-----| +| `1000` | `+vg` | 1000 | — | +| `3.14` | `+9Q*3` | 314 | -2 | +| `-0.5` | `+9*1` | -5 | -1 | +| `1e6` | `+2*c` | 1 | 6 | -The body contains raw UTF-8 bytes. The varint gives the **byte length** (not character count). Strings may contain any bytes including nulls and non-ASCII unicode. +### String `,` -| JSON | RX | Bytes | Notes | -|-----------------|-----------------|-------|--------------------------------------| -| `""` | `,` | 0 | empty string | -| `"hi"` | `hi,2` | 2 | | -| `"alice"` | `alice,5` | 5 | | -| `"hello world"` | `hello world,b` | 11 | b64(11) = `b` | -| `"café"` | `café,5` | 5 | `é` is 2 UTF-8 bytes | -| `"🎉"` | `🎉,4` | 4 | emoji is 4 UTF-8 bytes | -| `"🏴‍☠️"` | `🏴‍☠️,d` | 13 | ZWJ pirate flag: 🏴 + ZWJ + ☠ + VS16 | +Raw UTF-8 body. The varint is the **byte length** (not character count). -### Ref — `'` +| JSON | RX | +|-----------------|-----------------| +| `""` | `,` | +| `"hi"` | `hi,2` | +| `"hello world"` | `hello world,b` | +| `"café"` | `café,5` | +| `"🎉"` | `🎉,4` | -```ebnf -ref = "'" , ref_name ; -``` +### Ref `'` -Refs are **unique among tags**: the bytes to the right of `'` are not a numeric value but a *name* composed of b64 digits. The parser checks for built-in names first; non-built-in ref names refer to entries in an external dictionary agreed between encoder and decoder. +The bytes to the right of `'` form a **name** of b64 characters, not a numeric value. Built-in names resolve to literals (`'t`, `'f`, `'n`, `'u`, `'inf`, `'nif`, `'nan`). Other names are application-defined; they may resolve to a value from a shared external dictionary, or to an opaque host symbol (e.g. 
a JS `Symbol`, an interned token) that the application uses as an identity marker. The encoder and decoder need only agree on the meaning of each name they use. | Value | RX | |-------------|--------| @@ -158,219 +141,179 @@ Refs are **unique among tags**: the bytes to the right of `'` are not a numeric ## Containers -### List — `;` +Containers use paired delimiters — no length prefix. Children are written in reverse byte order so that R-to-L parsing yields them forward. -```ebnf -list = { value } , [ index ] , ";" , varint ; -``` +### Array `[` `]` -Children are written in reverse order so that right-to-left parsing yields them in natural forward order (index 0 first). The varint gives the **total byte size** of the content region. +Ordered children. -Large lists may include an **index** between the last child and the `;` tag. +An array may have an optional [index](#index-) at the right end of the body for O(1) random access. -Consider a small array containing 3 integers: +| JSON | RX | +|-------------------------------|-----------------| +| `[]` | `[]` | +| `[1, 2, 3]` | `[+6+4+2]` | +| `[1, 2, 3]` with forced index | `[+6+4+2420#o]` | -```json -[ 1, 2, 3 ] -``` +### Object `{` `}` -When encoded to RX, this looks like: +Ordered key/value pairs. Keys are typically strings but may be pointers or chains resolving to strings. -```rx -+6+4+2;6 -├╯├╯├╯╰┴─ header for 6 byte list -│ │ ╰──── value 1 as zigzag integer -│ ╰────── value 2 as zigzag integer -╰──────── value 3 as zigzag integer -``` +An object may have an optional [index](#index-) for O(log n) key lookup, **or** a [schema](#schema) for shape sharing. 
-If we configure the encoder to force indexes for short values, it encodes like this: - -```rx -+6+4+2024#o;b -├╯├╯├╯╰┬╯├╯╰┴─ header for 6 byte list -│ │ │ │ ╰──── index count=3, width=1 -│ │ │ ╰────── 3 pointers [0, 2, 4] -│ │ ╰───────── value 1 as zigzag integer -│ ╰─────────── value 2 as zigzag integer -╰───────────── value 3 as zigzag integer -``` +| JSON | RX | +|-----------------------------------------|--------------------------------------| +| `{}` | `{}` | +| `{"a":1,"b":2}` | `{+4b,1+2a,1}` | +| `{"users":["alice","bob"],"version":3}` | `{+6version,7[bob,3alice,5]users,5}` | +| `{"z":1,"a":2,"m":3}` with forced index | `{+6m,1+4a,1+2z,10a5#o}` | +| `[{"a":1,"b":2},{"a":3,"b":4}]` | `[{+8+6b,a.3}{+4+2^6}]` | -### Map — `:` +### Chain `<` `>` -```ebnf -map = { value , value } , [ index ] , [ schema ] , ":" , varint ; -``` +A concatenated value built from string and bytes segments. Each segment is a string, bytes, a pointer to one of these, or a nested chain. -Key/value pairs are written in reverse order so that right-to-left parsing yields them in natural insertion order. **Key order is preserved.** Keys are typically strings but may be pointers or chains. +The result type is determined by segment composition: **if any segment resolves to bytes, the result is bytes** (string segments are taken as their UTF-8 byte representation). Otherwise the result is a string (UTF-8 concatenation). -Large maps may include an **index** and/or a **schema** between the last key-value pair and the `:` tag. When both are present, the schema is rightmost, followed by the index. +Chains let scattered values share common substrings or byte sequences via pointers — useful for path-like values (URLs, file paths, identifiers) and for binary protocols where many blobs share a header, signature, IV, or other prefix. 
-| JSON | RX | -|-----------------------------------------|--------------------------------------| -| `{}` | `:` | -| `{"a":1,"b":2}` | `+4b,1+2a,1:a` | -| `{"users":["alice","bob"],"version":3}` | `+6version,7bob,3alice,5;cusers,5:w` | +| Value | RX | +|---------------------------|------------------------------------------| +| `"/docs/getting-started"` | `` | +| `"/docs/encoding"` | `` (`^k` → `/docs/` above) | --- ## Sharing and random access -### Pointer — `^` - -```ebnf -pointer = "^" , varint ; -``` +### Pointer `^` -A pointer refers to an earlier value by **backward delta** — the distance in bytes from the pointer's tag position back to the target value's right edge. To resolve: `target = tag_position - delta`, then read the value at that offset. +Backward delta in bytes from the left of the pointer's tag to the target's right edge: -Pointers enable: -- **Value deduplication** — identical strings, maps, or subtrees are written once -- **Schema sharing** — maps with the same keys reference a shared key layout - -### Chain — `.` - -```ebnf -chain = { value } , "." , varint ; +``` +target_right = tag_position - delta ``` -A chain is a **concatenated string** built from segments. Each segment is itself a value — typically a string, pointer, or another chain. The varint gives the total byte size of the segments. +Parse R-to-L from `target_right`. Pointers enable value deduplication, schema sharing, chain prefix sharing, and cross-revision dedup. -Chains compress keys with shared prefixes. For example, `/docs/getting-started` and `/docs/encoding` might share a `/docs/` prefix segment via a pointer, with only the suffix differing. +| JSON | RX | +|-------------------------------------------|------------------------| +| `["word","salad","word","salad","salad"]` | `[salad,5^word,4^7^2]` | -### Index — `#` +The first `"salad"` is written once at the left; each duplicate becomes a pointer back to an earlier copy. 
`^` with an empty varint is a valid pointer with delta 0 — used when the duplicate sits immediately to the right of its source. -```ebnf -index = { index_entry } , "#" , varint ; -index_entry = b64 , { b64 } ; -``` +### Index `#` -An index is a lookup table attached to a container (list or map). It appears inside the container body, between the content and the container's tag. +Lookup table for a container, appearing as the rightmost child inside the container body. -The compound varint packs two values: +The compound varint packs: ``` compound = (count << 3) | (width - 1) ``` -- **Low 3 bits** → `width - 1` (digits per entry, supporting widths 1–8) -- **Upper bits** → `count` (number of entries) - -Each entry is a fixed-width base64 number giving the backward delta from the container content boundary (the left edge of the index table) to the corresponding child's right edge. The base is shared by all entries; each delta is not relative to its own pointer position. - -- **Indexed Lists**: entries point to values in element order. `O(1)` lookup. - - ```js - [1,2,3] // Sample array, encoded with forced indices. - ``` +Each entry is a fixed-width b64 backward delta from the **index base** (the right edge of the child immediately to the left of the index). To resolve entry *i*: `target_right = index_base - entry[i]`. - The index is `024#o`. This is 3 pointers `[0,2,4]` and `#o` for the packed config `{width:1,count:3}`. +Entries are stored in reverse natural order so that R-to-L scanning yields them forward — the rightmost entry holds the delta for element 0 (arrays) or for the first sorted key (objects). For random access, entry *i* sits at position `ix_tag - (i+1) * width`, where `ix_tag` is the position of the `#` byte. 
- ```rx - +6+4+2024#o;b - ``` +| Container | R-to-L entry order | Access | +|-----------|--------------------|------------------------| +| Array | element order | O(1) | +| Object | UTF-8 key order | O(log n) binary search | - -- **Indexed Maps**: entries point to keys, sorted in UTF-8 byte order. `O(log2 n)` lookup. +Examples: - ```js - // Sample object, encoded with forced indices. - // Note the keys are not ordered. - {z:1,a:2,m:3} - ``` +``` +[+6+4+2420#o] → [1, 2, 3] R-to-L deltas [0, 2, 4], width 1 +{+6m,1+4a,1+2z,10a5#o} → {z:1, a:2, m:3} R-to-L sorted by key (body keeps insertion order) +``` - The index is `5a0#o`. This is 3 pointers `[5,10,0]` and the same `#o` packed index config. Note the indexes are not in order. This is because the keys in the index are sorted for fast binary search lookup. But the values in the actual object body preserve original order. +### Schema - ```rx - +6m,1+4a,1+2z,15a0#o:k - ``` +The schema tag `.` encodes a comma-delimited list of keys as the body. The varint is the body's byte length, same as a string. -- **Schema maps**: keys are external, so index entries point to values like a list. O(1) value lookup + whatever cost the key lookup in the external schema is. +Keys are stored in **reverse natural order** so that scanning the body R-to-L for delimiters yields keys in lockstep with R-to-L value parsing — the parser pairs each key with the next value as it goes. - ```json - [{"z":1,"a":2,"m":3},{"z":4,"a":5,"m":6}] - ``` +| Keys | RX schema | +|-----------------------|------------------| +| `["a"]` | `a.1` | +| `["a", "b"]` | `b,a.3` | +| `["color", "fruits"]` | `fruits,color.c` | - This was encoded with indices on all containers +A schema object stores only its values, with a schema reference as its rightmost child — either an inline schema (`.`) or a pointer (`^`) that resolves to one. - - `0f#g` on the outer list. Offsets `[0,15]` config `{count:2,width:1}`. 
- - `024#o^b` is pointer to right object with shared keys and index to local values list. - - `5a0#o` on the left object pointing to keys in sorted order. +A schema object cannot also carry an index; its values are read sequentially. Keys cannot contain commas (the schema delimiter). - ```rx - +cm,1+aa,1+8z,15a0#o:k+6+4+2024#o^b:d0f#g;F - ``` +The encoder detects shared key sets automatically. The first object with a given key set embeds the schema inline; subsequent objects with the same keys store only values plus a pointer to that schema. - In this document, one map provides the key layout and the other map stores only values plus a schema pointer. +```json +[{"z":1,"a":2,"m":3},{"z":4,"a":5,"m":6}] +``` -**Indexes enable:** -- **O(1) list access** — jump directly to the *N*th element -- **O(log n) key lookup** on non-schema maps — binary search on sorted keys -- **O(log n + m) prefix search** on non-schema maps — find the first matching key, then scan forward -- **O(1) value access by position** on schema maps +``` +[{+c+a+8m,a,z.5}{+6+4+2^8}] +``` -Without an index, list access and key lookup are O(n) linear scans. +- Left object embeds schema `m,a,z.5` (keys reversed) followed by its values. +- Right object stores only values plus `^8`, a pointer to the schema in the left object. -### Schema +Key lookup in a schema object is O(n) — scan the schema for the key's position, then walk N values to reach the corresponding value. -Maps can store their keys **separately from their values** using a schema reference. This is useful when many maps share the same key set (e.g., rows in a table-like structure). +### Bytes `@` -```ebnf -schema = pointer | ref ; -``` +A binary data value. The body is URL-safe b64 chars (no padding); the varint is the body's length in chars (same convention as a string's byte length). -A schema map stores only values in its content body. 
The schema node appears as the rightmost item inside the map (with the index, if present, farther to the left). The parser identifies it by tag: +Decoders b64-decode the body to recover the original bytes. The b64 alphabet is the same one RX uses for varints — `0-9 a-z A-Z - _` — so the body is parser-safe: the parser hits `@` before scanning into the body, identical to how strings work. -- **Pointer schema** (`^`) — points to another map or list whose keys become this map's keys -- **Ref schema** (`'`) — names an external dictionary entry containing the key list +Decoded byte count derives from the body length L: -The encoder detects shared key sets **automatically**. The first map with a given key set is encoded normally; subsequent maps with the same keys store only their values and a pointer back to the first map's key layout. +| L mod 4 | Decoded bytes | +|---------|----------------------| +| 0 | L × 3 / 4 | +| 2 | (3L − 2) / 4 | +| 3 | (3L − 1) / 4 | +| 1 | invalid (no padding) | -Lookup cost for schema maps depends on the schema source: +| Decoded | Body chars | Total RX | +|------------|-----------:|-------------------------| +| `[]` (0B) | 0 | `@` (1 byte) | +| 1 byte | 2 | `[b64×2]@2` (4 bytes) | +| 16B (UUID) | 22 | `[b64×22]@m` (24 bytes) | +| 32B (hash) | 43 | `[b64×43]@H` (45 bytes) | -- If the schema points to an **indexed object**, key lookup in the schema is **O(log n)**. - Combined with indexed value access in the schema map (**O(1)**), total lookup remains **O(log n)**. -- If the schema points to a **list of keys**, key lookup is **O(n)** (key order is not assumed sorted). - Combined with indexed value access in the schema map (**O(1)**), total lookup is **O(n)**. +Use the bytes tag for hashes, UUIDs, signatures, encrypted payloads, image thumbnails, and other binary blobs that would otherwise be wrapped in a hex or base64 string. 
Tooling can render bytes nodes distinctly from strings (hex view, hex-dump, raw bytes), and applications receive a typed bytes value (`Uint8Array`, `bytes`, `[]u8`, etc.) rather than a string they have to decode. ### External refs -Encoders and decoders can share an **external dictionary** of values. When a value matches a ref entry by structural equality, the encoder writes `'name` instead of embedding that value. The decoder looks up the name in the same dictionary to reconstruct the original value. +Encoders and decoders can share a dictionary of values. When a value matches a dictionary entry by structural equality, the encoder writes `'name` instead of embedding it; the decoder reconstructs from the same dictionary. -Pointers already deduplicate repeated embedded values within a document. Refs are primarily for opaque external values that are not directly serializable in RX/JSON, while still allowing agreed dictionary-backed reconstruction. They are also useful for values shared across multiple documents when both sides use the same external dictionary. +Pointers already dedup within a document. Refs are for: ---- +- Opaque values that aren't directly serializable +- Values shared across multiple documents -## Encoding example walkthrough +--- -Given this JSON: +## Append-only revisions -```json -{"users":["alice","bob"],"version":3} -``` +Publishing a new revision: -The RX encoding is: +1. Start with the existing bytes. +2. Append new bytes to the right. +3. The new root is whatever value the new rightmost bytes form. -```rx -+6version,7bob,3alice,5;cusers,5:w -``` +Appended content can point back to old content for free — backward deltas naturally span revisions because old bytes stay at their original offsets. Readers who want an older revision truncate at that byte offset; earlier bytes are still a valid RX document for that revision. 
-Reading **right-to-left**: +--- -| Step | Bytes | B64 | Tag | Decoded | -|------|-------------|----------|------------|------------------------------| -| 1 | `:w` | `w` = 32 | `:` map | content is 32 bytes wide | -| 2 | `users,5` | `5` = 5 | `,` string | "users" — key₁ | -| 3 | `;c` | `c` = 12 | `;` list | content is 12 bytes — value₁ | -| 4 | `alice,5` | `5` = 5 | `,` string | "alice" — list element₁ | -| 5 | `bob,3` | `3` = 3 | `,` string | "bob" — list element₂ | -| 6 | `version,7` | `7` = 7 | `,` string | "version" — key₂ | -| 7 | `+6` | `6` = 6 | `+` number | 6 → zigzag **3** — value₂ | +## Relationship to Rex Bytecode ---- +RX defines these eleven tags for data: -## Versioning +``` ++ * , . ' ^ # @ ] } > +``` -This document describes the current encoding used by `@creationix/rx`. The format originated as the internal bytecode for *rex* (a DSL for HTTP routing). Pointers, chains, and indexing are first-class concepts because the design prioritizes small encoded size and random access. +plus three opener markers (`[ { <`) that close container scans. -Future versions may add new tags or encoding features. The **tag character set** and **right-to-left reading direction** are stable. +REXT extends RX with bytecode tags for computation (variables, opcodes, calls, control flow) in a disjoint character set (`$ % ( ) ? & | : = …`) — these never appear in pure RX. Both formats share parse rules, so every RX document parses identically as a REXT document with no bytecode features. 
diff --git a/index.test.ts b/index.test.ts deleted file mode 100644 index 45bb61d..0000000 --- a/index.test.ts +++ /dev/null @@ -1,173 +0,0 @@ -import { describe, expect, test } from "vitest"; -import * as pkg from "./index.ts"; -import { - encode, - stringify, - decode, - parse, - open, - inspect, - handle, - makeCursor, - read, - readStr, - resolveStr, - prepareKey, - strEquals, - strCompare, - strHasPrefix, - seekChild, - collectChildren, - findKey, - findByPrefix, - rawBytes, - rxbEncode, - rxbDecode, - rxbOpen, - rxbHandle, - rxbMakeCursor, - rxbRead, - rxbReadStr, - rxbResolveStr, - rxbPrepareKey, - rxbStrEquals, - rxbStrCompare, - rxbStrHasPrefix, - rxbSeekChild, - rxbCollectChildren, - rxbFindKey, - rxbFindByPrefix, - rxbRawBytes, -} from "./index.ts"; - -describe("index: RX text format", () => { - const sample = { users: ["alice", "bob"], version: 3 }; - - test("stringify/parse round-trips", () => { - const s = stringify(sample); - expect(typeof s).toBe("string"); - const back = parse(s) as typeof sample; - expect(back.users[0]).toBe("alice"); - expect(back.version).toBe(3); - expect(JSON.parse(JSON.stringify(back))).toEqual(sample); - }); - - test("encode/open/decode round-trip", () => { - const buf = encode(sample); - expect(buf).toBeInstanceOf(Uint8Array); - const proxy = open(buf) as typeof sample; - expect(proxy.users[1]).toBe("bob"); - expect(decode(buf)).toEqual(proxy); - }); - - test("inspect returns an AST", () => { - const buf = encode(sample); - const root = inspect(buf); - expect(root.tag).toBe(":"); - expect(Array.from(root, (n) => n.tag).length).toBeGreaterThan(0); - }); - - test("handle exposes underlying buffer", () => { - const buf = encode(sample); - const proxy = open(buf) as { users: unknown }; - const h = handle(proxy.users); - expect(h?.data).toBeInstanceOf(Uint8Array); - expect(typeof h?.right).toBe("number"); - }); - - test("cursor API is present and usable", () => { - const buf = encode({ a: 1, b: 2, c: 3 }); - const c = 
makeCursor(buf); - read(c); - expect(c.tag).toBe("object"); - const key = prepareKey("b"); - expect(findKey(c, c, key)).toBe(true); - // Exercise the rest just to prove they're wired up. - void [ - readStr, - resolveStr, - strEquals, - strCompare, - strHasPrefix, - seekChild, - collectChildren, - findByPrefix, - rawBytes, - ].every((fn) => typeof fn === "function"); - }); -}); - -describe("index: RXB binary format", () => { - const sample = { users: ["alice", "bob"], version: 3 }; - - test("rxbEncode/rxbOpen/rxbDecode round-trip", () => { - const buf = rxbEncode(sample); - expect(buf).toBeInstanceOf(Uint8Array); - const proxy = rxbOpen(buf) as typeof sample; - expect(proxy.users[0]).toBe("alice"); - expect(proxy.version).toBe(3); - expect(rxbDecode(buf)).toEqual(proxy); - }); - - test("rxbHandle exposes the underlying buffer", () => { - const buf = rxbEncode(sample); - const proxy = rxbOpen(buf) as { users: unknown }; - const h = rxbHandle(proxy.users); - expect(h?.data).toBeInstanceOf(Uint8Array); - expect(typeof h?.right).toBe("number"); - }); - - test("rxb cursor API is present and usable", () => { - const buf = rxbEncode({ a: 1, b: 2, c: 3 }); - const c = rxbMakeCursor(buf); - rxbRead(c); - const key = rxbPrepareKey("b"); - expect(rxbFindKey(c, c, key)).toBe(true); - void [ - rxbReadStr, - rxbResolveStr, - rxbStrEquals, - rxbStrCompare, - rxbStrHasPrefix, - rxbSeekChild, - rxbCollectChildren, - rxbFindByPrefix, - rxbRawBytes, - ].every((fn) => typeof fn === "function"); - }); -}); - -describe("index: surface contract", () => { - test("rx and rxb encoders produce independent outputs", () => { - const value = { hello: "world" }; - const rxBuf = encode(value); - const rxbBuf = rxbEncode(value); - expect(rxBuf).not.toEqual(rxbBuf); - expect(decode(rxBuf)).toEqual(rxbDecode(rxbBuf)); - }); - - test("no rxb identifier leaks into the flat namespace", () => { - // RX format owns the flat names; rxb must only appear under the rxb* prefix. 
- const flatNames = Object.keys(pkg).filter( - (k) => !k.startsWith("rxb") && !k.startsWith("Rxb"), - ); - for (const name of ["encode", "decode", "parse", "stringify", "open", "inspect"]) { - expect(flatNames).toContain(name); - } - }); - - test("expected rxb-prefixed exports are present", () => { - for (const name of [ - "rxbEncode", - "rxbDecode", - "rxbOpen", - "rxbHandle", - "rxbMakeCursor", - "rxbRead", - "rxbFindKey", - ]) { - expect(pkg).toHaveProperty(name); - expect(typeof (pkg as Record)[name]).toBe("function"); - } - }); -}); diff --git a/index.ts b/index.ts index f8eb98c..2f3a58c 100644 --- a/index.ts +++ b/index.ts @@ -1,61 +1,8 @@ // Public npm entry for @creationix/rx. // -// Re-exports the RX text format (flat) and the RXB binary format (rxb-prefixed). -// Source files remain standalone and vendor-friendly — copy rx.ts + rx-read.ts -// or rxb.ts + rxb-read.ts directly if you want only one format without the -// npm dependency. +// Re-exports the RX text format. Source files remain standalone and +// vendor-friendly — copy rx.ts + rx-read.ts directly if you want to embed +// without the npm dependency. 
export * from "./rx.ts"; - -export { - type Tag, - type Cursor, - makeCursor, - read, - readStr, - resolveStr, - prepareKey, - strCompare, - strEquals, - strHasPrefix, - seekChild, - collectChildren, - findKey, - findByPrefix, - rawBytes, - open, - handle, - type ASTNode, - inspect, - type DecodeOptions, - decode, - parse, -} from "./rx-read.ts"; - -export { - type EncodeOptions as RxbEncodeOptions, - encode as rxbEncode, -} from "./rxb.ts"; - -export { - type Tag as RxbTag, - type Cursor as RxbCursor, - type Refs as RxbRefs, - type DecodeOptions as RxbDecodeOptions, - makeCursor as rxbMakeCursor, - read as rxbRead, - readStr as rxbReadStr, - resolveStr as rxbResolveStr, - prepareKey as rxbPrepareKey, - strCompare as rxbStrCompare, - strEquals as rxbStrEquals, - strHasPrefix as rxbStrHasPrefix, - seekChild as rxbSeekChild, - collectChildren as rxbCollectChildren, - findKey as rxbFindKey, - findByPrefix as rxbFindByPrefix, - rawBytes as rxbRawBytes, - open as rxbOpen, - handle as rxbHandle, - decode as rxbDecode, -} from "./rxb-read.ts"; +export * from "./rx-read.ts"; diff --git a/package-lock.json b/package-lock.json index b11ba4e..6523818 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@creationix/rx", - "version": "0.8.0", + "version": "0.9.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@creationix/rx", - "version": "0.8.0", + "version": "0.9.2", "license": "MIT", "bin": { "rx": "dist/rx-cli.js" diff --git a/rx-cli.test.ts b/rx-cli.test.ts deleted file mode 100644 index 197abf0..0000000 --- a/rx-cli.test.ts +++ /dev/null @@ -1,558 +0,0 @@ -import { describe, test, expect, beforeAll, afterAll } from "bun:test"; -import { writeFileSync, mkdtempSync, rmSync, readFileSync, existsSync } from "node:fs"; -import { join } from "node:path"; -import { tmpdir } from "node:os"; - -// End-to-end CLI tests. Each case spawns the CLI as a subprocess and asserts -// on stdout, stderr, and exit code. 
The CLI runs directly from source via -// `bun rx-cli.ts` so tests reflect current code without a build step. - -const CLI = join(import.meta.dir, "rx-cli.ts"); - -type RunResult = { stdout: string; stderr: string; exitCode: number }; - -function run(args: string[], input?: string, env?: Record): RunResult { - const proc = Bun.spawnSync({ - cmd: ["bun", CLI, ...args], - stdin: input === undefined ? "ignore" : new TextEncoder().encode(input), - stdout: "pipe", - stderr: "pipe", - env: { ...process.env, NO_COLOR: "1", ...env }, - }); - return { - stdout: new TextDecoder().decode(proc.stdout), - stderr: new TextDecoder().decode(proc.stderr), - exitCode: proc.exitCode ?? -1, - }; -} - -let dir: string; -let jsonFile: string, rxFile: string, rxbFile: string; - -const SAMPLE = { - name: "test", - count: 42, - flag: true, - tags: ["alpha", "beta", "gamma"], - nested: { a: 1, b: 2, c: [10, 20, 30] }, -}; - -beforeAll(() => { - dir = mkdtempSync(join(tmpdir(), "rx-cli-test-")); - jsonFile = join(dir, "sample.json"); - rxFile = join(dir, "sample.rx"); - rxbFile = join(dir, "sample.rxb"); - writeFileSync(jsonFile, JSON.stringify(SAMPLE)); - // Build rx and rxb fixtures by round-tripping through the CLI itself - // (chicken-and-egg on first run is fine — the tests below also validate this) - const r1 = run(["convert", jsonFile, rxFile]); - if (r1.exitCode !== 0) throw new Error(`fixture convert failed: ${r1.stderr}`); - const r2 = run(["convert", jsonFile, rxbFile]); - if (r2.exitCode !== 0) throw new Error(`fixture convert rxb failed: ${r2.stderr}`); -}); - -afterAll(() => { rmSync(dir, { recursive: true, force: true }); }); - -describe("rx --version / --help", () => { - test("--version prints version", () => { - const r = run(["--version"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toMatch(/^rx \d+\.\d+\.\d+\n$/); - }); - - test("-v prints version", () => { - const r = run(["-v"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toMatch(/^rx \d+\.\d+\.\d+\n$/); - 
}); - - test("--help shows top-level help with all core commands", () => { - const r = run(["--help"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("rx — convert"); - expect(r.stdout).toContain("show"); - expect(r.stdout).toContain("convert"); - expect(r.stdout).toContain("FORMATS"); - expect(r.stdout).toContain("RX_FORMAT"); - // `get` was merged into `show` — must NOT appear as a subcommand - expect(r.stdout).not.toMatch(/^\s+get\s/m); - }); - - test("no args with stdin TTY-less shows help", () => { - const r = run(["--help"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("USAGE"); - }); - - test("help --all surfaces advanced commands", () => { - const r = run(["help", "--all"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("ADVANCED COMMANDS"); - expect(r.stdout).toContain("inspect"); - expect(r.stdout).toContain("stats"); - expect(r.stdout).toContain("demo"); - expect(r.stdout).toContain("completions"); - }); - - test("help COMMAND shows subcommand help", () => { - for (const sub of ["show", "convert", "inspect", "stats", "demo", "completions"]) { - const r = run(["help", sub]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain(`rx ${sub}`); - expect(r.stdout).toContain("USAGE"); - } - }); - - test("COMMAND --help matches help COMMAND", () => { - for (const sub of ["show", "convert"]) { - const a = run([sub, "--help"]); - const b = run(["help", sub]); - expect(a.exitCode).toBe(0); - expect(a.stdout).toBe(b.stdout); - } - }); - - // Without FORCE_COLOR, subprocess stdout isn't a TTY → both paths render - // un-colored and the equality above is met trivially. These tests force - // color on to verify both paths actually apply the theme. 
- test("FORCE_COLOR: top-level help emits ANSI codes", () => { - const r = run(["--help"], undefined, { FORCE_COLOR: "1", NO_COLOR: "" }); - expect(r.exitCode).toBe(0); - expect(r.stdout).toMatch(/\x1b\[/); - }); - - test("FORCE_COLOR: subcommand --help emits ANSI codes", () => { - const r = run(["show", "--help"], undefined, { FORCE_COLOR: "1", NO_COLOR: "" }); - expect(r.exitCode).toBe(0); - expect(r.stdout).toMatch(/\x1b\[/); - }); - - test("FORCE_COLOR: `rx SUB --help` and `rx help SUB` are byte-identical", () => { - for (const sub of ["show", "convert"]) { - const a = run([sub, "--help"], undefined, { FORCE_COLOR: "1", NO_COLOR: "" }); - const b = run(["help", sub], undefined, { FORCE_COLOR: "1", NO_COLOR: "" }); - expect(a.stdout).toBe(b.stdout); - expect(a.stdout).toMatch(/\x1b\[/); // confirm it's actually colored - } - }); - - test("NO_COLOR takes precedence over TTY but FORCE_COLOR beats both", () => { - // NO_COLOR alone → no color even with FORCE_COLOR unset (TTY is false in subprocess) - const r1 = run(["--help"], undefined, { NO_COLOR: "1" }); - expect(r1.stdout).not.toMatch(/\x1b\[/); - // FORCE_COLOR wins against NO_COLOR - const r2 = run(["--help"], undefined, { FORCE_COLOR: "1", NO_COLOR: "1" }); - expect(r2.stdout).toMatch(/\x1b\[/); - }); - - test("`get` is no longer a subcommand", () => { - const r = run(["help", "get"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("unknown command 'get'"); - }); - - test("help typo suggests correct command", () => { - const r = run(["help", "covert"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("unknown command 'covert'"); - expect(r.stderr).toContain("did you mean 'convert'?"); - }); -}); - -describe("rx show / default action", () => { - test("bare FILE arg defaults to show", () => { - const r = run([jsonFile]); - expect(r.exitCode).toBe(0); - // Piped stdout (not TTY) → json default - expect(JSON.parse(r.stdout)).toEqual(SAMPLE); - }); - - test("rx show FILE -f json", () => { - 
const r = run(["show", rxFile, "-f", "json"]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toEqual(SAMPLE); - }); - - test("rx show FILE -f tree (bare keys, no quotes)", () => { - const r = run(["show", rxFile, "-f", "tree"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("name: \"test\""); - expect(r.stdout).toContain("count: 42"); - expect(r.stdout).not.toContain('"name":'); - }); - - test("rx show FILE -f rx emits rx text", () => { - const r = run(["show", jsonFile, "-f", "rx"]); - expect(r.exitCode).toBe(0); - expect(r.stdout.trim()).toBeTruthy(); - // Round-trip via convert - const tmp = join(dir, "roundtrip.rx"); - writeFileSync(tmp, r.stdout.trim()); - const back = run(["show", tmp, "-f", "json"]); - expect(JSON.parse(back.stdout)).toEqual(SAMPLE); - }); - - test("rx show - reads from stdin", () => { - const r = run(["show", "-", "-f", "json"], JSON.stringify(SAMPLE)); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toEqual(SAMPLE); - }); - - test("rx (no args) reads from stdin when piped", () => { - const r = run([], JSON.stringify(SAMPLE)); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toEqual(SAMPLE); - }); - - test("rx show auto-detects format from rx content", () => { - const rxText = readFileSync(rxFile, "utf8"); - const r = run(["show", "-", "-f", "json"], rxText); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toEqual(SAMPLE); - }); - - test("rx show auto-detects format from rxb content", () => { - const rxbBytes = readFileSync(rxbFile); - // Note: passing binary via stdin requires bytes, but run() takes string. Use file instead. 
- const r = run(["show", rxbFile, "-f", "json"]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toEqual(SAMPLE); - }); - - test("extra positional after FILE is treated as a path segment, not a second file", () => { - // `rx show f1 f2` is `show f1` with segment "f2" — should error as a missing - // path key in the parsed value, not as "too many files". - const r = run(["show", jsonFile, "not-a-key"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("rx show:"); - expect(r.stderr).toContain("not-a-key"); - expect(r.stderr).not.toContain("only one input"); - }); - - test("--no-color strips ANSI", () => { - const r = run(["show", rxFile, "-f", "tree", "--no-color"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).not.toMatch(/\x1b\[/); - }); - - test("rx show -o writes to file", () => { - const out = join(dir, "out.json"); - const r = run(["show", rxFile, "-f", "json", "-o", out]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toBe(""); - expect(JSON.parse(readFileSync(out, "utf8"))).toEqual(SAMPLE); - }); - - test("RX_FORMAT env pins default format", () => { - const r = run([jsonFile], undefined, { RX_FORMAT: "rx" }); - expect(r.exitCode).toBe(0); - // rx text has trailing capital letter tag — just check it's not JSON - expect(() => JSON.parse(r.stdout)).toThrow(); - }); -}); - -describe("rx convert", () => { - test("JSON → rx file", () => { - const out = join(dir, "a.rx"); - const r = run(["convert", jsonFile, out]); - expect(r.exitCode).toBe(0); - expect(existsSync(out)).toBe(true); - // Round-trip back - const back = run(["show", out, "-f", "json"]); - expect(JSON.parse(back.stdout)).toEqual(SAMPLE); - }); - - test("JSON → rxb file", () => { - const out = join(dir, "b.rxb"); - const r = run(["convert", jsonFile, out]); - expect(r.exitCode).toBe(0); - const back = run(["show", out, "-f", "json"]); - expect(JSON.parse(back.stdout)).toEqual(SAMPLE); - }); - - test("rx → JSON file", () => { - const out = join(dir, "c.json"); - const r = 
run(["convert", rxFile, out]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(readFileSync(out, "utf8"))).toEqual(SAMPLE); - }); - - test("rxb → JSON file", () => { - const out = join(dir, "d.json"); - const r = run(["convert", rxbFile, out]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(readFileSync(out, "utf8"))).toEqual(SAMPLE); - }); - - test("rx → rxb file (re-encode)", () => { - const out = join(dir, "e.rxb"); - const r = run(["convert", rxFile, out]); - expect(r.exitCode).toBe(0); - const back = run(["show", out, "-f", "json"]); - expect(JSON.parse(back.stdout)).toEqual(SAMPLE); - }); - - test("stdin → file (content-detected)", () => { - const out = join(dir, "f.rx"); - const r = run(["convert", "-", out], JSON.stringify(SAMPLE)); - expect(r.exitCode).toBe(0); - const back = run(["show", out, "-f", "json"]); - expect(JSON.parse(back.stdout)).toEqual(SAMPLE); - }); - - test("file → stdout with --to", () => { - const r = run(["convert", rxFile, "-", "--to", "json"]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toEqual(SAMPLE); - }); - - test("stdin → stdout with --from and --to", () => { - const r = run(["convert", "-", "-", "--from", "json", "--to", "rx"], JSON.stringify(SAMPLE)); - expect(r.exitCode).toBe(0); - expect(r.stdout.trim()).toBeTruthy(); - }); - - test("missing DST errors with example", () => { - const r = run(["convert", jsonFile]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("expects 2 positional arguments"); - expect(r.stderr).toContain("example: rx convert"); - }); - - test("unknown extension errors", () => { - const r = run(["convert", "foo.xyz", "bar.rx"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("cannot infer input format"); - expect(r.stderr).toContain("--from"); - }); - - test("stdout without --to errors", () => { - const r = run(["convert", rxFile, "-"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("cannot infer output format"); - 
expect(r.stderr).toContain("--to"); - }); - - test("--tune-dedup-limit 0 produces different output", () => { - const a = join(dir, "tune-a.rx"), b = join(dir, "tune-b.rx"); - const r1 = run(["convert", jsonFile, a]); - const r2 = run(["convert", jsonFile, b, "--tune-dedup-limit", "0"]); - expect(r1.exitCode).toBe(0); - expect(r2.exitCode).toBe(0); - // Same decoded value - const back1 = JSON.parse(run(["show", a, "-f", "json"]).stdout); - const back2 = JSON.parse(run(["show", b, "-f", "json"]).stdout); - expect(back1).toEqual(back2); - }); - - test("unknown flag errors", () => { - const r = run(["convert", jsonFile, rxFile, "--boom"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("unknown option: --boom"); - }); -}); - -describe("rx show with path segments (formerly `get`)", () => { - test("extracts string at path", () => { - const r = run(["show", rxFile, "name", "-f", "json"]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toBe("test"); - }); - - test("extracts number at path", () => { - const r = run(["show", rxFile, "count", "-f", "json"]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toBe(42); - }); - - test("extracts nested path with array index", () => { - const r = run(["show", rxFile, "nested", "c", "1", "-f", "json"]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toBe(20); - }); - - test("extracts subtree", () => { - const r = run(["show", rxFile, "tags", "-f", "json"]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toEqual(["alpha", "beta", "gamma"]); - }); - - test("bare-file shortcut accepts segments", () => { - const r = run([rxFile, "tags", "1", "-f", "json"]); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toBe("beta"); - }); - - test("missing key error includes path", () => { - const r = run(["show", rxFile, "nested", "nope"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("rx show:"); - expect(r.stderr).toContain("nested, nope"); - 
expect(r.stderr).toContain("not found"); - }); - - test("out-of-range index error includes length", () => { - const r = run(["show", rxFile, "tags", "99"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("rx show:"); - expect(r.stderr).toContain("index 99 out of range"); - expect(r.stderr).toContain("3-element array"); - }); - - test("can't-index error includes type", () => { - const r = run(["show", rxFile, "count", "foo"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("cannot index into number"); - }); - - test("-f rx output of subtree round-trips", () => { - const r = run(["show", rxFile, "nested", "-f", "rx"]); - expect(r.exitCode).toBe(0); - const tmp = join(dir, "sub.rx"); - writeFileSync(tmp, r.stdout.trim()); - const back = run(["show", tmp, "-f", "json"]); - expect(JSON.parse(back.stdout)).toEqual(SAMPLE.nested); - }); - - test("reads from stdin with segments", () => { - const r = run(["show", "-", "count", "-f", "json"], JSON.stringify(SAMPLE)); - expect(r.exitCode).toBe(0); - expect(JSON.parse(r.stdout)).toBe(42); - }); -}); - -describe("advanced commands", () => { - test("rx inspect emits AST JSON", () => { - const r = run(["inspect", rxFile]); - expect(r.exitCode).toBe(0); - const ast = JSON.parse(r.stdout); - expect(ast.tag).toBeTruthy(); - expect(Array.isArray(ast.children)).toBe(true); - }); - - test("rx stats shows all three formats", () => { - const r = run(["stats", jsonFile]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("json:"); - expect(r.stdout).toContain("rx text:"); - expect(r.stdout).toContain("rxb binary:"); - expect(r.stdout).toContain("bytes"); - }); - - test("rx demo shows all formats", () => { - const r = run(["demo"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("Tree view"); - expect(r.stdout).toContain("JSON"); - expect(r.stdout).toContain("rx text"); - expect(r.stdout).toContain("rxb binary"); - }); - - test("rx completions zsh emits script", () => { - const r = 
run(["completions", "zsh"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("#compdef rx"); - }); - - test("rx completions bash emits script", () => { - const r = run(["completions", "bash"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("complete -o default"); - }); - - test("completions --complete (no words) lists subcommands", () => { - const r = run(["completions", "--complete", "--", ""]); - expect(r.exitCode).toBe(0); - const lines = r.stdout.trim().split("\n"); - expect(lines).toContain("show"); - expect(lines).toContain("convert"); - expect(lines).not.toContain("get"); - }); - - test("completions --complete completes flags after subcommand", () => { - const r = run(["completions", "--complete", "--", "show", "-"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("--format"); - expect(r.stdout).toContain("--width"); - }); - - test("completions --complete completes format values after -f", () => { - const r = run(["completions", "--complete", "--", "show", "-f", ""]); - expect(r.exitCode).toBe(0); - const lines = r.stdout.trim().split("\n"); - expect(lines).toEqual(expect.arrayContaining(["tree", "json", "rx", "rxb"])); - }); - - test("top-level './' triggers file completion", () => { - // dir contains sample.json/rx/rxb fixtures created in beforeAll - const r = run(["completions", "--complete", "--", "./"], undefined, {}); - expect(r.exitCode).toBe(0); - // Should contain at least some of the data extensions when run from cwd - // (cwd here is rx project root, which has buildinfo.* files) - const lines = r.stdout.trim().split("\n"); - expect(lines.some(l => l.endsWith(".json") || l.endsWith(".rx") || l.endsWith(".rxb") || l.endsWith("/"))).toBe(true); - }); - - test("top-level word matching no subcommand falls back to files", () => { - // "buildinfo" doesn't match any subcommand → should yield buildinfo.* files - const r = run(["completions", "--complete", "--", "buildinfo"]); - expect(r.exitCode).toBe(0); - const lines = 
r.stdout.trim().split("\n").filter(Boolean); - expect(lines.length).toBeGreaterThan(0); - expect(lines.every(l => l.startsWith("buildinfo"))).toBe(true); - }); - - test("top-level subcommand prefix takes precedence over files", () => { - // "s" matches subcommands (show, stats) — should NOT include any files even - // if there's a file starting with "s" in cwd - const r = run(["completions", "--complete", "--", "s"]); - expect(r.exitCode).toBe(0); - const lines = r.stdout.trim().split("\n"); - expect(lines).toContain("show"); - expect(lines).toContain("stats"); - // All entries must be subcommands (no path separators, no extensions) - for (const l of lines) { - expect(l).not.toContain("/"); - expect(l).not.toContain("."); - } - }); -}); - -describe("backwards-compat completion shim", () => { - // Shell scripts installed by rx <= 0.8.x invoke `rx --completions -- `. - // New scripts use `rx completions --complete -- `. Both must work so - // users upgrading to 0.9+ without reinstalling get sane completions. 
- test("--completions -- (no words) lists subcommands", () => { - const r = run(["--completions", "--", ""]); - expect(r.exitCode).toBe(0); - const lines = r.stdout.trim().split("\n"); - expect(lines).toContain("show"); - expect(lines).toContain("convert"); - }); - - test("--completions -- 'show' lists subcommand match", () => { - const r = run(["--completions", "--", "show"]); - expect(r.exitCode).toBe(0); - expect(r.stdout.trim()).toBe("show"); - }); - - test("--completions zsh emits script", () => { - const r = run(["--completions", "zsh"]); - expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("#compdef rx"); - }); -}); - -describe("top-level errors", () => { - test("unknown subcommand errors with suggestion", () => { - const r = run(["--boom"]); - expect(r.exitCode).toBe(2); - expect(r.stderr).toContain("unknown command or option"); - }); - - test("missing file errors cleanly", () => { - const r = run([join(dir, "does-not-exist.rx")]); - expect(r.exitCode).toBe(1); - expect(r.stderr).toContain("rx:"); - }); -}); diff --git a/rx-cli.ts b/rx-cli.ts index cf3d698..fa13c21 100755 --- a/rx-cli.ts +++ b/rx-cli.ts @@ -1,14 +1,11 @@ +import { stringify, tune } from "./rx.ts"; + +// Snapshot the active defaults for help text. `tune({})` is a no-op call that +// returns the current values (no per-call constants are exported). 
+const DEFAULTS = tune({}); import { - stringify, encode, - tune, - INDEX_THRESHOLD, STRING_CHAIN_THRESHOLD, STRING_CHAIN_DELIMITER, DEDUP_COMPLEXITY_LIMIT, -} from "./rx.ts"; -import { - open, inspect, - makeCursor, read, + decode, } from "./rx-read.ts"; -import { encode as rxbEncode } from "./rxb.ts"; -import { open as rxbOpen } from "./rxb-read.ts"; import { readdirSync } from "node:fs"; import { readFile, writeFile, mkdir, unlink, lstat } from "node:fs/promises"; import { homedir } from "node:os"; @@ -49,30 +46,24 @@ function applyTheme(color: boolean) { // ── Formats & detection ────────────────────────────────────────────────────── -type Format = "json" | "rx" | "rxb"; +type Format = "json" | "rx"; type OutputFormat = Format | "tree"; -const VALID_FORMATS: readonly OutputFormat[] = ["json", "rx", "rxb", "tree"] as const; +const VALID_FORMATS: readonly OutputFormat[] = ["json", "rx", "tree"] as const; function formatFromExt(path: string): Format | undefined { if (path.endsWith(".json")) return "json"; if (path.endsWith(".rx")) return "rx"; - if (path.endsWith(".rxb")) return "rxb"; return undefined; } // Content-based format detection for stdin / unknown extensions. function detectFormat(bytes: Uint8Array): Format { if (bytes.length === 0) return "rx"; - // rxb starts with a tag byte < 0x20 (control range) that JSON/rx never produce as first byte. - const first = bytes[0]!; - if (first < 0x20 && first !== 0x09 && first !== 0x0a && first !== 0x0d) return "rxb"; - // Try parsing as rx; if it consumes all bytes, it's rx. + // Try parsing as rx; if it succeeds, it's rx. 
try { - const trimmed = trimWhitespace(bytes); - const c = makeCursor(trimmed); - read(c); - if (c.left === 0) return "rx"; + decode(trimWhitespace(bytes)); + return "rx"; } catch { /* not rx */ } return "json"; } @@ -117,14 +108,12 @@ type ParsedInput = { value: unknown; inputFormat: Format; rxBytes?: Uint8Array; // present when input was rx - rxbBytes?: Uint8Array; // present when input was rxb }; function parseBytes(bytes: Uint8Array, format: Format): ParsedInput { - if (format === "rxb") return { value: rxbOpen(bytes), inputFormat: "rxb", rxbBytes: bytes }; if (format === "rx") { const trimmed = trimWhitespace(bytes); - return { value: open(trimmed), inputFormat: "rx", rxBytes: trimmed }; + return { value: decode(trimmed), inputFormat: "rx", rxBytes: trimmed }; } const text = new TextDecoder().decode(bytes); return { value: JSON.parse(stripJsonComments(text)), inputFormat: "json" }; @@ -297,7 +286,6 @@ function normalizeForJson(value: unknown, inArray: boolean): unknown { // Render `value` in the requested format, returning bytes to write. function render(value: unknown, format: OutputFormat, color: boolean, width: number): Uint8Array { - if (format === "rxb") return rxbEncode(value); if (format === "rx") return new TextEncoder().encode(stringify(value) + "\n"); if (format === "json") { const text = JSON.stringify(normalizeForJson(value, false), null, 2) ?? 
"null"; @@ -394,8 +382,8 @@ function parseFormatFlag(v: string | undefined, flag: string, subcmd: string): O function parseInputFormatFlag(v: string | undefined, flag: string, subcmd: string): Format { if (!v) fail(subcmd, `${flag} requires a value`, `example: ${flag} json`); - if (v !== "json" && v !== "rx" && v !== "rxb") { - fail(subcmd, `${flag} value '${v}' not recognized`, `expected one of: json | rx | rxb`); + if (v !== "json" && v !== "rx") { + fail(subcmd, `${flag} value '${v}' not recognized`, `expected one of: json | rx`); } return v; } @@ -421,13 +409,13 @@ ${tH2}USAGE${tR} ${tCmd}rx${tR} ${tArg}FILE${tR} [${tArg}SEGMENT${tR}...] ${tDesc}# shortcut, no subcommand needed${tR} ${tH2}ARGUMENTS${tR} - ${tArg}FILE${tR} Path to .json, .rx, or .rxb. Use ${tArg}-${tR} for stdin. + ${tArg}FILE${tR} Path to .json, .rx. Use ${tArg}-${tR} for stdin. Format auto-detected by extension then by content. ${tArg}SEGMENT${tR} One key or numeric index per segment. No segments = entire file. ${tH2}OPTIONS${tR} - ${tCmd}-f${tR}, ${tCmd}--format${tR} ${tArg}FMT${tR} Output format: ${tArg}tree${tR} | ${tArg}json${tR} | ${tArg}rx${tR} | ${tArg}rxb${tR} + ${tCmd}-f${tR}, ${tCmd}--format${tR} ${tArg}FMT${tR} Output format: ${tArg}tree${tR} | ${tArg}json${tR} | ${tArg}rx${tR} ${tCmd}-w${tR}, ${tCmd}--width${tR} ${tArg}N${tR} Target line width for tree output ${tDim}(default: 80)${tR} ${tCmd}-c${tR}, ${tCmd}--color${tR} Force ANSI color ${tCmd}--no-color${tR} Disable color @@ -497,7 +485,7 @@ async function runShow(argv: string[]): Promise { applyTheme(color); const parsed = await readSource(opts.file); const value = opts.segments.length > 0 ? 
applyPath(parsed.value, opts.segments) : parsed.value; - const bytes = render(value, format, color && format !== "rxb", opts.width); + const bytes = render(value, format, color, opts.width); if (opts.output) await writeFile(opts.output, bytes); else process.stdout.write(bytes); } @@ -506,7 +494,7 @@ async function runShow(argv: string[]): Promise { function helpConvert(): string { return ` -${tH1}rx convert${tR} — convert between JSON, rx, and rxb formats. +${tH1}rx convert${tR} — convert between JSON, rx formats. ${tH2}USAGE${tR} ${tCmd}rx convert${tR} ${tArg}SRC${tR} ${tArg}DST${tR} @@ -518,31 +506,30 @@ ${tH2}ARGUMENTS${tR} ${tArg}SRC${tR} Input path, or ${tArg}-${tR} for stdin ${tArg}DST${tR} Output path, or ${tArg}-${tR} for stdout - Extension determines format: ${tArg}.json${tR}, ${tArg}.rx${tR}, ${tArg}.rxb${tR}. + Extension determines format: ${tArg}.json${tR}, ${tArg}.rx${tR}, ${tArg}. When either side is ${tArg}-${tR}, pass ${tCmd}--from${tR} or ${tCmd}--to${tR} to set its format. ${tCmd}--from${tR} may be omitted: stdin is content-detected. 
${tH2}OPTIONS${tR} - ${tCmd}--from${tR} ${tArg}FMT${tR} Input format: ${tArg}json${tR} | ${tArg}rx${tR} | ${tArg}rxb${tR} - ${tCmd}--to${tR} ${tArg}FMT${tR} Output format: ${tArg}json${tR} | ${tArg}rx${tR} | ${tArg}rxb${tR} - ${tCmd}--tune-index-threshold${tR} ${tArg}N${tR} Index objects/arrays larger than N ${tDim}(default: ${INDEX_THRESHOLD})${tR} - ${tCmd}--tune-chain-threshold${tR} ${tArg}N${tR} Split strings longer than N ${tDim}(default: ${STRING_CHAIN_THRESHOLD})${tR} - ${tCmd}--tune-chain-delimiter${tR} ${tArg}S${tR} Delimiters for chain splitting ${tDim}(default: ${STRING_CHAIN_DELIMITER})${tR} - ${tCmd}--tune-dedup-limit${tR} ${tArg}N${tR} Max node count for structural dedup ${tDim}(default: ${DEDUP_COMPLEXITY_LIMIT})${tR} + ${tCmd}--from${tR} ${tArg}FMT${tR} Input format: ${tArg}json${tR} | ${tArg}rx${tR} + ${tCmd}--to${tR} ${tArg}FMT${tR} Output format: ${tArg}json${tR} | ${tArg}rx${tR} + ${tCmd}--tune-index-threshold${tR} ${tArg}N${tR} Index objects/arrays with body bytes >= N ${tDim}(default: ${DEFAULTS.indexThreshold})${tR} + ${tCmd}--tune-chain-threshold${tR} ${tArg}N${tR} Split strings longer than N ${tDim}(default: ${DEFAULTS.stringChainThreshold})${tR} + ${tCmd}--tune-chain-delimiter${tR} ${tArg}S${tR} Delimiters for chain splitting ${tDim}(default: ${DEFAULTS.stringChainDelimiter})${tR} + ${tCmd}--tune-dedup-limit${tR} ${tArg}N${tR} Max node count for structural dedup ${tDim}(default: ${DEFAULTS.dedupComplexityLimit})${tR} + ${tCmd}--min-index-depth${tR} ${tArg}N${tR} Containers shallower than depth N always get an index ${tDim}(default: 0)${tR} + ${tCmd}--max-index-depth${tR} ${tArg}N${tR} Containers at depth N or deeper never get an index ${tDim}(default: ∞)${tR} + Root is depth 0. Use ${tArg}--min-index-depth 1 --max-index-depth 1${tR} to index only the root. 
${tCmd}-h${tR}, ${tCmd}--help${tR} Show this help ${tH2}EXAMPLES${tR} ${tCmd}rx convert${tR} ${tArg}data.json${tR} ${tArg}data.rx${tR} ${tDesc}# JSON → rx${tR} - ${tCmd}rx convert${tR} ${tArg}data.json${tR} ${tArg}data.rxb${tR} ${tDesc}# JSON → rxb${tR} ${tCmd}rx convert${tR} ${tArg}data.rx${tR} ${tArg}data.json${tR} ${tDesc}# rx → JSON${tR} - ${tCmd}rx convert${tR} ${tArg}data.rxb${tR} ${tArg}data.json${tR} ${tDesc}# rxb → JSON${tR} - ${tCmd}rx convert${tR} ${tArg}data.rx${tR} ${tArg}data.rxb${tR} ${tDesc}# rx → rxb (re-encode)${tR} ${tCmd}cat${tR} ${tArg}data.json${tR} | ${tCmd}rx convert${tR} ${tArg}-${tR} ${tArg}data.rx${tR} ${tCmd}rx convert${tR} ${tArg}data.rx${tR} ${tArg}-${tR} ${tCmd}--to${tR} ${tArg}json${tR} > ${tArg}data.json${tR} - ${tCmd}curl${tR} ${tArg}-s${tR} ${tArg}https://ex/api.json${tR} | ${tCmd}rx convert${tR} ${tArg}-${tR} ${tArg}snap.rxb${tR} - ${tCmd}rx convert${tR} ${tArg}big.json${tR} ${tArg}big.rxb${tR} ${tCmd}--tune-dedup-limit${tR} ${tArg}128${tR} + ${tCmd}rx convert${tR} ${tArg}big.json${tR} ${tArg}big.rx${tR} ${tCmd}--tune-dedup-limit${tR} ${tArg}128${tR} `; } @@ -555,6 +542,8 @@ type ConvertOpts = { tuneChain?: number; tuneDelim?: string; tuneDedup?: number; + minIndexDepth?: number; + maxIndexDepth?: number; }; function parseConvertArgs(argv: string[]): ConvertOpts { @@ -572,6 +561,8 @@ function parseConvertArgs(argv: string[]): ConvertOpts { opts.tuneDelim = v; continue; } if (arg === "--tune-dedup-limit") { opts.tuneDedup = parseIntFlag(argv[++i], arg, "convert"); continue; } + if (arg === "--min-index-depth") { opts.minIndexDepth = parseIntFlag(argv[++i], arg, "convert"); continue; } + if (arg === "--max-index-depth") { opts.maxIndexDepth = parseIntFlag(argv[++i], arg, "convert"); continue; } if (arg === "-") { positional.push("-"); continue; } if (arg.startsWith("-")) fail("convert", `unknown option: ${arg}`, `run 'rx convert --help' for usage`); positional.push(arg); @@ -593,7 +584,7 @@ async function runConvert(argv: 
string[]): Promise { if (!inFmt && opts.src !== "-") inFmt = formatFromExt(opts.src); if (!inFmt && opts.src !== "-") { fail("convert", `cannot infer input format from '${opts.src}'`, - `pass --from json|rx|rxb or use a .json/.rx/.rxb extension`); + `pass --from json|rx or use a .json/.rx extension`); } // (src === "-" and !inFmt): we'll content-detect inside readSource @@ -602,7 +593,7 @@ async function runConvert(argv: string[]): Promise { if (!outFmt && opts.dst !== "-") outFmt = formatFromExt(opts.dst); if (!outFmt) { fail("convert", `cannot infer output format for '${opts.dst}'`, - `pass --to json|rx|rxb or use a .json/.rx/.rxb extension`); + `pass --to json|rx or use a .json/.rx extension`); } tune({ @@ -613,10 +604,16 @@ async function runConvert(argv: string[]): Promise { }); const parsed = await readSource(opts.src, inFmt); - const bytes = render(parsed.value, outFmt, false, 80); - // render() adds a newline for text formats; for rxb and stdout-piped rx, that's fine. - // For file writes in rxb we want raw bytes: - const toWrite = outFmt === "rxb" ? rxbEncode(parsed.value) : bytes; + let toWrite: Uint8Array; + if (outFmt === "rx") { + const text = stringify(parsed.value, { + minIndexDepth: opts.minIndexDepth, + maxIndexDepth: opts.maxIndexDepth, + }); + toWrite = new TextEncoder().encode(text + "\n"); + } else { + toWrite = render(parsed.value, outFmt, false, 80); + } if (opts.dst === "-") process.stdout.write(toWrite); else await writeFile(opts.dst, toWrite); } @@ -702,9 +699,9 @@ async function runInspect(argv: string[]): Promise { const useColor = resolveColor(color, isTTY); applyTheme(useColor); const parsed = await readSource(file); - const rxBytes = parsed.rxBytes ?? encode(parsed.value); - const ast = inspect(rxBytes); - const text = JSON.stringify(ast, null, 2); + // AST inspector removed with the cursor-based reader rewrite. Fall back to + // rendering the decoded value as JSON. 
+ const text = JSON.stringify(parsed.value, null, 2); const out = new TextEncoder().encode((useColor ? highlightJSON(text) : text) + "\n"); if (output) await writeFile(output, out); else process.stdout.write(out); @@ -739,14 +736,12 @@ async function runStats(argv: string[]): Promise { const parsed = await readSource(file); const jsonBytes = new TextEncoder().encode(JSON.stringify(parsed.value)).length; const rxBytes = parsed.rxBytes ? parsed.rxBytes.length : new TextEncoder().encode(stringify(parsed.value)).length; - const rxbBytes = parsed.rxbBytes ? parsed.rxbBytes.length : rxbEncode(parsed.value).length; const source = parsed.inputFormat; const pct = (n: number, base: number) => base === 0 ? "—" : `${((1 - n / base) * 100).toFixed(1)}% smaller`; process.stdout.write( `source format: ${source}\n` + `json: ${jsonBytes.toLocaleString()} bytes\n` + - `rx text: ${rxBytes.toLocaleString()} bytes (${pct(rxBytes, jsonBytes)} than json)\n` + - `rxb binary: ${rxbBytes.toLocaleString()} bytes (${pct(rxbBytes, jsonBytes)} than json, ${pct(rxbBytes, rxBytes)} than rx)\n`, + `rx text: ${rxBytes.toLocaleString()} bytes (${pct(rxBytes, jsonBytes)} than json)\n`, ); } @@ -754,13 +749,12 @@ async function runStats(argv: string[]): Promise { function helpDemo(): string { return ` -${tH1}rx demo${tR} — show an example value in all three formats side by side. +${tH1}rx demo${tR} — show an example value in JSON, rx text, and tree form. ${tH2}USAGE${tR} ${tCmd}rx demo${tR} -Prints a built-in sample value in JSON, rx text, and rxb binary form. -Useful for learning what the formats look like. +Prints a built-in sample value to demonstrate what the formats look like. 
`; } @@ -781,24 +775,12 @@ async function runDemo(argv: string[]): Promise { flags: { cache: true, compress: true }, }; const rxText = stringify(sample); - const rxbBytes = rxbEncode(sample); const jsonText = JSON.stringify(sample, null, 2); const tree = treeStringify(sample, 80); const w = (title: string, body: string) => `${tH2}${title}${tR}\n${body}\n\n`; process.stdout.write("\n" + w("Tree view", color ? tree.split("\n").map(highlightTree).join("\n") : tree)); process.stdout.write(w("JSON", color ? highlightJSON(jsonText) : jsonText)); process.stdout.write(w(`rx text (${new TextEncoder().encode(rxText).length} bytes)`, rxText)); - process.stdout.write(w(`rxb binary (${rxbBytes.length} bytes, shown as hex)`, hex(rxbBytes))); -} - -function hex(bytes: Uint8Array): string { - let out = ""; - for (let i = 0; i < bytes.length; i += 16) { - const row: string[] = []; - for (let j = 0; j < 16 && i + j < bytes.length; j++) row.push(bytes[i + j]!.toString(16).padStart(2, "0")); - out += row.join(" ") + "\n"; - } - return out; } // ── Subcommand: completions ────────────────────────────────────────────────── @@ -923,11 +905,11 @@ async function handleCompleteRequest(words: string[]): Promise { return; } if (prev === "-f" || prev === "--format") { - process.stdout.write(["tree", "json", "rx", "rxb"].filter(s => s.startsWith(current)).join("\n") + "\n"); + process.stdout.write(["tree", "json", "rx"].filter(s => s.startsWith(current)).join("\n") + "\n"); return; } if (prev === "--from" || prev === "--to") { - process.stdout.write(["json", "rx", "rxb"].filter(s => s.startsWith(current)).join("\n") + "\n"); + process.stdout.write(["json", "rx"].filter(s => s.startsWith(current)).join("\n") + "\n"); return; } // File completion with data-extension priority @@ -945,7 +927,7 @@ const FLAGS_BY_SUB: Record = { help: ["--all"], }; -const DATA_EXTENSIONS = [".json", ".rx", ".rxb"]; +const DATA_EXTENSIONS = [".json", ".rx"]; function looksLikePath(s: string): boolean { if (s === "." 
|| s === ".." || s === "~") return true; @@ -991,20 +973,20 @@ ${tH2}USAGE${tR} ${tH2}COMMANDS${tR} ${tCmd}show${tR} Pretty-print a file or a value at a path - ${tCmd}convert${tR} Convert between JSON, rx, and rxb + ${tCmd}convert${tR} Convert between JSON, rx ${tCmd}help${tR} Show help for a command (${tCmd}rx help${tR} ${tArg}COMMAND${tR}) ${tH2}FORMATS${tR} ${tArg}.json${tR} JSON text ${tArg}.rx${tR} rx text format (compact, human-readable) - ${tArg}.rxb${tR} rx binary format (smallest) + ${tArg} rx binary format (smallest) ${tH2}GLOBAL OPTIONS${tR} ${tCmd}-h${tR}, ${tCmd}--help${tR} Show this help ${tCmd}-v${tR}, ${tCmd}--version${tR} Print version (${VERSION}) ${tH2}ENVIRONMENT${tR} - ${tArg}RX_FORMAT${tR} Pin default output format (${tArg}tree${tR} | ${tArg}json${tR} | ${tArg}rx${tR} | ${tArg}rxb${tR}) + ${tArg}RX_FORMAT${tR} Pin default output format (${tArg}tree${tR} | ${tArg}json${tR} | ${tArg}rx${tR}) ${tArg}NO_COLOR${tR} Disable ANSI color when set ${tH2}EXAMPLES${tR} diff --git a/rx-read.ts b/rx-read.ts index 483c49b..93a8e32 100644 --- a/rx-read.ts +++ b/rx-read.ts @@ -1,1422 +1,470 @@ /////////////////////////////////////////////////////////////////// // -// RX Reader — cursor-based decoder and Proxy API for RX text data. +// RX Reader — decoder for the paired-delimiter RX format. // -// Provides zero-copy random access into RX-encoded buffers: -// read() — parse one node at a byte offset -// findKey() — O(log n) key lookup on indexed objects -// open() — returns a read-only Proxy that looks like plain JS -// decode() — alias for open() -// parse() — decode from a string -// inspect() — lazy AST for encoding structure analysis +// Top-level API: +// decode(data) — main entry; returns native or lazy-proxy values +// parse(text) — same, from a string +// open(data) — alias for decode (for symmetry with the encoder API) // -// Import the encoder from "./rx.ts". 
+// Decoding strategy: +// - Containers WITHOUT an index → eager decode to a native JS object/array +// - Containers WITH an index → lazy Proxy that defers child decoding +// until property access (uses the index +// for O(1) array element / O(log n) object +// key lookup) // -/////////////////////////////////////////////////////////////////// - -import { - b64Read, - b64decodeTable, - b64encodeTable, - isB64, - fromZigZag, - TAG_COMMA, - TAG_DOT, - TAG_COLON, - TAG_SEMI, - TAG_HASH, - TAG_CARET, - TAG_PLUS, - TAG_STAR, -} from "./rx.ts"; - -const textEncoder = new TextEncoder(); -const textDecoder = new TextDecoder(); - -// ── Tags ── - -export type Tag = - | "int" - | "float" - | "str" - | "ref" - | "true" - | "false" - | "null" - | "undef" - | "array" - | "object" - | "ptr" - | "chain"; - -// ── Cursor ── - -export interface Cursor { - data: Uint8Array; - left: number; - right: number; - tag: Tag; - val: number; - ixWidth: number; - ixCount: number; - schema: number; +////////////////////////////////////////////////////////////////// + +import { isB64, fromZigZag, b64decodeTable } from "./rx.ts"; + +const td = new TextDecoder(); +const te = new TextEncoder(); + +// ── Tag bytes ────────────────────────────────────────────────── +const T_PLUS = 43; // + +const T_STAR = 42; // * +const T_COMMA = 44; // , +const T_QUOTE = 39; // ' +const T_CARET = 94; // ^ +const T_HASH = 35; // # +const T_DOT = 46; // . 
+const T_AT = 64; // @ +const T_LBRACK = 91; // [ +const T_RBRACK = 93; // ] +const T_LBRACE = 123; // { +const T_RBRACE = 125; // } +const T_LANGLE = 60; // < +const T_RANGLE = 62; // > + +// ── b64 helpers ──────────────────────────────────────────────── + +function readB64(data: Uint8Array, start: number, end: number): number { + let v = 0; + for (let i = start; i < end; i++) v = v * 64 + b64decodeTable[data[i]!]!; + return v; } -export function makeCursor(data: Uint8Array): Cursor { - return { - data, - left: 0, - right: data.length, - tag: "null", - val: 0, - ixWidth: 0, - ixCount: 0, - schema: 0, - }; +// Find the tag for the value whose right edge is at `right` (exclusive). +function findTag(data: Uint8Array, right: number): { tag: number; tagPos: number } { + let p = right; + while (--p >= 0 && isB64(data[p]!)); + return { tag: data[p]!, tagPos: p }; } -// Internal scratch cursors — reused across calls to avoid allocations. -// Safe because JS is single-threaded and these functions don't re-enter each other. -const _empty = new Uint8Array(0); -const _k: Cursor = makeCursor(_empty); // key/temp cursor -const _s: Cursor = makeCursor(_empty); // schema cursor -const _cc: Cursor = makeCursor(_empty); // collectChildren cursor (separate from _k to avoid conflict with read()) -const _cmp: Cursor = makeCursor(_empty); // comparison scratch cursor (strCompare/strEquals/strHasPrefix) - -// ── Core parsing ── - -// Scan left from c.right past b64 digits. Sets c.left to the tag position. -// Returns the tag byte. b64 digits are at data[c.left+1 .. c.right). -function peekTag(c: Cursor): number { - const { data } = c; - let offset = c.right; - while (--offset >= 0 && isB64(data[offset]!)); - if (offset < 0) throw new SyntaxError("peekTag: no tag found"); - c.left = offset; - return data[offset]!; +// Read an index header ending at `right`. Returns null if no `#`-tagged node ends here. 
+interface IndexInfo { + count: number; + width: number; + entriesStart: number; + entriesEnd: number; + base: number; // delta reference point = entriesStart + leftEdge: number; } -// Unpack index metadata into cursor: low 3 bits = width-1, rest = count -function unpackIndex(c: Cursor, data: Uint8Array, left: number, right: number): void { - const packed = b64Read(data, left, right); - c.ixWidth = (packed & 0b111) + 1; - c.ixCount = packed >> 3; +function tryReadIndex(data: Uint8Array, right: number): IndexInfo | null { + const { tag, tagPos } = findTag(data, right); + if (tag !== T_HASH) return null; + const packed = readB64(data, tagPos + 1, right); + const width = (packed & 7) + 1; + const count = packed >>> 3; + const entriesEnd = tagPos; + const entriesStart = entriesEnd - count * width; + return { count, width, entriesStart, entriesEnd, base: entriesStart, leftEdge: entriesStart }; } -/** Read one node ending at c.right. Fills all cursor fields. Returns the tag. */ -export function read(c: Cursor): Tag { - const { data } = c; - let { right } = c; - - // Reset container fields - c.ixWidth = 0; - c.ixCount = 0; - c.schema = 0; - - // Find the tag: peekTag sets c.left to tag position - const tag = peekTag(c); - let { left } = c; - - if (tag === 0x27) { - // ' — ref or builtin - // Name bytes are at data[left+1..right), b64 digits overlap with name - const nameLen = right - left - 1; - // Check builtins by length + first byte - if (nameLen === 1) { - const ch = data[left + 1]!; - if (ch === 0x74) { c.tag = "true"; c.val = 0; return c.tag; } // t - if (ch === 0x66) { c.tag = "false"; c.val = 0; return c.tag; } // f - if (ch === 0x6e) { c.tag = "null"; c.val = 0; return c.tag; } // n - if (ch === 0x75) { c.tag = "undef"; c.val = 0; return c.tag; } // u - } else if (nameLen === 3) { - const a = data[left + 1]!, b = data[left + 2]!, d = data[left + 3]!; - if (a === 0x69 && b === 0x6e && d === 0x66) { c.tag = "float"; c.val = Infinity; return c.tag; } // inf - if (a 
=== 0x6e && b === 0x69 && d === 0x66) { c.tag = "float"; c.val = -Infinity; return c.tag; } // nif - if (a === 0x6e && b === 0x61 && d === 0x6e) { c.tag = "float"; c.val = NaN; return c.tag; } // nan - } - c.val = nameLen; - return c.tag = "ref"; - } - - const b64 = b64Read(data, left + 1, right); - - switch (tag) { - case 0x2c: // , — string (most common) - c.left = left - b64; - c.val = b64; - return c.tag = "str"; - - case 0x2b: // + — integer - c.val = fromZigZag(b64); - return c.tag = "int"; - - case 0x2a: { // * — float (exponent) - const exp = fromZigZag(b64); - const savedRight = c.right; - c.right = left; - read(c); - c.val = parseFloat(`${c.val}e${exp}`); - c.right = savedRight; - return c.tag = "float"; - } - - case 0x3a: { // : — object - let content = left; - c.left = left - b64; - // Parse optional schema (rightmost), then optional index - if (content > c.left) { - _k.data = data; - _k.right = content; - let innerTag = peekTag(_k); - // Schema: ' (ref) or ^ (pointer to container) - if (innerTag === 0x27 || innerTag === 0x5e) { - let isSchema = true; - if (innerTag === 0x5e) { - const target = _k.left - b64Read(data, _k.left + 1, content); - _s.data = data; - _s.right = target; - const targetTag = peekTag(_s); - isSchema = targetTag === 0x3b || targetTag === 0x3a; - } - if (isSchema) { - c.schema = content; - content = _k.left; - } - } - // Index: # - if (content > c.left) { - _k.right = content; - innerTag = peekTag(_k); - if (innerTag === 0x23) { - unpackIndex(c, data, _k.left + 1, content); - content = _k.left - c.ixWidth * c.ixCount; - } - } - } - c.val = content; - return c.tag = "object"; - } - - case 0x3b: { // ; — array - let content = left; - c.left = left - b64; - // Check for index - if (content > c.left) { - _k.data = data; - _k.right = content; - const ixTag = peekTag(_k); - if (ixTag === 0x23) { // # - unpackIndex(c, data, _k.left + 1, content); - content = _k.left - c.ixWidth * c.ixCount; - } - } - c.val = content; - return c.tag = 
"array"; - } - - case 0x5e: // ^ — pointer - c.val = left - b64; - return c.tag = "ptr"; - - case 0x2e: // . — chain - c.left = left - b64; - c.val = left; - return c.tag = "chain"; - - default: - throw new SyntaxError(`Unknown tag: ${String.fromCharCode(tag)}`); - } +// Read entry i (natural order) of an index — returns the byte position of the +// right edge of the value/key being indexed. +function indexEntryTarget(data: Uint8Array, idx: IndexInfo, i: number): number { + // Entries are stored in REVERSE natural order; entry[i] sits at L-to-R + // position (count - 1 - i) in the entries region. + const off = idx.entriesStart + (idx.count - 1 - i) * idx.width; + const delta = readB64(data, off, off + idx.width); + return idx.base - delta; } -// ── String handling ── +// ── Refs ─────────────────────────────────────────────────────── -// String body start offset. For "str": body is at [left, left+val). -// For "ref": name is at [left+1, left+1+val) (skip the ' tag byte). -function strStart(c: Cursor): number { - return c.left + (c.tag === "ref" ? 1 : 0); +function decodeRefName(name: string): unknown { + switch (name) { + case "t": return true; + case "f": return false; + case "n": return null; + case "u": return undefined; + case "inf": return Infinity; + case "nif": return -Infinity; + case "nan": return NaN; + default: return Symbol.for(name); + } } -/** Decode the string at cursor position to a JS string. 1 allocation. 
*/ -export function readStr(c: Cursor): string { - const start = strStart(c); - return textDecoder.decode(c.data.subarray(start, start + c.val)); +// ── Bytes ────────────────────────────────────────────────────── + +function decodeBytesBody(data: Uint8Array, bodyStart: number, bodyEnd: number): Uint8Array { + const L = bodyEnd - bodyStart; + if (L === 0) return new Uint8Array(0); + const m = L & 3; + let outLen: number; + if (m === 0) outLen = (L * 3) >> 2; + else if (m === 2) outLen = (3 * L - 2) >> 2; + else if (m === 3) outLen = (3 * L - 1) >> 2; + else throw new SyntaxError(`Bytes body length ${L} (mod 4 = 1) is invalid`); + const out = new Uint8Array(outLen); + let oi = 0; + let bits = 0; + let buf = 0; + for (let i = bodyStart; i < bodyEnd; i++) { + const v = b64decodeTable[data[i]!]!; + if (v === 0xff) throw new SyntaxError(`Invalid b64 char in bytes body at ${i}`); + buf = (buf << 6) | v; + bits += 6; + if (bits >= 8) { + bits -= 8; + out[oi++] = (buf >>> bits) & 0xff; + } + } + return out; } -/** Resolve a node to a string, following pointers and concatenating chains. - * For plain "str" nodes this is just readStr. - * Non-destructive: restores cursor state before returning. 
*/ -export function resolveStr(c: Cursor): string { - const savedLeft = c.left, savedRight = c.right, savedTag = c.tag, savedVal = c.val; - const result = _resolveStr(c); - c.left = savedLeft; c.right = savedRight; c.tag = savedTag; c.val = savedVal; - return result; -} +// ── Decode ───────────────────────────────────────────────────── -function _resolveStr(c: Cursor): string { - while (c.tag === "ptr") { c.right = c.val; read(c); } - if (c.tag === "str") return readStr(c); - if (c.tag === "chain") { - const parts: string[] = []; - let right = c.val; - const left = c.left; - while (right > left) { - c.right = right; - read(c); - right = c.left; - parts.push(_resolveStr(c)); - } - return parts.join(""); - } - throw new TypeError(`resolveStr: expected str, ptr, or chain, got ${c.tag}`); +interface DecodeResult { + value: unknown; + leftEdge: number; } -/** Encode a string to UTF-8 bytes for use with strEquals/strCompare. */ -export function prepareKey(target: string): Uint8Array { - return textEncoder.encode(target); -} +function decodeNode(data: Uint8Array, right: number): DecodeResult { + const { tag, tagPos } = findTag(data, right); + const vStart = tagPos + 1; -/** - * Compare a node's string bytes against key bytes starting at offset. - * Handles str, ptr, and chain (zero-alloc for all). - * Returns { cmp, offset } where cmp is <0, 0, or >0 for the first difference, - * NaN if the node is not a string type, and offset is how far into the key bytes. - */ -function nodeCompare(c: Cursor, key: Uint8Array, offset: number): { cmp: number; offset: number } { - while (c.tag === "ptr") { c.right = c.val; read(c); } + switch (tag) { + case T_PLUS: + return { value: fromZigZag(readB64(data, vStart, right)), leftEdge: tagPos }; - if (c.tag === "str" || c.tag === "ref") { - const start = strStart(c); - const byteLen = c.val; - const { data } = c; - const len = Math.min(byteLen, key.length - offset); - for (let i = 0; i < len; i++) { - const diff = data[start + i]! 
- key[offset + i]!; - if (diff !== 0) return { cmp: diff, offset: offset + i }; + case T_STAR: { + const exp = fromZigZag(readB64(data, vStart, right)); + const base = decodeNode(data, tagPos); + return { value: (base.value as number) * Math.pow(10, exp), leftEdge: base.leftEdge }; } - if (byteLen > key.length - offset) return { cmp: 1, offset: key.length }; - return { cmp: 0, offset: offset + byteLen }; - } - if (c.tag === "chain") { - let right = c.val; - const left = c.left; - while (right > left) { - c.right = right; - read(c); - right = c.left; - const result = nodeCompare(c, key, offset); - if (result.cmp !== 0) return result; - offset = result.offset; + case T_COMMA: { + const len = readB64(data, vStart, right); + const bodyStart = tagPos - len; + return { value: td.decode(data.subarray(bodyStart, tagPos)), leftEdge: bodyStart }; } - return { cmp: 0, offset }; - } - - return { cmp: NaN, offset }; -} - -/** Compare cursor's string against target. Returns <0, 0, >0, or NaN if not a string node. - * Non-destructive: uses an internal scratch cursor, leaving c unchanged. */ -export function strCompare(c: Cursor, target: Uint8Array): number { - _cmp.data = c.data; _cmp.left = c.left; _cmp.right = c.right; _cmp.tag = c.tag; _cmp.val = c.val; - const { cmp, offset } = nodeCompare(_cmp, target, 0); - if (cmp !== 0) return cmp; - return offset < target.length ? -1 : 0; -} - -/** Zero-alloc equality check: does cursor's string match target? - * Non-destructive: uses an internal scratch cursor, leaving c unchanged. */ -export function strEquals(c: Cursor, target: Uint8Array): boolean { - return strCompare(c, target) === 0; -} - -/** Zero-alloc prefix check: does cursor's string start with prefix? - * Non-destructive: uses an internal scratch cursor, leaving c unchanged. 
*/ -export function strHasPrefix(c: Cursor, prefix: Uint8Array): boolean { - if (prefix.length === 0) return true; - _cmp.data = c.data; _cmp.left = c.left; _cmp.right = c.right; _cmp.tag = c.tag; _cmp.val = c.val; - const { offset } = nodeCompare(_cmp, prefix, 0); - return offset === prefix.length; -} - -// ── Container access ── - -/** Jump to the Nth child of an indexed container. O(1). Reads the child into c. */ -export function seekChild(c: Cursor, container: Cursor, index: number): void { - if (container.ixWidth === 0) { - throw new Error("seekChild requires an indexed container"); - } - if (index < 0 || index >= container.ixCount) { - throw new RangeError(`seekChild: index ${index} out of range [0, ${container.ixCount})`); - } - const { data } = container; - // Layout: [content] [ix entry 0..N-1] [# packed] [tag b64size] - // container.val = content boundary = start of index table - // Each entry is a b64 delta relative to container.val - // child_right = container.val - delta - const { val: ixBase, ixWidth } = container; - const entryLeft = ixBase + index * ixWidth; - const delta = b64Read(data, entryLeft, entryLeft + ixWidth); - c.data = data; - c.right = ixBase - delta; - read(c); -} - -/** Collect child right-boundaries into caller-owned array (logical order). Returns count. */ -export function collectChildren(container: Cursor, offsets: number[]): number { - // Uses _cc instead of _k because read() internally uses _k for object - // schema/index detection — calling read(_k) on an object node would self-conflict. - _cc.data = container.data; - let right = container.val; - const end = container.left; - let count = 0; - while (right > end) { - if (count >= offsets.length) offsets.push(right); - else offsets[count] = right; - count++; - _cc.right = right; - read(_cc); - right = _cc.left; - } - return count; -} - -// Compare a key node (in _k) against target. Zero-alloc for str, ptr, and chain. 
-function keyEquals(target: Uint8Array): boolean { - return strEquals(_k, target); -} -/** Find a key in an object. Fills c with the value node if found. */ -export function findKey(c: Cursor, container: Cursor, target: string | Uint8Array): boolean { - if (container.tag !== "object") return false; - if (typeof target === "string") target = prepareKey(target); - - const { data } = container; - _k.data = data; - - // Sorted + indexed: O(log n) binary search - if (container.ixWidth > 0 && container.ixCount > 0 && container.schema === 0) { - let lo = 0, hi = container.ixCount; - while (lo < hi) { - const mid = (lo + hi) >>> 1; - seekChild(c, container, mid); - const cmp = strCompare(c, target); - if (cmp < 0) lo = mid + 1; - else hi = mid; - } - if (lo < container.ixCount) { - seekChild(c, container, lo); - if (strEquals(c, target)) { - c.data = data; - c.right = c.left; - read(c); - return true; - } + case T_QUOTE: { + const name = td.decode(data.subarray(vStart, right)); + return { value: decodeRefName(name), leftEdge: tagPos }; } - return false; - } - - let right = container.val; - const end = container.left; - - if (container.schema !== 0) { - // Schema object: content has only values, keys come from schema - _s.data = data; - _s.right = container.schema; - read(_s); - if (_s.tag === "ptr") { - _s.right = _s.val; - read(_s); + case T_CARET: { + const delta = readB64(data, vStart, right); + const target = tagPos - delta; + // Resolve transparently: pointer span is [tagPos, right), but value is + // whatever sits at target. + return { value: decodeNode(data, target).value, leftEdge: tagPos }; } - let keyRight = _s.val; - const keyEnd = _s.left; - let valRight = container.val; - - if (_s.tag === "object") { - // Schema is an object — keys are its keys. - // Read key into _k, check match, then skip schema value using _s. 
- while (keyRight > keyEnd && valRight > end) { - _k.right = keyRight; - read(_k); - const matched = keyEquals(target); - // Skip schema value using _s - _s.data = data; - _s.right = _k.left; - read(_s); - keyRight = _s.left; - - if (matched) { - c.data = data; - c.right = valRight; - read(c); - return true; - } - - c.data = data; - c.right = valRight; - read(c); - valRight = c.left; - } + case T_AT: { + const len = readB64(data, vStart, right); + const bodyStart = tagPos - len; + return { value: decodeBytesBody(data, bodyStart, tagPos), leftEdge: bodyStart }; } - if (_s.tag === "array") { - while (keyRight > keyEnd && valRight > end) { - _k.right = keyRight; - read(_k); - keyRight = _k.left; + case T_RBRACK: + return decodeArray(data, tagPos); - if (keyEquals(target)) { - c.data = data; - c.right = valRight; - read(c); - return true; - } + case T_RBRACE: + return decodeObject(data, tagPos); - c.data = data; - c.right = valRight; - read(c); - valRight = c.left; - } - } - - return false; - } + case T_RANGLE: + return decodeChain(data, tagPos); - // No schema: interleaved key/value pairs - while (right > end) { - _k.right = right; - read(_k); - if (keyEquals(target)) { - c.data = data; - c.right = _k.left; - read(c); - return true; - } - // Skip value - c.data = data; - c.right = _k.left; - read(c); - right = c.left; + default: + throw new SyntaxError(`Unknown tag ${String.fromCharCode(tag)} (0x${tag.toString(16)}) at ${tagPos}`); } - return false; } -/** - * Find all keys matching a prefix in an object. - * On indexed objects: O(log n) binary search + O(m) iteration over matches. - * On non-indexed objects: O(n) linear scan. - * Calls visitor(keyCursor, valueCursor) for each match — use resolveStr(key) - * only if you need the string. Stops if visitor returns false. 
- */ -export function findByPrefix( - c: Cursor, - container: Cursor, - prefix: string | Uint8Array, - visitor: (key: Cursor, value: Cursor) => boolean | void, -): void { - if (container.tag !== "object") return; - if (typeof prefix === "string") prefix = prepareKey(prefix); - - const { data } = container; - - // TODO: schema-based objects - if (container.schema !== 0) return; +// ── Array decoding ───────────────────────────────────────────── - if (container.ixWidth > 0 && container.ixCount > 0) { - // Binary search: index entries are sorted and point to keys - let lo = 0, hi = container.ixCount; - while (lo < hi) { - const mid = (lo + hi) >>> 1; - seekChild(c, container, mid); - const cmp = strCompare(c, prefix); - if (cmp < 0) lo = mid + 1; - else hi = mid; - } - // lo is the first key >= prefix. Iterate while prefix matches. - for (let i = lo; i < container.ixCount; i++) { - seekChild(c, container, i); - const keyRight = c.right; - const keyLeft = c.left; - if (!strHasPrefix(c, prefix)) break; - // Re-read key into _cc (safe from read() internal _k usage) - _cc.data = data; _cc.right = keyRight; read(_cc); - // Read value (immediately after key) - c.data = data; c.right = keyLeft; read(c); - if (visitor(_cc, c) === false) return; - } - return; - } +function decodeArray(data: Uint8Array, closerPos: number): DecodeResult { + // closerPos points at `]`. Body is to the left, may end with an index. 
+ const idx = tryReadIndex(data, closerPos); + if (idx) return decodeIndexedArray(data, idx); - // Non-indexed: linear scan - _k.data = data; - let right = container.val; - const end = container.left; - while (right > end) { - _k.right = right; - read(_k); - const keyLeft = _k.left; - const keyRight = right; - if (strHasPrefix(_k, prefix)) { - // Re-read key into _cc (safe from read() internal _k usage) - _cc.data = data; _cc.right = keyRight; read(_cc); - c.data = data; c.right = keyLeft; read(c); - if (visitor(_cc, c) === false) return; - } else { - c.data = data; c.right = keyLeft; read(c); + // Non-indexed: eagerly decode all children R-to-L (which yields natural order). + const children: unknown[] = []; + let cursor = closerPos; + while (true) { + if (cursor <= 0) throw new SyntaxError("Array opener not found"); + const peek = findTag(data, cursor); + if (peek.tag === T_LBRACK) { + return { value: children, leftEdge: peek.tagPos }; } - right = c.left; + const res = decodeNode(data, cursor); + children.push(res.value); + cursor = res.leftEdge; } } -// ── Raw bytes ── - -/** Zero-copy view of the raw rexc bytes for the node at cursor position. */ -export function rawBytes(c: Cursor): Uint8Array { - return c.data.subarray(c.left, c.right); +function decodeIndexedArray(data: Uint8Array, idx: IndexInfo): DecodeResult { + // To find the container's left edge: the leftmost-in-byte-order child has the + // largest delta = entries[count-1] in natural order = the entry stored at + // L-to-R position 0 in the entries region. 
+ let leftEdge: number; + if (idx.count === 0) { + leftEdge = idx.leftEdge - 1; + } else { + const lastEntry = readB64(data, idx.entriesStart, idx.entriesStart + idx.width); + const leftmostChildRight = idx.base - lastEntry; + const leftmost = decodeNode(data, leftmostChildRight); + leftEdge = leftmost.leftEdge - 1; + } + if (data[leftEdge] !== T_LBRACK) { + throw new SyntaxError(`Array opener missing at ${leftEdge}`); + } + return { value: makeLazyArray(data, idx), leftEdge }; } -export type Refs = Record; - -// ── High-level Proxy API ── - -const HANDLE = Symbol("rexc.handle"); - -type NodeInfo = { - data: Uint8Array; - right: number; - tag: Tag; - val: number; - left: number; - ixWidth: number; - ixCount: number; - schema: number; - _count?: number; - _offsets?: number[]; - _keys?: string[]; - _keyMap?: Map; // key → value right-offset, built by ensureKeyMap -}; - -type OpenContext = { - root: unknown; - resolve(right: number): unknown; -}; - -function _openContext(buffer: Uint8Array, refs?: Refs): OpenContext { - const nodeMap = new WeakMap(); - const proxyCache = new Map(); // right-offset → memoized value - const scratch = makeCursor(buffer); - - function snap(c: Cursor): NodeInfo { - return { - data: c.data, right: c.right, tag: c.tag, val: c.val, - left: c.left, ixWidth: c.ixWidth, ixCount: c.ixCount, schema: c.schema, - }; - } - - /** Resolve a ref name to its opaque value, or undefined if not found. */ - function resolveRef(c: Cursor): unknown { - if (!refs) return undefined; - const name = readStr(c); - return name in refs ? refs[name] : undefined; - } - - /** Resolve a cursor to a string, following ptrs, chains, and refs (for key positions). - * Non-destructive: restores cursor state before returning. 
*/ - function resolveKeyStr(c: Cursor): string { - const savedLeft = c.left, savedRight = c.right, savedTag = c.tag, savedVal = c.val; - while (c.tag === "ptr") { c.right = c.val; read(c); } - let result: string; - if (c.tag === "ref" && refs) { - const val = resolveRef(c); - result = typeof val === "string" ? val : resolveStr(c); - } else { - result = resolveStr(c); - } - c.left = savedLeft; c.right = savedRight; c.tag = savedTag; c.val = savedVal; - return result; - } - - function wrap(c: Cursor): unknown { - while (c.tag === "ptr") { c.right = c.val; read(c); } - if (c.tag === "ref") return resolveRef(c); - // Check cache for containers (primitives are cheap to recreate) - const cached = proxyCache.get(c.right); - if (cached !== undefined) return cached; - switch (c.tag) { - case "int": case "float": return c.val; - case "str": return readStr(c); - case "chain": return resolveStr(c); - case "true": return true; - case "false": return false; - case "null": return null; - case "undef": return undefined; - } - const info = snap(c); - const target: object = c.tag === "array" ? [] : Object.create(null); - nodeMap.set(target, info); - const proxy = new Proxy(target, handler); - proxyCache.set(c.right, proxy); - return proxy; - } - - function childCount(info: NodeInfo): number { - if (info._count !== undefined) return info._count; - if (info.ixCount > 0) return info._count = info.ixCount; - if (info.tag === "array") { - ensureOffsets(info); - return info._count!; - } - // Object without index — scan children - let right = info.val, n = 0; - while (right > info.left) { - scratch.data = info.data; scratch.right = right; - read(scratch); right = scratch.left; n++; - } - return info._count = info.schema !== 0 ? 
n : n / 2; - } - - function ensureOffsets(info: NodeInfo): number[] { - if (!info._offsets) { - info._offsets = []; - info._count = collectChildren(info as unknown as Cursor, info._offsets); - } - return info._offsets; - } - - function getChild(info: NodeInfo, index: number): unknown { - if (index < 0 || index >= childCount(info)) return undefined; - if (info.ixWidth > 0) { - seekChild(scratch, info as unknown as Cursor, index); - return wrap(scratch); - } - const offsets = ensureOffsets(info); - scratch.data = info.data; - scratch.right = offsets[index]!; - read(scratch); - return wrap(scratch); - } - - function getValue(info: NodeInfo, key: string): unknown { - // Schema objects need ensureKeyMap (findKey can't resolve ref/ptr schemas). - // Non-schema objects use findKey directly (O(log n) with indexes). - if (!info._keyMap && info.schema !== 0) ensureKeyMap(info); - if (info._keyMap) { - const valRight = info._keyMap.get(key); - if (valRight === undefined) return undefined; - scratch.data = info.data; - scratch.right = valRight; - read(scratch); - return wrap(scratch); - } - scratch.data = info.data; - if (findKey(scratch, info as unknown as Cursor, key)) return wrap(scratch); - return undefined; - } - - function ensureKeyMap(info: NodeInfo): { keys: string[]; map: Map } { - if (info._keyMap) { - return { keys: info._keys!, map: info._keyMap }; - } - const keys: string[] = []; - const map = new Map(); - const kc = makeCursor(info.data); - if (info.schema !== 0) { - const sc = makeCursor(info.data); - sc.right = info.schema; read(sc); - while (sc.tag === "ptr") { sc.right = sc.val; read(sc); } - // Resolve ref schemas to opaque values — extract keys from arrays/objects - if (sc.tag === "ref" && refs) { - const refVal = resolveRef(sc); - let valRight = info.val; - const keyStrings: string[] = Array.isArray(refVal) - ? refVal as string[] - : (refVal && typeof refVal === "object" ? 
Object.keys(refVal) : []); - for (const name of keyStrings) { - keys.push(name); - map.set(name, valRight); - scratch.data = info.data; scratch.right = valRight; read(scratch); - valRight = scratch.left; - } - } else { - // Inline schema — read keys from the schema's buffer - kc.data = sc.data; - let valRight = info.val; - if (sc.tag === "object") { - let keyRight = sc.val; - const keyEnd = sc.left; - while (keyRight > keyEnd) { - kc.right = keyRight; read(kc); - const nextRight = kc.left; - const name = resolveKeyStr(kc); - keys.push(name); - map.set(name, valRight); - scratch.data = info.data; scratch.right = valRight; read(scratch); - valRight = scratch.left; - sc.right = nextRight; read(sc); - keyRight = sc.left; - } - } else if (sc.tag === "array") { - let keyRight = sc.val; - const keyEnd = sc.left; - while (keyRight > keyEnd) { - kc.right = keyRight; read(kc); - const name = resolveKeyStr(kc); - keys.push(name); - map.set(name, valRight); - scratch.data = info.data; scratch.right = valRight; read(scratch); - valRight = scratch.left; - keyRight = kc.left; - } - } - } - } else { - let right = info.val; - while (right > info.left) { - kc.data = info.data; kc.right = right; read(kc); - const keyLeft = kc.left; - const name = resolveKeyStr(kc); - keys.push(name); - map.set(name, keyLeft); - // skip value - kc.data = info.data; kc.right = keyLeft; read(kc); - right = kc.left; - } - } - info._keys = keys; - info._keyMap = map; - return { keys, map }; - } - - const handler: ProxyHandler = { - get(target, prop) { - const info = nodeMap.get(target)!; - if (prop === HANDLE) return { data: info.data, right: info.right }; - +function makeLazyArray(data: Uint8Array, idx: IndexInfo): unknown[] { + const cache: Map = new Map(); + const target: any[] = []; + return new Proxy(target, { + get(t, prop, recv) { + if (prop === "length") return idx.count; if (prop === Symbol.iterator) { - if (info.tag === "array") { - return function* () { - const n = childCount(info); - for (let 
i = 0; i < n; i++) yield getChild(info, i); - }; - } - if (info.tag === "object") { - return function* () { - const ks = ensureKeyMap(info).keys; - for (const k of ks) yield [k, getValue(info, k)] as [string, unknown]; - }; - } - return undefined; + return function* () { + for (let i = 0; i < idx.count; i++) yield (recv as any)[i]; + }; } - - if (typeof prop === "symbol") return undefined; - if (prop === "length") return childCount(info); - - if (info.tag === "array") { - const idx = Number(prop); - if (Number.isInteger(idx) && idx >= 0) return getChild(info, idx); - // Delegate Array.prototype methods to a materialized snapshot - const method = (Array.prototype as any)[prop]; - if (typeof method === "function") { - return function (...args: unknown[]) { - const n = childCount(info); - const arr: unknown[] = new Array(n); - for (let i = 0; i < n; i++) arr[i] = getChild(info, i); - return method.apply(arr, args); - }; + if (typeof prop === "string") { + const i = +prop; + if (Number.isInteger(i) && i >= 0 && i < idx.count) { + if (cache.has(i)) return cache.get(i); + const childRight = indexEntryTarget(data, idx, i); + const v = decodeNode(data, childRight).value; + cache.set(i, v); + return v; } - return undefined; } - - if (info.tag === "object") return getValue(info, prop); - return undefined; + return Reflect.get(t, prop, recv); }, - - has(target, prop) { - const info = nodeMap.get(target)!; - if (prop === HANDLE) return true; - if (typeof prop === "symbol") return false; - if (prop === "length") return true; - if (info.tag === "array") { - const idx = Number(prop); - return Number.isInteger(idx) && idx >= 0 && idx < childCount(info); - } - if (info.tag === "object") { - if (!info._keyMap && info.schema !== 0) ensureKeyMap(info); - if (info._keyMap) return info._keyMap.has(prop); - scratch.data = info.data; - return findKey(scratch, info as unknown as Cursor, prop); + has(_, prop) { + if (typeof prop === "string") { + const i = +prop; + return 
Number.isInteger(i) && i >= 0 && i < idx.count; } return false; }, - - ownKeys(target) { - const info = nodeMap.get(target)!; - if (info.tag === "array") { - const n = childCount(info); - const ks: string[] = []; - for (let i = 0; i < n; i++) ks.push(String(i)); - ks.push("length"); - return ks; - } - return ensureKeyMap(info).keys; + ownKeys() { + const keys: string[] = []; + for (let i = 0; i < idx.count; i++) keys.push(String(i)); + keys.push("length"); + return keys; }, - - getOwnPropertyDescriptor(target, prop) { - if (typeof prop === "symbol") return undefined; - const info = nodeMap.get(target)!; - if (info.tag === "array") { - if (prop === "length") { - return { configurable: false, enumerable: false, value: childCount(info), writable: true }; - } - const idx = Number(prop); - if (typeof prop === "string" && Number.isInteger(idx) && idx >= 0 && idx < childCount(info)) { - return { configurable: true, enumerable: true, value: getChild(info, idx) }; - } - return undefined; + getOwnPropertyDescriptor(_, prop) { + if (prop === "length") { + return { value: idx.count, writable: false, enumerable: false, configurable: true }; } - if (info.tag === "object" && typeof prop === "string") { - if (!info._keyMap && info.schema !== 0) ensureKeyMap(info); - if (info._keyMap) { - if (info._keyMap.has(prop)) { - return { configurable: true, enumerable: true, value: getValue(info, prop) }; - } - } else { - scratch.data = info.data; - if (findKey(scratch, info as unknown as Cursor, prop)) { - return { configurable: true, enumerable: true, value: wrap(scratch) }; - } + if (typeof prop === "string") { + const i = +prop; + if (Number.isInteger(i) && i >= 0 && i < idx.count) { + return { value: undefined, writable: false, enumerable: true, configurable: true }; } } return undefined; }, - - set() { throw new TypeError("rexc data is read-only"); }, - deleteProperty() { throw new TypeError("rexc data is read-only"); }, - }; - - function resolve(right: number): unknown { - 
scratch.data = buffer; - scratch.right = right; - read(scratch); - return wrap(scratch); - } - - // Read and wrap root - const root = resolve(buffer.length); - return { root, resolve }; -} - -/** Open a rexc buffer and return a Proxy-wrapped root value. */ -export function open(buffer: Uint8Array, refs?: Refs): unknown { - return _openContext(buffer, refs).root; -} - -/** Get the raw handle from a Proxy-wrapped value (escape hatch). */ -export function handle(proxy: unknown): { data: Uint8Array; right: number } | undefined { - if (proxy && typeof proxy === "object" && HANDLE in proxy) { - return (proxy as any)[HANDLE]; - } - return undefined; + }); } -// ── Inspect API ── - -/** AST node mapping 1:1 to a REXC tag+b64 pair in the byte stream. - * Acts like an array of its children: node[0], node.length, for...of, JSON.stringify all work. - * Named properties provide the encoding metadata. - */ -export interface ASTNode { - /** Backing buffer (non-enumerable, shared). */ - readonly data: Uint8Array; - /** Byte offset of the tag byte. */ - readonly left: number; - /** Byte offset after the node (after tag + b64 suffix). */ - readonly right: number; - /** Byte length of content preceding the tag. Children live in [left - size, left). */ - readonly size: number; - /** Single-character tag: '+' '*' ',' "'" ':' ';' '^' '.' '#' */ - readonly tag: string; - /** b64 payload — type depends on tag. */ - readonly b64: number | string | { count: number; width: number }; - /** Resolved JS value via open() — lazy. Primitives or open() Proxy. */ - readonly value: unknown; +// ── Object decoding ──────────────────────────────────────────── - // Array-like: numeric index → child ASTNode, .length → child count - readonly length: number; - readonly [index: number]: ASTNode; - [Symbol.iterator](): Iterator; - - // Semantic utilities (meaningful on containers) - /** Semantic entry count: number of key-value pairs (objects) or items (arrays). O(1) for indexed containers. 
*/ - readonly entryCount: number; - keys(): Iterable; - values(): Iterable; - entries(): Iterable<[ASTNode, ASTNode]>; - filteredKeys(prefix: string): Iterable<[ASTNode, ASTNode]>; - index(key: number | string): ASTNode | undefined; - /** Follow pointers (^) to the target node. Returns self if not a pointer. */ - readonly resolve: ASTNode; -} +function decodeObject(data: Uint8Array, closerPos: number): DecodeResult { + // closerPos points at `}`. Body may have a schema OR an index at the right end. + let cursor = closerPos; + const peek1 = findTag(data, cursor); -const TAG_CHARS: Record = { - 0x2b: "+", 0x2a: "*", 0x2c: ",", 0x27: "'", - 0x3a: ":", 0x3b: ";", 0x5e: "^", 0x2e: ".", 0x23: "#", -}; + // Schema check: rightmost child can be inline `.` or `^` pointing to one. + if (peek1.tag === T_DOT || peek1.tag === T_CARET) { + let keys: string[] | null = null; -// Internal state for each ASTNode's incremental child parsing -type NodeState = { - data: Uint8Array; - left: number; - right: number; - size: number; - tag: string; - b64: number | string | { count: number; width: number }; - // Incremental child cache - cache: ASTNode[]; - nextPos: number; // next byte offset to parse from (scanning right-to-left) - end: number; // left boundary of content region - done: boolean; // true when all children have been parsed -}; - -/** Inspect a rexc buffer, returning a lazy AST that maps 1:1 to the encoding. */ -export function inspect(buffer: Uint8Array, refs?: Refs): ASTNode { - const ctx = _openContext(buffer, refs); - const stateMap = new WeakMap(); - - /** Follow pointers (^) to the target node. Returns self if not a pointer. 
*/ - function resolveNode(node: ASTNode): ASTNode { - let current = node; - let depth = 0; - while (current.tag === "^" && depth++ < 100) { - const target = current.left - (current.b64 as number); - current = makeNode(target); + if (peek1.tag === T_DOT) { + const len = readB64(data, peek1.tagPos + 1, cursor); + const bodyStart = peek1.tagPos - len; + const body = td.decode(data.subarray(bodyStart, peek1.tagPos)); + keys = body === "" ? [] : body.split(",").reverse(); + cursor = bodyStart; + } else { + // Pointer — peek to see if it resolves to a schema node + const delta = readB64(data, peek1.tagPos + 1, cursor); + const target = peek1.tagPos - delta; + const targetTag = findTag(data, target); + if (targetTag.tag === T_DOT) { + const len = readB64(data, targetTag.tagPos + 1, target); + const bodyStart = targetTag.tagPos - len; + const body = td.decode(data.subarray(bodyStart, targetTag.tagPos)); + keys = body === "" ? [] : body.split(",").reverse(); + cursor = peek1.tagPos; + } } - return current; - } - - function parseTag(right: number): { left: number; tagByte: number; tagChar: string; b64val: number | string | { count: number; width: number }; size: number } { - const c = makeCursor(buffer); - c.right = right; - const tagByte = peekTag(c); - const left = c.left; - const tagChar = TAG_CHARS[tagByte] ?? 
String.fromCharCode(tagByte); - - let b64val: number | string | { count: number; width: number }; - let size: number; - switch (tagByte) { - case 0x2b: // + integer (signed) - b64val = fromZigZag(b64Read(buffer, left + 1, right)); - size = 0; - break; - case 0x2a: { // * decimal (signed exponent) - b64val = fromZigZag(b64Read(buffer, left + 1, right)); - const inner = makeCursor(buffer); - inner.right = left; - read(inner); - size = left - inner.left; - break; + if (keys !== null) { + const obj: Record = {}; + for (let i = 0; i < keys.length; i++) { + if (cursor <= 0) throw new SyntaxError("Object opener not found in schema object"); + const valRes = decodeNode(data, cursor); + obj[keys[i]!] = valRes.value; + cursor = valRes.leftEdge; } - case 0x2c: // , string - b64val = b64Read(buffer, left + 1, right); - size = b64val as number; - break; - case 0x27: // ' ref - b64val = textDecoder.decode(buffer.subarray(left + 1, right)); - size = 0; - break; - case 0x3a: // : object - b64val = b64Read(buffer, left + 1, right); - size = b64val as number; - break; - case 0x3b: // ; array - b64val = b64Read(buffer, left + 1, right); - size = b64val as number; - break; - case 0x5e: // ^ pointer - b64val = b64Read(buffer, left + 1, right); - size = 0; - break; - case 0x2e: // . 
chain - b64val = b64Read(buffer, left + 1, right); - size = b64val as number; - break; - case 0x23: { // # index - const packed = b64Read(buffer, left + 1, right); - const width = (packed & 0b111) + 1; - const count = packed >> 3; - b64val = { count, width }; - size = width * count; - break; + const opener = findTag(data, cursor); + if (opener.tag !== T_LBRACE) { + throw new SyntaxError(`Object opener not at expected position (got ${String.fromCharCode(opener.tag)})`); } - default: - throw new SyntaxError(`inspect: unknown tag 0x${tagByte.toString(16)}`); + return { value: obj, leftEdge: opener.tagPos }; } - - return { left, tagByte, tagChar, b64val, size }; } - /** Parse children up to (and including) the target index. Returns the child or undefined. */ - function ensureChild(state: NodeState, idx: number): ASTNode | undefined { - if (idx < state.cache.length) return state.cache[idx]; - if (state.done) return undefined; + // No schema — check for index + const idx = tryReadIndex(data, cursor); + if (idx) return decodeIndexedObject(data, idx); - while (state.cache.length <= idx) { - if (state.nextPos <= state.end) { - state.done = true; - return undefined; - } - const child = makeNode(state.nextPos); - state.cache.push(child); - const cs = stateMap.get(child as unknown as object)!; - state.nextPos = cs.left - cs.size; + // Plain object: eager (key, value pairs R-to-L) + const obj: Record = {}; + while (true) { + if (cursor <= 0) throw new SyntaxError("Object opener not found"); + const peek = findTag(data, cursor); + if (peek.tag === T_LBRACE) { + return { value: obj, leftEdge: peek.tagPos }; } - return state.cache[idx]; - } - - /** Parse all remaining children. 
*/ - function ensureAll(state: NodeState): void { - if (state.done) return; - while (state.nextPos > state.end) { - const child = makeNode(state.nextPos); - state.cache.push(child); - const cs = stateMap.get(child as unknown as object)!; - state.nextPos = cs.left - cs.size; + const keyRes = decodeNode(data, cursor); + const key = keyRes.value; + if (typeof key !== "string") { + throw new SyntaxError(`Object key must be a string, got ${typeof key}`); } - state.done = true; - } - - function makeNode(right: number): ASTNode { - const { left, tagChar, b64val, size } = parseTag(right); - - const state: NodeState = { - data: buffer, - left, - right, - size, - tag: tagChar, - b64: b64val, - cache: [], - nextPos: left, // start scanning right-to-left from just before the tag - end: left - size, - // Only container-like tags have parseable children - done: !(tagChar === ":" || tagChar === ";" || tagChar === "." || tagChar === "*"), - }; - - let _value: unknown; - let _hasValue = false; - - const target = Object.create(null); - const proxy = new Proxy(target, { - get(_, prop) { - // Numeric index - if (typeof prop === "string") { - const idx = Number(prop); - if (Number.isInteger(idx) && idx >= 0) { - return ensureChild(state, idx); - } - } - - switch (prop) { - case "data": return buffer; - case "left": return state.left; - case "right": return state.right; - case "size": return state.size; - case "tag": return state.tag; - case "b64": return state.b64; - case "value": - if (!_hasValue) { _value = ctx.resolve(right); _hasValue = true; } - return _value; - case "length": - ensureAll(state); - return state.cache.length; - case "entryCount": { - // Semantic entry count: O(1) for indexed containers, fallback for small ones - if (state.tag === ":" || state.tag === ";") { - for (let ci = 0; ci < 2; ci++) { - const child = ensureChild(state, ci); - if (!child) break; - const cs = stateMap.get(child as unknown as object)!; - if (cs.tag === "#") return (cs.b64 as { count: number; 
width: number }).count; - } - // Small unindexed: count via entries for objects, ensureAll for arrays - if (state.tag === ":") { - let n = 0; - for (const _ of entriesOf(proxy as ASTNode)) n++; - return n; - } - ensureAll(state); - return state.cache.length; - } - return 0; - } - case "keys": return () => keysOf(proxy as ASTNode); - case "values": return () => valuesOf(proxy as ASTNode); - case "entries": return () => entriesOf(proxy as ASTNode); - case "filteredKeys": return (prefix: string) => filteredKeysOf(proxy as ASTNode, prefix); - case "index": return (key: number | string) => indexOf(proxy as ASTNode, key); - case "resolve": return resolveNode(proxy as ASTNode); - case Symbol.iterator: - return function* () { - let i = 0; - while (true) { - const child = ensureChild(state, i); - if (child === undefined) return; - yield child; - i++; - } - }; - case "toJSON": - return () => { - const obj: any = { tag: state.tag, b64: state.b64, left: state.left, right: state.right, size: state.size }; - // Only tags with parseable children: containers, chain, decimal - if (state.size > 0 && (state.tag === ":" || state.tag === ";" || state.tag === "." 
|| state.tag === "*")) { - const children: unknown[] = []; - for (const child of proxy as any) children.push(child); - obj.children = children; - } - return obj; - }; - } - - // Array methods — materialize and delegate - if (typeof prop === "string" && typeof (Array.prototype as any)[prop] === "function") { - return function (...args: unknown[]) { - ensureAll(state); - return (Array.prototype as any)[prop].apply(state.cache, args); - }; - } - - return undefined; - }, - - has(_, prop) { - if (typeof prop === "string") { - const idx = Number(prop); - if (Number.isInteger(idx) && idx >= 0) { - return ensureChild(state, idx) !== undefined; - } - } - if (prop === "length" || prop === "tag" || prop === "b64" || prop === "left" || - prop === "right" || prop === "size" || prop === "value" || prop === "data" || - prop === "keys" || prop === "values" || prop === "entries" || - prop === "filteredKeys" || prop === "index" || prop === "resolve" || prop === Symbol.iterator) { - return true; - } - return false; - }, - - ownKeys() { - ensureAll(state); - const ks: string[] = []; - for (let i = 0; i < state.cache.length; i++) ks.push(String(i)); - ks.push("length", "tag", "b64", "left", "right", "size"); - return ks; - }, - - getOwnPropertyDescriptor(_, prop) { - if (prop === "length") { - ensureAll(state); - return { configurable: true, enumerable: false, value: state.cache.length, writable: false }; - } - if (typeof prop === "string") { - const idx = Number(prop); - if (Number.isInteger(idx) && idx >= 0) { - const child = ensureChild(state, idx); - if (child !== undefined) { - return { configurable: true, enumerable: true, value: child, writable: false }; - } - } - } - // Named metadata props - if (prop === "tag") return { configurable: true, enumerable: true, value: state.tag, writable: false }; - if (prop === "b64") return { configurable: true, enumerable: true, value: state.b64, writable: false }; - if (prop === "left") return { configurable: true, enumerable: true, value: 
state.left, writable: false }; - if (prop === "right") return { configurable: true, enumerable: true, value: state.right, writable: false }; - if (prop === "size") return { configurable: true, enumerable: true, value: state.size, writable: false }; - return undefined; - }, - - set() { throw new TypeError("inspect nodes are read-only"); }, - deleteProperty() { throw new TypeError("inspect nodes are read-only"); }, - }); - - stateMap.set(proxy, state); - return proxy as unknown as ASTNode; + cursor = keyRes.leftEdge; + const valRes = decodeNode(data, cursor); + obj[key] = valRes.value; + cursor = valRes.leftEdge; } +} - // -- Semantic utilities -- - - function* entriesOf(node: ASTNode): Iterable<[ASTNode, ASTNode]> { - if (node.tag !== ":") return; - const c = makeCursor(buffer); - c.data = buffer; c.right = node.right; read(c); - const hasSchema = c.schema !== 0; +function decodeIndexedObject(data: Uint8Array, idx: IndexInfo): DecodeResult { + // Object index entries are sorted by UTF-8 key (not by insertion order), so + // the leftmost-in-byte-order child corresponds to the entry with the LARGEST + // delta. Scan all entries to find it. 
+ let leftEdge: number; + if (idx.count === 0) { + leftEdge = idx.leftEdge - 1; + } else { + let maxDelta = 0; + for (let i = 0; i < idx.count; i++) { + const off = idx.entriesStart + i * idx.width; + const d = readB64(data, off, off + idx.width); + if (d > maxDelta) maxDelta = d; + } + const leftmostKeyRight = idx.base - maxDelta; + const leftmostKey = decodeNode(data, leftmostKeyRight); + const leftmostValue = decodeNode(data, leftmostKey.leftEdge); + leftEdge = leftmostValue.leftEdge - 1; + } + if (data[leftEdge] !== T_LBRACE) { + throw new SyntaxError(`Object opener missing at ${leftEdge}`); + } + return { value: makeLazyObject(data, idx), leftEdge }; +} - if (hasSchema) { - const sc = makeCursor(buffer); - sc.right = c.schema; read(sc); - while (sc.tag === "ptr") { sc.right = sc.val; read(sc); } +function makeLazyObject(data: Uint8Array, idx: IndexInfo): Record { + // Index entries point to KEY right edges, sorted by UTF-8 key order. + const cache: Map = new Map(); + let keysList: string[] | null = null; + let keyToValueRight: Map | null = null; + + function resolve(): { keys: string[]; map: Map } { + if (keysList && keyToValueRight) return { keys: keysList, map: keyToValueRight }; + keysList = []; + keyToValueRight = new Map(); + for (let i = 0; i < idx.count; i++) { + const keyRight = indexEntryTarget(data, idx, i); + const keyRes = decodeNode(data, keyRight); + const k = keyRes.value as string; + keysList.push(k); + keyToValueRight.set(k, keyRes.leftEdge); + } + return { keys: keysList, map: keyToValueRight }; + } + + return new Proxy({} as Record, { + get(_, prop) { + if (typeof prop !== "string") return undefined; + if (cache.has(prop)) return cache.get(prop); + const { map } = resolve(); + const valueRight = map.get(prop); + if (valueRight === undefined) return undefined; + const v = decodeNode(data, valueRight).value; + cache.set(prop, v); + return v; + }, + has(_, prop) { + if (typeof prop !== "string") return false; + const { map } = resolve(); + 
return map.has(prop); + }, + ownKeys() { + const { keys } = resolve(); + return keys.slice(); + }, + getOwnPropertyDescriptor(_, prop) { + if (typeof prop !== "string") return undefined; + const { map } = resolve(); + if (!map.has(prop)) return undefined; + return { value: undefined, writable: false, enumerable: true, configurable: true }; + }, + }); +} - const contentEnd = node.left - node.size; - let valPos = c.val; - if (sc.tag === "array") { - let keyPos = sc.val; - const keyEnd = sc.left; - while (keyPos > keyEnd && valPos > contentEnd) { - const keyNode = makeNode(keyPos); - const valNode = makeNode(valPos); - yield [keyNode, valNode]; - const ks = stateMap.get(keyNode as unknown as object)!; - const vs = stateMap.get(valNode as unknown as object)!; - keyPos = ks.left - ks.size; - valPos = vs.left - vs.size; +// ── Chain decoding ───────────────────────────────────────────── + +function decodeChain(data: Uint8Array, closerPos: number): DecodeResult { + // Chains concatenate string (or bytes) segments. Walk R-to-L collecting + // segments into a list (which is in natural forward order via unshift). + let cursor = closerPos; + const parts: unknown[] = []; + while (true) { + if (cursor <= 0) throw new SyntaxError("Chain opener not found"); + const peek = findTag(data, cursor); + if (peek.tag === T_LANGLE) { + let anyBytes = false; + for (const p of parts) if (p instanceof Uint8Array) { anyBytes = true; break; } + if (anyBytes) { + let total = 0; + for (const p of parts) { + total += p instanceof Uint8Array ? 
p.length : te.encode(p as string).length; } - } else if (sc.tag === "object") { - const kc = makeCursor(buffer); - let keyPos = sc.val; - const keyEnd = sc.left; - while (keyPos > keyEnd && valPos > contentEnd) { - const keyNode = makeNode(keyPos); - const valNode = makeNode(valPos); - yield [keyNode, valNode]; - const ks = stateMap.get(keyNode as unknown as object)!; - const vs = stateMap.get(valNode as unknown as object)!; - kc.data = buffer; kc.right = ks.left - ks.size; read(kc); - keyPos = kc.left; - valPos = vs.left - vs.size; + const out = new Uint8Array(total); + let oi = 0; + for (const p of parts) { + const b = p instanceof Uint8Array ? p : te.encode(p as string); + out.set(b, oi); + oi += b.length; } + return { value: out, leftEdge: peek.tagPos }; } - return; - } - - const contentEnd = node.left - node.size; - let pos = c.val; - while (pos > contentEnd) { - const keyNode = makeNode(pos); - const ks = stateMap.get(keyNode as unknown as object)!; - const keyLeft = ks.left - ks.size; - if (keyLeft <= contentEnd) break; - const valNode = makeNode(keyLeft); - const vs = stateMap.get(valNode as unknown as object)!; - yield [keyNode, valNode]; - pos = vs.left - vs.size; - } - } - - function* keysOf(node: ASTNode): Iterable { - for (const [k] of entriesOf(node)) yield k; - } - - function* valuesOf(node: ASTNode): Iterable { - if (node.tag === ";") { - const c = makeCursor(buffer); - c.data = buffer; c.right = node.right; read(c); - const contentEnd = node.left - node.size; - let pos = c.val; - while (pos > contentEnd) { - const child = makeNode(pos); - yield child; - const cs = stateMap.get(child as unknown as object)!; - pos = cs.left - cs.size; - } - return; - } - if (node.tag === ":") { - for (const [, v] of entriesOf(node)) yield v; - } - } - - function* filteredKeysOf(node: ASTNode, prefix: string): Iterable<[ASTNode, ASTNode]> { - if (node.tag !== ":") return; - const prefixBytes = prepareKey(prefix); - - const c = makeCursor(buffer); - c.data = buffer; 
c.right = node.right; read(c); - if (c.ixWidth > 0 && c.ixCount > 0 && c.schema === 0) { - const container = { ...c, data: buffer } as unknown as Cursor; - const sc2 = makeCursor(buffer); - - let lo = 0, hi = c.ixCount; - while (lo < hi) { - const mid = (lo + hi) >>> 1; - seekChild(sc2, container, mid); - const cmp = strCompare(sc2, prefixBytes); - if (cmp < 0) lo = mid + 1; - else hi = mid; - } - for (let i = lo; i < container.ixCount; i++) { - seekChild(sc2, container, i); - if (!strHasPrefix(sc2, prefixBytes)) break; - const keyNode = makeNode(sc2.right); - const valNode = makeNode(sc2.left); - yield [keyNode, valNode]; - } - return; - } - - for (const [k, v] of entriesOf(node)) { - const kc = makeCursor(buffer); - kc.data = buffer; kc.right = k.right; read(kc); - if (strHasPrefix(kc, prefixBytes)) { - yield [k, v]; - } - } - } - - function indexOf(node: ASTNode, key: number | string): ASTNode | undefined { - const c = makeCursor(buffer); - c.data = buffer; c.right = node.right; read(c); - - if (node.tag === ";" && typeof key === "number") { - if (c.ixWidth > 0 && c.ixCount > 0) { - if (key < 0 || key >= c.ixCount) return undefined; - const container = { ...c, data: buffer } as unknown as Cursor; - seekChild(c, container, key); - return makeNode(c.right); - } - const contentEnd = node.left - node.size; - let pos = c.val; - let i = 0; - while (pos > contentEnd) { - const child = makeNode(pos); - if (i === key) return child; - const cs = stateMap.get(child as unknown as object)!; - pos = cs.left - cs.size; - i++; - } - return undefined; - } - - if (node.tag === ":" && typeof key === "string") { - const container = { ...c, data: buffer } as unknown as Cursor; - const result = makeCursor(buffer); - if (findKey(result, container, key)) { - return makeNode(result.right); - } - return undefined; + return { value: (parts as string[]).join(""), leftEdge: peek.tagPos }; } - - return undefined; + const res = decodeNode(data, cursor); + parts.push(res.value); + cursor = 
res.leftEdge; } - - return makeNode(buffer.length); } -// ── High-level decode ── - -export interface DecodeOptions { - /** External dictionary of known values. Values are returned as-is when a ref is encountered. */ - refs?: Refs; -} +// ── Public API ───────────────────────────────────────────────── -/** Decode a rexc buffer into a plain JS value using the Proxy-based reader. */ -export function decode(input: Uint8Array, options?: DecodeOptions): unknown { - return open(input, options?.refs); +export function decode(data: Uint8Array): unknown { + if (data.length === 0) throw new SyntaxError("Empty input"); + return decodeNode(data, data.length).value; } -/** Parse a rexc string into a plain JS value. */ -export function parse(input: string, options?: DecodeOptions): unknown { - return decode(textEncoder.encode(input), options); +export function parse(text: string): unknown { + return decode(te.encode(text)); } diff --git a/rx.test.ts b/rx.test.ts index b9c9a23..d09c8ac 100644 --- a/rx.test.ts +++ b/rx.test.ts @@ -1,2758 +1,145 @@ +// Core round-trip tests for the new RX format. +// The cursor-based reader and rxb format have been removed; if you need to +// re-add those, restore the corresponding test suites. 
+ import { describe, expect, test } from "vitest"; -import { - encode, - stringify, -} from "./rx"; -import { - parse, - makeCursor, - read, - readStr, - resolveStr, - strEquals, - strCompare, - findKey, - seekChild, - collectChildren, - rawBytes, - open, - handle, - prepareKey, - strHasPrefix, - findByPrefix, - inspect, - type ASTNode, -} from "./rx-read"; +import { stringify } from "./rx.ts"; +import { parse } from "./rx-read.ts"; -function cur(value: unknown, opts?: Parameters[1]) { - const data = encode(value, opts); - const c = makeCursor(data); - read(c); - return c; +function roundTrip(value: unknown) { + const encoded = stringify(value); + const decoded = parse(encoded); + return { encoded, decoded }; } -describe("read() primitives", () => { +describe("round-trip primitives", () => { test("integers", () => { - let c = cur(0); - expect(c.tag).toBe("int"); - expect(c.val).toBe(0); - - c = cur(42); - expect(c.tag).toBe("int"); - expect(c.val).toBe(42); - - c = cur(-42); - expect(c.tag).toBe("int"); - expect(c.val).toBe(-42); + for (const v of [0, 1, -1, 42, -256, 1000000, -1000000]) { + const { decoded } = roundTrip(v); + expect(decoded).toBe(v); + } }); test("floats", () => { - let c = cur(3.14); - expect(c.tag).toBe("float"); - expect(c.val).toBe(3.14); - - c = cur(0.5); - expect(c.tag).toBe("float"); - expect(c.val).toBe(0.5); + for (const v of [3.14, -0.5, 1e6, 1.5]) { + const { decoded } = roundTrip(v); + expect(decoded).toBeCloseTo(v as number); + } }); test("special floats", () => { - let c = cur(Infinity); - expect(c.tag).toBe("float"); - expect(c.val).toBe(Infinity); - - c = cur(-Infinity); - expect(c.tag).toBe("float"); - expect(c.val).toBe(-Infinity); - - c = cur(NaN); - expect(c.tag).toBe("float"); - expect(c.val).toBeNaN(); + expect(roundTrip(Infinity).decoded).toBe(Infinity); + expect(roundTrip(-Infinity).decoded).toBe(-Infinity); + expect(Number.isNaN(roundTrip(NaN).decoded as number)).toBe(true); }); test("strings", () => { - let c = cur(""); - 
expect(c.tag).toBe("str"); - expect(c.val).toBe(0); - expect(readStr(c)).toBe(""); - - c = cur("hello"); - expect(c.tag).toBe("str"); - expect(c.val).toBe(5); - expect(readStr(c)).toBe("hello"); - - c = cur("hello world"); - expect(c.tag).toBe("str"); - expect(readStr(c)).toBe("hello world"); - }); - - test("unicode strings", () => { - const c = cur("🚀"); - expect(c.tag).toBe("str"); - expect(readStr(c)).toBe("🚀"); - }); - - test("booleans, null, undefined", () => { - expect(cur(true).tag).toBe("true"); - expect(cur(false).tag).toBe("false"); - expect(cur(null).tag).toBe("null"); - expect(cur(undefined).tag).toBe("undef"); - }); -}); - -describe("read() containers", () => { - test("empty array", () => { - const c = cur([]); - expect(c.tag).toBe("array"); - expect(c.val).toBe(c.left); // no content - }); - - test("simple array", () => { - const c = cur([1, 2, 3]); - expect(c.tag).toBe("array"); - // Iterate children - const vals: number[] = []; - let right = c.val; - const tmp = makeCursor(c.data); - while (right > c.left) { - tmp.right = right; - read(tmp); - expect(tmp.tag).toBe("int"); - vals.push(tmp.val); - right = tmp.left; - } - expect(vals).toEqual([1, 2, 3]); - }); - - test("simple object", () => { - const c = cur({ color: "red", size: 42 }); - expect(c.tag).toBe("object"); - // Iterate key/value pairs - const k = makeCursor(c.data); - const v = makeCursor(c.data); - const entries: [string, unknown][] = []; - let right = c.val; - while (right > c.left) { - k.right = right; - read(k); - v.right = k.left; - read(v); - entries.push([readStr(k), v.tag === "str" ? 
readStr(v) : v.val]); - right = v.left; - } - expect(entries).toContainEqual(["color", "red"]); - expect(entries).toContainEqual(["size", 42]); - }); - - test("empty object", () => { - const c = cur({}); - expect(c.tag).toBe("object"); - expect(c.val).toBe(c.left); - }); -}); - -describe("read() indexed containers", () => { - test("indexed array has ixWidth and ixCount", () => { - const c = cur([1, 2, 3], { indexThreshold: 0 }); - expect(c.tag).toBe("array"); - expect(c.ixWidth).toBeGreaterThan(0); - expect(c.ixCount).toBe(3); - }); - - test("indexed object has ixWidth and ixCount", () => { - const c = cur({ a: 1, b: 2, c: 3 }, { indexThreshold: 0 }); - expect(c.tag).toBe("object"); - expect(c.ixWidth).toBeGreaterThan(0); - expect(c.ixCount).toBe(3); - }); -}); - -describe("read() pointers", () => { - test("pointer to string", () => { - // hello,5^;8 - // Encoding writes last element first: "hello" at [0,7), then "^" pointer at [7,8) - // Natural read order (right-to-left) sees pointer first, then string - const c = cur(["hello", "hello"]); - expect(c.tag).toBe("array"); - - const tmp = makeCursor(c.data); - // First child in read order: the pointer - tmp.right = c.val; - read(tmp); - expect(tmp.tag).toBe("ptr"); - const secondChildRight = tmp.left; // save before resolving - - // Resolve pointer — should give us the string - tmp.right = tmp.val; - read(tmp); - expect(tmp.tag).toBe("str"); - expect(readStr(tmp)).toBe("hello"); - - // Second child in read order: the actual string - tmp.right = secondChildRight; - read(tmp); - expect(tmp.tag).toBe("str"); - expect(readStr(tmp)).toBe("hello"); - }); -}); - -describe("read() chains", () => { - test("chain node has correct boundaries", () => { - const c = cur(["/foo/bar/baz", "/foo/bar/qux", "/foo/quux"]); - expect(c.tag).toBe("array"); - // Just verify we can iterate without crashing - const tmp = makeCursor(c.data); - let right = c.val; - let count = 0; - while (right > c.left) { - tmp.right = right; - read(tmp); - 
right = tmp.left; - count++; - } - expect(count).toBe(3); - }); -}); - -const p = prepareKey; - -describe("strEquals", () => { - test("matches ASCII strings", () => { - const c = cur("hello"); - expect(strEquals(c, p("hello"))).toBe(true); - expect(strEquals(c, p("world"))).toBe(false); - expect(strEquals(c, p("hell"))).toBe(false); - expect(strEquals(c, p("helloo"))).toBe(false); - }); - - test("matches unicode strings", () => { - const c = cur("🚀"); - expect(strEquals(c, p("🚀"))).toBe(true); - expect(strEquals(c, p("🔥"))).toBe(false); - }); - - test("matches empty string", () => { - const c = cur(""); - expect(strEquals(c, p(""))).toBe(true); - expect(strEquals(c, p("a"))).toBe(false); - }); -}); - -describe("strCompare", () => { - test("ordering", () => { - const a = cur("apple"); - const b = cur("banana"); - expect(strCompare(a, p("apple"))).toBe(0); - expect(strCompare(a, p("banana"))).toBeLessThan(0); - expect(strCompare(b, p("apple"))).toBeGreaterThan(0); - }); -}); - -describe("seekChild", () => { - test("random access indexed array", () => { - const arr = [10, 20, 30, 40, 50]; - const c = cur(arr, { indexThreshold: 0 }); - expect(c.ixCount).toBe(5); - const child = makeCursor(c.data); - for (let i = 0; i < arr.length; i++) { - seekChild(child, c, i); - expect(child.tag).toBe("int"); - expect(child.val).toBe(arr[i]); - } - }); -}); - -describe("collectChildren", () => { - test("collects child boundaries", () => { - const c = cur([1, 2, 3]); - const offsets: number[] = []; - const count = collectChildren(c, offsets); - expect(count).toBe(3); - // Verify we can read each child - const tmp = makeCursor(c.data); - const vals: number[] = []; - for (let i = 0; i < count; i++) { - tmp.right = offsets[i]!; - read(tmp); - vals.push(tmp.val); - } - expect(vals).toEqual([1, 2, 3]); - }); -}); - -describe("findKey", () => { - test("finds existing key", () => { - const c = cur({ color: "red", size: 42 }); - const v = makeCursor(c.data); - expect(findKey(v, c, 
"color")).toBe(true); - expect(v.tag).toBe("str"); - expect(readStr(v)).toBe("red"); - - expect(findKey(v, c, "size")).toBe(true); - expect(v.tag).toBe("int"); - expect(v.val).toBe(42); - }); - - test("returns false for missing key", () => { - const c = cur({ a: 1 }); - const v = makeCursor(c.data); - expect(findKey(v, c, "z")).toBe(false); - }); - - test("finds key that is a chain (path with shared prefix)", () => { - // Keys like "/foo/bar" and "/foo/baz" share prefix "/foo" → chain encoding - const obj = { "/foo/bar": 1, "/foo/baz": 2 }; - const c = cur(obj); - const v = makeCursor(c.data); - expect(findKey(v, c, "/foo/bar")).toBe(true); - expect(v.tag).toBe("int"); - expect(v.val).toBe(1); - - expect(findKey(v, c, "/foo/baz")).toBe(true); - expect(v.tag).toBe("int"); - expect(v.val).toBe(2); - - expect(findKey(v, c, "/foo/qux")).toBe(false); - }); -}); - -describe("rawBytes", () => { - test("extracts node bytes", () => { - const c = cur(42); - const bytes = rawBytes(c); - expect(new TextDecoder().decode(bytes)).toBe("+1k"); - }); -}); - -describe("resolveStr", () => { - test("plain string", () => { - const c = cur("hello"); - expect(resolveStr(c)).toBe("hello"); - }); - - test("pointer to string", () => { - // Create data with a pointer: ["hello", "hello"] → second is a ptr - const c = cur(["hello", "hello"]); - const tmp = makeCursor(c.data); - // First child in read order is the pointer - tmp.right = c.val; - read(tmp); - expect(tmp.tag).toBe("ptr"); - expect(resolveStr(tmp)).toBe("hello"); - }); - - test("chain string", () => { - // Paths with shared prefixes produce chains - const arr = ["/foo/bar/baz", "/foo/bar/qux"]; - const c = cur(arr); - const tmp = makeCursor(c.data); - // Iterate children and resolve each - const results: string[] = []; - let right = c.val; - while (right > c.left) { - tmp.right = right; - read(tmp); - results.push(resolveStr(tmp)); - right = tmp.left; - } - expect(results).toEqual(["/foo/bar/baz", "/foo/bar/qux"]); - }); - - 
test("throws on non-string node", () => { - const c = cur(42); - expect(() => resolveStr(c)).toThrow(); - }); -}); - -describe("read() floats extended", () => { - test("negative exponent (small decimal)", () => { - const c = cur(0.001); - expect(c.tag).toBe("float"); - expect(c.val).toBe(0.001); - }); - - test("large float", () => { - const c = cur(1.23e15); - expect(c.tag).toBe("float"); - expect(c.val).toBe(1.23e15); - }); - - test("small float", () => { - const c = cur(1.5e-10); - expect(c.tag).toBe("float"); - expect(c.val).toBe(1.5e-10); - }); - - test("negative float", () => { - const c = cur(-3.14); - expect(c.tag).toBe("float"); - expect(c.val).toBe(-3.14); - }); - - test("negative float with exponent", () => { - const c = cur(-2.5e8); - expect(c.tag).toBe("float"); - expect(c.val).toBe(-2.5e8); - }); -}); - -describe("read() large integers", () => { - test("large positive without trailing zeroes", () => { - const c = cur(123457); - expect(c.tag).toBe("int"); - expect(c.val).toBe(123457); - }); - - test("large negative without trailing zeroes", () => { - const c = cur(-999997); - expect(c.tag).toBe("int"); - expect(c.val).toBe(-999997); - }); - - test("trailing zeroes encode as float with exponent", () => { - // 1000000 = 1e6, encoder uses exponent form - const c = cur(1000000); - expect(c.tag).toBe("float"); - expect(c.val).toBe(1000000); - }); -}); - -describe("nested containers", () => { - test("nested arrays", () => { - const c = cur([[1, 2], [3, 4]]); - expect(c.tag).toBe("array"); - const tmp = makeCursor(c.data); - const inner = makeCursor(c.data); - const results: number[][] = []; - let right = c.val; - while (right > c.left) { - tmp.right = right; - read(tmp); - expect(tmp.tag).toBe("array"); - const vals: number[] = []; - let innerRight = tmp.val; - while (innerRight > tmp.left) { - inner.right = innerRight; - read(inner); - vals.push(inner.val); - innerRight = inner.left; - } - results.push(vals); - right = tmp.left; - } - 
expect(results).toEqual([[1, 2], [3, 4]]); - }); - - test("nested objects", () => { - const c = cur({ a: { b: 1 } }); - expect(c.tag).toBe("object"); - const v = makeCursor(c.data); - expect(findKey(v, c, "a")).toBe(true); - expect(v.tag).toBe("object"); - const inner = makeCursor(v.data); - expect(findKey(inner, v, "b")).toBe(true); - expect(inner.tag).toBe("int"); - expect(inner.val).toBe(1); - }); - - test("object containing array", () => { - const c = cur({ items: [10, 20, 30] }); - const v = makeCursor(c.data); - expect(findKey(v, c, "items")).toBe(true); - expect(v.tag).toBe("array"); - const child = makeCursor(v.data); - const vals: number[] = []; - let right = v.val; - while (right > v.left) { - child.right = right; - read(child); - vals.push(child.val); - right = child.left; - } - expect(vals).toEqual([10, 20, 30]); - }); - - test("array of objects", () => { - const c = cur([{ x: 1 }, { x: 2 }]); - expect(c.tag).toBe("array"); - const tmp = makeCursor(c.data); - const v = makeCursor(c.data); - const results: number[] = []; - let right = c.val; - while (right > c.left) { - tmp.right = right; - read(tmp); - expect(tmp.tag).toBe("object"); - expect(findKey(v, tmp, "x")).toBe(true); - results.push(v.val); - right = tmp.left; - } - expect(results).toEqual([1, 2]); - }); -}); - -describe("seekChild on indexed objects", () => { - test("random access indexed object entries", () => { - const obj = { a: 10, b: 20, c: 30 }; - const c = cur(obj, { indexThreshold: 0 }); - expect(c.tag).toBe("object"); - expect(c.ixCount).toBe(3); - // Each entry is a key/value pair — seekChild gives the key node - const child = makeCursor(c.data); - const keys: string[] = []; - for (let i = 0; i < c.ixCount; i++) { - seekChild(child, c, i); - // In indexed objects, each index entry points to a key - keys.push(readStr(child)); - } - expect(keys.length).toBe(3); - // Indexed objects are sorted by UTF-8 key order - expect(keys).toEqual(["a", "b", "c"]); - }); -}); - 
-describe("collectChildren on objects", () => { - test("collects key/value boundaries", () => { - const c = cur({ x: 1, y: 2 }); - const offsets: number[] = []; - const count = collectChildren(c, offsets); - // Objects without schema: children are interleaved key, value, key, value - expect(count).toBe(4); - const tmp = makeCursor(c.data); - const tags: string[] = []; - for (let i = 0; i < count; i++) { - tmp.right = offsets[i]!; - read(tmp); - tags.push(tmp.tag); - } - // Alternating: str (key), int (value), str (key), int (value) - expect(tags.filter(t => t === "str").length).toBe(2); - expect(tags.filter(t => t === "int").length).toBe(2); - }); -}); - -describe("findKey with schema objects", () => { - test("finds key in schema object (repeated shape)", () => { - // Three objects with same keys. The encoder writes last-to-first, - // so carol (index 2) is encoded first with inline keys. - // alice and bob get schema pointers referencing carol's key layout. - // Read order = logical order: alice, bob, carol. 
- const data = [ - { name: "alice", age: 30 }, - { name: "bob", age: 25 }, - { name: "carol", age: 20 }, - ]; - const c = cur(data); - expect(c.tag).toBe("array"); - const tmp = makeCursor(c.data); - const v = makeCursor(c.data); - - // alice (first in read order) has a schema — last encoded, references carol's keys - tmp.right = c.val; - read(tmp); - expect(tmp.tag).toBe("object"); - expect(tmp.schema).not.toBe(0); - - // findKey should work on schema objects - expect(findKey(v, tmp, "name")).toBe(true); - expect(v.tag).toBe("str"); - expect(readStr(v)).toBe("alice"); - - expect(findKey(v, tmp, "age")).toBe(true); - expect(v.tag).toBe("int"); - expect(v.val).toBe(30); - - expect(findKey(v, tmp, "missing")).toBe(false); - - // bob (second in read order) also has a schema - tmp.right = tmp.left; - read(tmp); - expect(tmp.tag).toBe("object"); - expect(tmp.schema).not.toBe(0); - - expect(findKey(v, tmp, "name")).toBe(true); - expect(readStr(v)).toBe("bob"); - - expect(findKey(v, tmp, "age")).toBe(true); - expect(v.val).toBe(25); - - // carol (third in read order) has inline keys, no schema - tmp.right = tmp.left; - read(tmp); - expect(tmp.tag).toBe("object"); - expect(tmp.schema).toBe(0); - - expect(findKey(v, tmp, "name")).toBe(true); - expect(readStr(v)).toBe("carol"); - - expect(findKey(v, tmp, "age")).toBe(true); - expect(v.val).toBe(20); - }); -}); - -describe("findKey with pointer keys", () => { - test("finds key that is a pointer (deduplicated key string)", () => { - // When the same key string appears in multiple objects, the encoder - // deduplicates it with a pointer. Use enough objects to trigger this. 
- const data = [ - { name: "alice" }, - { name: "bob" }, - { name: "carol" }, - ]; - const c = cur(data); - const tmp = makeCursor(c.data); - const v = makeCursor(c.data); - - // Iterate all objects and findKey "name" in each - let right = c.val; - const names: string[] = []; - while (right > c.left) { - tmp.right = right; - read(tmp); - expect(tmp.tag).toBe("object"); - expect(findKey(v, tmp, "name")).toBe(true); - expect(v.tag).toBe("str"); - names.push(readStr(v)); - right = tmp.left; + for (const v of ["", "hi", "hello world", "café", "🎉", "with \"quotes\""]) { + const { decoded } = roundTrip(v); + expect(decoded).toBe(v); } - expect(names).toEqual(["alice", "bob", "carol"]); - }); -}); - -describe("strEquals with multi-byte UTF-8", () => { - test("2-byte UTF-8 (accented characters)", () => { - const c = cur("café"); - expect(strEquals(c, p("café"))).toBe(true); - expect(strEquals(c, p("cafe"))).toBe(false); - expect(strEquals(c, p("caféé"))).toBe(false); - }); - - test("3-byte UTF-8 (CJK characters)", () => { - const c = cur("日本語"); - expect(strEquals(c, p("日本語"))).toBe(true); - expect(strEquals(c, p("日本"))).toBe(false); - expect(strEquals(c, p("中文"))).toBe(false); - }); - - test("mixed ASCII and multi-byte", () => { - const c = cur("hello 世界 🌍"); - expect(strEquals(c, p("hello 世界 🌍"))).toBe(true); - expect(strEquals(c, p("hello 世界"))).toBe(false); - }); -}); - -describe("error paths", () => { - test("seekChild throws on non-indexed container", () => { - const c = cur([1, 2, 3]); // no indexes option - const child = makeCursor(c.data); - expect(() => seekChild(child, c, 0)).toThrow("indexed"); - }); - - test("seekChild throws on out-of-range index", () => { - const c = cur([1, 2, 3], { indexThreshold: 0 }); - const child = makeCursor(c.data); - expect(() => seekChild(child, c, -1)).toThrow(); - expect(() => seekChild(child, c, 3)).toThrow(); - }); - - test("findKey returns false on non-object", () => { - const c = cur([1, 2, 3]); - const v = 
makeCursor(c.data); - expect(findKey(v, c, "key")).toBe(false); - }); -}); - -// ── open() Proxy API ── - -function opened(value: unknown, opts?: Parameters[1]) { - return open(encode(value, opts)); -} - -describe("open() primitives", () => { - test("integers", () => { - expect(opened(0)).toBe(0); - expect(opened(42)).toBe(42); - expect(opened(-7)).toBe(-7); - }); - - test("floats", () => { - expect(opened(3.14)).toBe(3.14); - expect(opened(Infinity)).toBe(Infinity); - expect(opened(-Infinity)).toBe(-Infinity); - expect(opened(NaN)).toBeNaN(); }); - test("strings", () => { - expect(opened("")).toBe(""); - expect(opened("hello")).toBe("hello"); - expect(opened("🚀")).toBe("🚀"); - }); - - test("booleans, null, undefined", () => { - expect(opened(true)).toBe(true); - expect(opened(false)).toBe(false); - expect(opened(null)).toBe(null); - expect(opened(undefined)).toBe(undefined); + test("booleans and null", () => { + expect(roundTrip(true).decoded).toBe(true); + expect(roundTrip(false).decoded).toBe(false); + expect(roundTrip(null).decoded).toBe(null); }); }); -describe("open() arrays", () => { - test("Array.isArray", () => { - expect(Array.isArray(opened([]))).toBe(true); - expect(Array.isArray(opened([1, 2]))).toBe(true); - }); - - test("length", () => { - const arr = opened([10, 20, 30]) as unknown[]; - expect(arr.length).toBe(3); - }); - - test("index access", () => { - const arr = opened([10, 20, 30]) as unknown[]; - expect(arr[0]).toBe(10); - expect(arr[1]).toBe(20); - expect(arr[2]).toBe(30); - expect(arr[3]).toBe(undefined); - }); - - test("for...of iteration", () => { - const arr = opened([1, 2, 3]) as unknown[]; - const vals: unknown[] = []; - for (const v of arr) vals.push(v); - expect(vals).toEqual([1, 2, 3]); +describe("round-trip arrays", () => { + test("empty", () => { + expect(roundTrip([]).decoded).toEqual([]); }); - test("spread", () => { - const arr = opened([1, 2, 3]) as unknown[]; - expect([...arr]).toEqual([1, 2, 3]); + test("flat int array", () 
=> { + expect(roundTrip([1, 2, 3]).decoded).toEqual([1, 2, 3]); }); - test("JSON.stringify", () => { - const arr = opened([1, "hello", true, null]); - expect(JSON.stringify(arr)).toBe('[1,"hello",true,null]'); + test("mixed types", () => { + const v = [1, "hello", true, null, 3.14]; + expect(roundTrip(v).decoded).toEqual(v); }); test("nested arrays", () => { - const arr = opened([[1, 2], [3, 4]]) as unknown[][]; - expect(arr[0]![0]).toBe(1); - expect(arr[0]![1]).toBe(2); - expect(arr[1]![0]).toBe(3); - expect(arr[1]![1]).toBe(4); - expect(JSON.stringify(arr)).toBe("[[1,2],[3,4]]"); - }); - - test("empty array", () => { - const arr = opened([]) as unknown[]; - expect(arr.length).toBe(0); - expect([...arr]).toEqual([]); - }); - - test("indexed array", () => { - const arr = opened([10, 20, 30, 40, 50], { indexThreshold: 0 }) as unknown[]; - expect(arr.length).toBe(5); - expect(arr[0]).toBe(10); - expect(arr[4]).toBe(50); - expect([...arr]).toEqual([10, 20, 30, 40, 50]); - }); - - test("'in' operator", () => { - const arr = opened([10, 20]) as unknown[]; - expect(0 in arr).toBe(true); - expect(1 in arr).toBe(true); - expect(2 in arr).toBe(false); - }); -}); - -describe("open() objects", () => { - test("property access", () => { - const obj = opened({ color: "red", size: 42 }) as any; - expect(obj.color).toBe("red"); - expect(obj.size).toBe(42); - }); - - test("missing key returns undefined", () => { - const obj = opened({ a: 1 }) as any; - expect(obj.missing).toBe(undefined); - }); - - test("Object.keys", () => { - const obj = opened({ x: 1, y: 2 }) as any; - const keys = Object.keys(obj); - expect(keys.sort()).toEqual(["x", "y"]); - }); - - test("Object.entries", () => { - const obj = opened({ a: 1, b: 2 }) as any; - const entries = Object.entries(obj); - expect(entries.sort()).toEqual([["a", 1], ["b", 2]]); - }); - - test("'in' operator", () => { - const obj = opened({ a: 1 }) as any; - expect("a" in obj).toBe(true); - expect("b" in obj).toBe(false); - }); - - 
test("JSON.stringify", () => { - const obj = opened({ a: 1, b: "hello" }) as any; - const parsed = JSON.parse(JSON.stringify(obj)); - expect(parsed.a).toBe(1); - expect(parsed.b).toBe("hello"); + const v = [[1, 2], [3, 4], [[5]]]; + expect(roundTrip(v).decoded).toEqual(v); }); - test("nested objects", () => { - const obj = opened({ outer: { inner: 42 } }) as any; - expect(obj.outer.inner).toBe(42); - }); - - test("object containing array", () => { - const obj = opened({ items: [10, 20, 30] }) as any; - expect(Array.isArray(obj.items)).toBe(true); - expect(obj.items.length).toBe(3); - expect(obj.items[1]).toBe(20); - }); - - test("array of objects", () => { - const data = opened([{ x: 1 }, { x: 2 }]) as any[]; - expect(data[0].x).toBe(1); - expect(data[1].x).toBe(2); - }); - - test("empty object", () => { - const obj = opened({}) as any; - expect(Object.keys(obj)).toEqual([]); - }); - - test("length on object", () => { - const obj = opened({ a: 1, b: 2, c: 3 }) as any; - expect(obj.length).toBe(3); - }); -}); - -describe("open() schema objects", () => { - test("property access on schema objects", () => { - const data = opened([ - { name: "alice", age: 30 }, - { name: "bob", age: 25 }, - { name: "carol", age: 20 }, - ]) as any[]; - expect(data[0].name).toBe("alice"); - expect(data[0].age).toBe(30); - expect(data[1].name).toBe("bob"); - expect(data[2].age).toBe(20); - }); - - test("Object.keys on schema objects", () => { - const data = opened([ - { name: "alice", age: 30 }, - { name: "bob", age: 25 }, - { name: "carol", age: 20 }, - ]) as any[]; - expect(Object.keys(data[0]).sort()).toEqual(["age", "name"]); - expect(Object.keys(data[1]).sort()).toEqual(["age", "name"]); - // carol has inline keys (no schema) - expect(Object.keys(data[2]).sort()).toEqual(["age", "name"]); - }); - - test("JSON.stringify with schema objects", () => { - const data = opened([ - { name: "alice", age: 30 }, - { name: "bob", age: 25 }, - ]) as any[]; - const parsed = 
JSON.parse(JSON.stringify(data)); - expect(parsed).toEqual([ - { name: "alice", age: 30 }, - { name: "bob", age: 25 }, - ]); - }); -}); - -describe("open() pointers and chains", () => { - test("pointer values resolve transparently", () => { - const data = opened(["hello", "hello"]) as any[]; - expect(data[0]).toBe("hello"); - expect(data[1]).toBe("hello"); - }); - - test("chain strings resolve", () => { - const data = opened(["/foo/bar/baz", "/foo/bar/qux"]) as any[]; - expect(data[0]).toBe("/foo/bar/baz"); - expect(data[1]).toBe("/foo/bar/qux"); - }); -}); - -describe("open() read-only", () => { - test("set throws", () => { - const obj = opened({ a: 1 }) as any; - expect(() => { obj.a = 2; }).toThrow("read-only"); - }); - - test("delete throws", () => { - const obj = opened({ a: 1 }) as any; - expect(() => { delete obj.a; }).toThrow("read-only"); - }); -}); - -describe("open() handle escape hatch", () => { - test("handle returns data and right offset", () => { - const obj = opened({ a: 1 }) as any; - const h = handle(obj); - expect(h).toBeDefined(); - expect(h!.data).toBeInstanceOf(Uint8Array); - expect(typeof h!.right).toBe("number"); - }); - - test("handle returns undefined for non-proxy", () => { - expect(handle(42)).toBe(undefined); - expect(handle("hello")).toBe(undefined); - expect(handle({})).toBe(undefined); - }); -}); - -describe("open() Symbol.iterator on objects", () => { - test("iterates [key, value] pairs", () => { - const obj = opened({ a: 1, b: 2 }) as any; - const entries: [string, unknown][] = []; - for (const pair of obj) entries.push(pair); - expect(entries.sort((a, b) => a[0].localeCompare(b[0]))).toEqual([["a", 1], ["b", 2]]); + test("large indexed array", () => { + const v = Array.from({ length: 100 }, (_, i) => i); + const { decoded } = roundTrip(v); + // Indexed arrays return a Proxy; check via explicit lookups + iteration + expect((decoded as number[])[0]).toBe(0); + expect((decoded as number[])[99]).toBe(99); + expect((decoded as 
number[]).length).toBe(100); }); }); -// ── strHasPrefix ── - -describe("strHasPrefix", () => { - test("matches ASCII prefix", () => { - const c = cur("hello world"); - expect(strHasPrefix(c, p("hello"))).toBe(true); - expect(strHasPrefix(c, p("hello world"))).toBe(true); - expect(strHasPrefix(c, p("world"))).toBe(false); - }); - - test("empty prefix matches everything", () => { - const c = cur("hello"); - expect(strHasPrefix(c, p(""))).toBe(true); - const empty = cur(""); - expect(strHasPrefix(empty, p(""))).toBe(true); - }); - - test("prefix longer than string does not match", () => { - const c = cur("hi"); - expect(strHasPrefix(c, p("hello"))).toBe(false); - }); - - test("unicode prefix", () => { - const c = cur("café latte"); - expect(strHasPrefix(c, p("café"))).toBe(true); - expect(strHasPrefix(c, p("cafe"))).toBe(false); +describe("round-trip objects", () => { + test("empty", () => { + expect(roundTrip({}).decoded).toEqual({}); }); - test("chain strings match prefix", () => { - const arr = cur(["/foo/bar/baz", "/foo/bar/qux"]); - const tmp = makeCursor(arr.data); - tmp.right = arr.val; - read(tmp); - // First child is a chain - expect(strHasPrefix(tmp, p("/foo/bar"))).toBe(true); - expect(strHasPrefix(tmp, p("/foo/baz"))).toBe(false); + test("flat object", () => { + const v = { a: 1, b: 2 }; + expect(roundTrip(v).decoded).toEqual(v); }); -}); - -// ── strCompare / strEquals on non-string nodes ── -describe("strCompare on non-string nodes", () => { - test("returns NaN for integer", () => { - const c = cur(42); - expect(strCompare(c, p("hello"))).toBeNaN(); + test("preserves insertion order", () => { + const v = { z: 1, a: 2, m: 3 }; + const { decoded } = roundTrip(v); + expect(Object.keys(decoded as object)).toEqual(["z", "a", "m"]); }); - test("strEquals returns false for non-string", () => { - const c = cur(42); - expect(strEquals(c, p("42"))).toBe(false); + test("nested object", () => { + const v = { outer: { inner: { value: 42 } } }; + 
expect(roundTrip(v).decoded).toEqual(v); }); - test("strHasPrefix returns false for non-string", () => { - const c = cur(42); - expect(strHasPrefix(c, p("4"))).toBe(false); + test("schema-shared records", () => { + const v = [ + { a: 1, b: 2 }, + { a: 3, b: 4 }, + { a: 5, b: 6 }, + ]; + expect(roundTrip(v).decoded).toEqual(v); }); }); -// ── findByPrefix ── - -describe("findByPrefix", () => { - test("finds matching keys (non-indexed)", () => { - const obj = cur({ apple: 1, apricot: 2, banana: 3, avocado: 4 }); - const c = makeCursor(obj.data); - const results: [string, number][] = []; - findByPrefix(c, obj, "ap", (key, value) => { - results.push([resolveStr(key), value.val]); - }); - expect(results.sort()).toEqual([["apple", 1], ["apricot", 2]]); - }); - - test("finds matching keys (indexed)", () => { - const obj = cur({ apple: 1, apricot: 2, banana: 3, avocado: 4 }, { indexThreshold: 0 }); - const c = makeCursor(obj.data); - const results: [string, number][] = []; - findByPrefix(c, obj, "ap", (key, value) => { - results.push([resolveStr(key), value.val]); - }); - expect(results.sort()).toEqual([["apple", 1], ["apricot", 2]]); - }); - - test("no matches returns nothing", () => { - const obj = cur({ apple: 1, banana: 2 }); - const c = makeCursor(obj.data); - const results: string[] = []; - findByPrefix(c, obj, "zzz", (key) => { results.push(resolveStr(key)); }); - expect(results).toEqual([]); - }); - - test("empty prefix matches all keys", () => { - const obj = cur({ a: 1, b: 2 }); - const c = makeCursor(obj.data); - const results: string[] = []; - findByPrefix(c, obj, "", (key) => { results.push(resolveStr(key)); }); - expect(results.sort()).toEqual(["a", "b"]); - }); - - test("visitor returning false stops iteration", () => { - const obj = cur({ a: 1, b: 2, c: 3 }); - const c = makeCursor(obj.data); - const results: string[] = []; - findByPrefix(c, obj, "", (key) => { - results.push(resolveStr(key)); - return false; // stop after first - }); - 
expect(results.length).toBe(1); - }); - - test("works with chain keys", () => { - const obj = cur({ "/foo/bar": 1, "/foo/baz": 2, "/qux": 3 }); - const c = makeCursor(obj.data); - const results: [string, number][] = []; - findByPrefix(c, obj, "/foo/", (key, value) => { - results.push([resolveStr(key), value.val]); - }); - expect(results.sort()).toEqual([["/foo/bar", 1], ["/foo/baz", 2]]); +describe("round-trip mixed", () => { + test("realistic site manifest shape", () => { + const v = { + version: "1.0", + routes: [ + { path: "/api", action: "proxy" }, + { path: "/static", action: "serve" }, + ], + flags: { cache: true, compress: true }, + }; + expect(roundTrip(v).decoded).toEqual(v); }); - test("on non-object does nothing", () => { - const arr = cur([1, 2, 3]); - const c = makeCursor(arr.data); - let called = false; - findByPrefix(c, arr, "x", () => { called = true; }); - expect(called).toBe(false); + test("path-keyed routes table", () => { + const v: Record = {}; + for (let i = 0; i < 50; i++) { + v[`/path/${i}`] = { hash: `h${i.toString(16)}`, size: i * 1000 }; + } + const { decoded } = roundTrip(v); + // Spot-check a few keys + expect((decoded as any)["/path/0"]).toEqual({ hash: "h0", size: 0 }); + expect((decoded as any)["/path/49"]).toEqual({ hash: "h31", size: 49000 }); }); }); -// ── Proxy identity (memoization) ── - -describe("open() proxy identity", () => { - test("same container returns same proxy", () => { - const obj = opened({ nested: { a: 1 } }) as any; - expect(obj.nested).toBe(obj.nested); - }); - - test("same array element returns same proxy", () => { - const arr = opened([{ x: 1 }, { x: 2 }]) as any[]; - expect(arr[0]).toBe(arr[0]); - }); - - test("pointer dedup returns same proxy", () => { - // Two objects sharing the same nested value via pointer - const shared = { inner: 42 }; - const arr = opened([shared, shared]) as any[]; - expect(arr[0]).toBe(arr[1]); - }); -}); - -// ── Proxy Array.prototype delegation ── - -describe("open() array 
methods", () => { - test("map", () => { - const arr = opened([1, 2, 3]) as any[]; - const doubled = arr.map((x: number) => x * 2); - expect(doubled).toEqual([2, 4, 6]); - }); - - test("filter", () => { - const arr = opened([1, 2, 3, 4, 5]) as any[]; - const evens = arr.filter((x: number) => x % 2 === 0); - expect(evens).toEqual([2, 4]); - }); - - test("indexOf", () => { - const arr = opened([10, 20, 30]) as any[]; - expect(arr.indexOf(20)).toBe(1); - expect(arr.indexOf(99)).toBe(-1); - }); - - test("includes", () => { - const arr = opened(["a", "b", "c"]) as any[]; - expect(arr.includes("b")).toBe(true); - expect(arr.includes("z")).toBe(false); - }); - - test("every / some", () => { - const arr = opened([2, 4, 6]) as any[]; - expect(arr.every((x: number) => x % 2 === 0)).toBe(true); - expect(arr.some((x: number) => x > 5)).toBe(true); - expect(arr.some((x: number) => x > 10)).toBe(false); - }); - - test("reduce", () => { - const arr = opened([1, 2, 3]) as any[]; - const sum = arr.reduce((acc: number, x: number) => acc + x, 0); - expect(sum).toBe(6); - }); - - test("find", () => { - const arr = opened([{ x: 1 }, { x: 2 }, { x: 3 }]) as any[]; - const found = arr.find((item: any) => item.x === 2); - expect(found.x).toBe(2); - }); - - test("slice", () => { - const arr = opened([10, 20, 30, 40]) as any[]; - expect(arr.slice(1, 3)).toEqual([20, 30]); - }); -}); - -// ── Proxy for...in iteration ── - -describe("open() for...in", () => { - test("iterates object keys", () => { - const obj = opened({ x: 1, y: 2, z: 3 }) as any; - const keys: string[] = []; - for (const k in obj) keys.push(k); - expect(keys.sort()).toEqual(["x", "y", "z"]); - }); - - test("accesses values during for...in", () => { - const obj = opened({ a: 10, b: 20 }) as any; - const entries: [string, number][] = []; - for (const k in obj) entries.push([k, obj[k]]); - expect(entries.sort()).toEqual([["a", 10], ["b", 20]]); - }); -}); - -// ── stringify ── - -describe("stringify", () => { - 
describe("primitives", () => { - test("encodes integers with zigzag + base64", () => { - expect(stringify(0)).toBe("+"); - expect(stringify(1)).toBe("+2"); - expect(stringify(-1)).toBe("+1"); - expect(stringify(42)).toBe("+1k"); - expect(stringify(-42)).toBe("+1j"); - }); - - test("encodes decimals", () => { - expect(stringify(3.14)).toBe("+9Q*3"); - expect(stringify(0.5)).toBe("+a*1"); - expect(stringify(1000000)).toBe("+2*c"); - }); - - test("encodes length-prefixed strings for non-bare characters", () => { - expect(stringify("hello world")).toBe("hello world,b"); - expect(stringify("foo bar")).toBe("foo bar,7"); - }); - - test("encodes booleans, null, undefined", () => { - expect(stringify(true)).toBe("'t"); - expect(stringify(false)).toBe("'f"); - expect(stringify(null)).toBe("'n"); - expect(stringify(undefined)).toBe("'u"); - }); - - test("encodes special numbers", () => { - expect(stringify(NaN)).toBe("'nan"); - expect(stringify(Infinity)).toBe("'inf"); - expect(stringify(-Infinity)).toBe("'nif"); - }); - }); - - describe("arrays", () => { - test("encodes simple arrays", () => { - expect(stringify([1, 2, 3])).toBe("+6+4+2;6"); - }); - - test("encodes arrays as values with length prefix", () => { - const encoded = stringify([[1, 2, 3]], {}); - expect(encoded).toBe("+6+4+2;6;8"); - }); - - test("encodes empty array", () => { - expect(stringify([])).toBe(";"); - }); - - test("encodes nested arrays", () => { - const encoded = stringify([[1], [2]]); - expect(encoded).toBe("+4;2+2;2;8"); - }); - - test("encodes arrays with different formats", () => { - const data = [ - [1, 2], - [3, 4], - ]; - expect(stringify(data)).toBe("+8+6;4+4+2;4;c"); - expect(stringify(data, { indexThreshold: 0 })).toBe( - "+8+602#g;8+4+202#g;80a#g;o", - ); - }); - }); - - describe("objects", () => { - test("encodes simple objects", () => { - expect(stringify({ color: "red", size: 42 })).toBe( - "+1ksize,4red,3color,5:l", - ); - }); - - test("encodes empty object", () => { - 
expect(stringify({})).toBe(":"); - }); - - test("encodes objects with length prefix", () => { - const encoded = stringify([{ a: 1 }]); - expect(encoded).toBe("+2a,1:5;7"); - }); - - test("encodes objects with different formats", () => { - const data = { a: { b: 1, c: 1 }, d: { e: 3, f: 4 } }; - expect(stringify(data)).toBe("+8f,1+6e,1:ad,1+2c,1+2b,1:aa,1:u"); - expect(stringify(data, { indexThreshold: 0 })).toBe( - "+8f,1+6e,105#g:ed,1+2c,1+2b,105#g:ea,10j#g:G", - ); - }); - - test("object keys are sorted when indexes enabled", () => { - const obj = { c: 3, a: 1, b: 2 }; - const encoded = stringify(obj, { indexThreshold: 2 }); - expect(encoded).toBe("+4b,1+2a,1+6c,15a0#o:k"); - }); - }); - - describe("indexes", () => { - test("embed index into small array", () => { - const arr = [1, 2, 3]; - const encoded = stringify(arr, { indexThreshold: 2 }); - expect(encoded).toBe("+6+4+2024#o;b"); - }); - test("embeds index for medium arrays", () => { - const arr = Array.from({ length: 12 }, (_, i) => i); - const encoded = stringify(arr, { indexThreshold: 10 }); - expect(encoded).toBe("+m+k+i+g+e+c+a+8+6+4+2+013579bdfhjl#1w;C"); - }); - test("embeds index for large arrays", () => { - const arr = Array.from({ length: 40 }, (_, i) => i); - const encoded = stringify(arr, { indexThreshold: 30 }); - expect(encoded).toBe( - "+1e+1c+1a+18+16+14+12+10+-+Y+W+U+S+Q+O+M+K+I+G+E+C+A+y+w+u+s+q+o+m+k+i+g+e+c+a+8+6+4+2+0001030507090b0d0f0h0j0l0n0p0r0t0v0x0z0B0D0F0H0J0L0N0P0R0T0V0X0Z0_1215181b1e1h1k#51;2G", - ); - }); - - test("skips index for small arrays", () => { - const encoded = stringify([1, 2, 3], { indexThreshold: 10 }); - expect(encoded).not.toContain("#"); - }); - - test("disables index when indexes is false", () => { - const arr = Array.from({ length: 20 }, (_, i) => i); - const encoded = stringify(arr, { indexThreshold: Infinity }); - expect(encoded).not.toContain("#"); - }); - - test("indices for maps", () => { - const obj = { a: 1, b: 2, c: 3 }; - const encoded = stringify(obj, 
{ indexThreshold: 2 }); - expect(encoded).toBe("+6c,1+4b,1+2a,105a#o:k"); - }); - - test("map indexes sort keys", () => { - const obj = { c: 3, a: 1, b: 2 }; - const encoded = stringify(obj, { indexThreshold: 2 }); - expect(encoded).toBe("+4b,1+2a,1+6c,15a0#o:k"); - }); - - test("schema objects can have indices on values", () => { - const data = [ - { name: "alice", age: 1 }, - { name: "bob", age: 2 }, - ]; - expect(stringify(data, { indexThreshold: 1 })).toBe( - "+4age,3bob,3name,4b0#g:m+2alice,507#g^d:f0h#g;J", - ); - expect(stringify(data, { indexThreshold: 1 })).toBe( - "+4age,3bob,3name,4b0#g:m+2alice,507#g^d:f0h#g;J", - ); - }); - }); - - describe("pointers", () => { - test("deduplicates repeated strings", () => { - const encoded = stringify(["hello", "hello"]); - expect(encoded).toBe("hello,5^;8"); - }); - - test("deduplicates repeated objects", () => { - const obj = { x: 1 }; - expect(stringify([obj, obj])).toBe("+2x,1:5^;8"); - }); - }); - - describe("refs", () => { - test("encodes value matching a ref as ref shorthand", () => { - expect( - stringify("hello", { - refs: { H: "hello" } - }), - ).toBe("'H"); - }); - - test("encodes number matching a ref", () => { - expect(stringify(42, { refs: { X: 42 } })).toBe("'X"); - }); - - test("encodes refs inside arrays", () => { - expect(stringify(["hello", "world"], { refs: { H: "hello" } })).toBe( - "world,5'H;9", - ); - }); - - test("encodes multiple refs", () => { - expect( - stringify(["hello", 42], { - refs: { H: "hello", X: 42 }, - }), - ).toBe("'X'H;4"); - }); - - test("encodes schema ref for repeated object shapes", () => { - const data = [ - { a: 1, b: 2 }, - { a: 3, b: 4 }, - ]; - expect( - stringify(data, { - refs: { S: ["a", "b"] }, - }), - ).toBe("+8+6'S:6+4+2'S:6;g"); - }); - - test("use refs even when pointers are disabled", () => { - expect( - stringify("hello", { refs: { H: "hello" } }), - ).toBe("'H"); - }); - }); - - describe("shared schemas", () => { - test("deduplicates repeated object shapes", 
() => { - const data = [ - { name: "alice", age: 1 }, - { name: "bob", age: 2 }, - { name: "charlie", age: 3 }, - ]; - expect(stringify(data)).toBe( - "+6age,3charlie,7name,4:m+4bob,3^7:9+2alice,5^k:b;M", - ); - }); - - test("does not use schemas for single objects", () => { - const data = [{ name: "alice" }]; - const encoded = stringify(data); - expect(encoded).toBe("alice,5name,4:d;f"); - }); - - test("Can use array refs as schema targets", () => { - const data = { a: 1, b: 2 }; - const refs = { K: ["a", "b"] }; - expect(stringify(data, { refs })).toBe("+4+2'K:6"); - }); - - test("Can use object refs as schema targets", () => { - const data = { a: 1, b: 2 }; - const refs = { O: { a: 3, b: 4 } }; - expect(stringify(data, { refs })).toBe("+4+2'O:6"); - }); - - describe("path chains", () => { - test("encodes path chains with shared prefixes", () => { - const chain = { stringChainThreshold: 0 }; - expect(stringify("/")).toBe("/,1"); - expect(stringify("/about")).toBe("/about,6"); - const paths = ["/foo/bar/baz", "/foo/bar/qux", "/foo/quux"]; - expect(stringify(paths, chain)).toBe( - "/foo/quux,9/bar/qux,8/foo,4.g/baz,4/bar,4.c^g.g;L", - ); - const prefixedPaths = ["/foo/bar/baz", "/foo/bar/qux"]; - expect(stringify(prefixedPaths, chain)).toBe("/foo/bar/qux,c/baz,4/bar,4/foo,4.c.k;A"); - }); - }); - - describe("website manifest", () => { - const doc = { - "/": { name: "Home", method: "GET" }, - "/about": { name: "About", method: "GET" }, - "/contact": { name: "Contact", method: "POST" }, - "/blog": { name: "Blog", method: "GET" }, - "/blog/post": { name: "Blog Post", method: "GET" }, - "/blog/post/comment": { name: "Comment", method: "POST" }, - "/api/data": { name: "API Data", method: "GET" }, - "/api/update": { name: "API Update", method: "POST" }, - "/admin": { name: "Admin", method: "GET" }, - "/admin/settings": { name: "Admin Settings", method: "POST" }, - "/admin/users": { name: "Admin Users", method: "GET" }, - "/admin/users/add": { name: "Add User", method: 
"POST" }, - "/admin/users/remove": { name: "Remove User", method: "POST" }, - "/admin/logs": { name: "Admin Logs", method: "GET" }, - "/admin/logs/clear": { name: "Clear Logs", method: "POST" }, - "/admin/logs/export": { name: "Export Logs", method: "GET" }, - "/admin/logs/export/json": { name: "Export Logs as JSON", method: "GET" }, - "/admin/logs/export/csv": { name: "Export Logs as CSV", method: "GET" }, - }; - test("byte counts are accurate with different options", () => { - const chain = { stringChainThreshold: 0 }; - expect(stringify(doc, chain)).toBe( - "GET,3method,6Export Logs as CSV,iname,4:D/admin/logs/export/csv,m^YExport Logs as JSON,j^L:p/json,5/export,7/logs,5/admin,6.f.q.z^1YExport Logs,b^1E:j^nPOST,4Clear Logs,a^21:l/clear,6^W.a^2SAdmin Logs,a^2x:i^1i^QRemove User,b^2U:i/users/remove,d^1W.i^1sAdd User,8^3u:g/add,4/users,6.e^2x.j^4sAdmin Users,b^48:j^s^2Z.5^2vAdmin Settings,e^4D:m/settings,9^3B.e^5wAdmin,5^56:d^3V^3pAPI Update,a^5t:i/api/update,b^6jAPI Data,8^5Y:g/data,5/api,4.d^4rComment,7^6s:f/blog/post/comment,i^7pBlog Post,9^73:h/post,5/blog,5.e^7YBlog,4^7x:c^g^5PContact,7^7Q:f/contact,8^8DAbout,5^8d:d/about,6^8-Home,4^8z:c/,1000h3t626p6Y8j7j3L4n4P5q2r2Y131j1S0E#2h:9X", - ); - expect( - stringify(doc, { - ...chain, - indexThreshold: Infinity, - }), - ).toBe( - "GET,3method,6Export Logs as CSV,iname,4:D/admin/logs/export/csv,m^YExport Logs as JSON,j^L:p/json,5/export,7/logs,5/admin,6.f.q.z^1YExport Logs,b^1E:j^nPOST,4Clear Logs,a^21:l/clear,6^W.a^2SAdmin Logs,a^2x:i^1i^QRemove User,b^2U:i/users/remove,d^1W.i^1sAdd User,8^3u:g/add,4/users,6.e^2x.j^4sAdmin Users,b^48:j^s^2Z.5^2vAdmin Settings,e^4D:m/settings,9^3B.e^5wAdmin,5^56:d^3V^3pAPI Update,a^5t:i/api/update,b^6jAPI Data,8^5Y:g/data,5/api,4.d^4rComment,7^6s:f/blog/post/comment,i^7pBlog Post,9^73:h/post,5/blog,5.e^7YBlog,4^7x:c^g^5PContact,7^7Q:f/contact,8^8DAbout,5^8d:d/about,6^8-Home,4^8z:c/,1:9k", - ); - }); - }); - - describe("emoji party", () => { - const doc = { - "/emoji/🔥": { name: 
"fire", group: "travel-places" }, - "/emoji/💧": { name: "water", group: "travel-places" }, - "/emoji/🌱": { name: "seedling", group: "animals-nature" }, - "/emoji/🐍": { name: "snake", group: "animals-nature" }, - "/emoji/🎸": { name: "guitar", group: "objects" }, - "/emoji/⚽": { name: "soccer ball", group: "activities" }, - "/emoji/❤️": { name: "red heart", group: "smileys-emotion" }, - "/emoji/🏴‍☠️": { name: "pirate flag", group: "flags" }, - }; - test("byte counts are accurate with different options", () => { - expect(stringify(doc, { stringChainThreshold: 0 })).toBe( - "flags,5group,5pirate flag,bname,4:x/emoji/🏴‍☠️,ksmileys-emotion,fred heart,9^O:u/❤️,7/emoji,6.hactivities,asoccer ball,b^1y:s/⚽,4^C.8objects,7guitar,6^22:k/🎸,5^17.aanimals-nature,esnake,5^2G:q/🐍,5^1L.a^oseedling,8^37:f/🌱,5^2c.atravel-places,dwater,5^3K:p/💧,5^2P.a^ofire,4^47:b/🔥,5^3c.a:4X", - ); - expect(stringify(doc, { stringChainDelimiter: "" })).toBe( - "flags,5group,5pirate flag,bname,4:x/emoji/🏴‍☠️,ksmileys-emotion,fred heart,9^O:u/emoji/❤️,dactivities,asoccer ball,b^1u:s/emoji/⚽,aobjects,7guitar,6^20:k/emoji/🎸,banimals-nature,esnake,5^2F:q/emoji/🐍,b^pseedling,8^37:f/emoji/🌱,btravel-places,dwater,5^3L:p/emoji/💧,b^pfire,4^49:b/emoji/🔥,b:4-", - ); - }); - }); - - describe("encode colored fruits", () => { - const doc = [ - { color: "red", fruits: ["apple", "strawberry"] }, - { color: "green", fruits: ["apple"] }, - { color: "yellow", fruits: ["apple", "banana"] }, - { color: "orange", fruits: ["orange"] }, - ]; - test("with correct options applied", () => { - expect(stringify(doc)).toBe( - "orange,6;8fruits,6^acolor,5:rbanana,6apple,5;fyellow,6^p:r^e;2green,5^E:dstrawberry,a^F;ered,3^11:o;1z", - ); - expect(stringify(doc, {})).toBe( - "orange,6;8fruits,6^acolor,5:rbanana,6apple,5;fyellow,6^p:r^e;2green,5^E:dstrawberry,a^F;ered,3^11:o;1z", - ); - expect(stringify(doc)).toBe( - "orange,6;8fruits,6^acolor,5:rbanana,6apple,5;fyellow,6^p:r^e;2green,5^E:dstrawberry,a^F;ered,3^11:o;1z", - ); - }); - }); 
- }); -}); - -// ── parse / decode ── - -describe("parse", () => { - describe("primitives", () => { - test("parses integers", () => { - expect(parse("+")).toBe(0); - expect(parse("+2")).toBe(1); - expect(parse("+1")).toBe(-1); - expect(parse("+1k")).toBe(42); - expect(parse("+1j")).toBe(-42); - }); - - test("parses decimals", () => { - expect(parse("+9Q*3")).toBe(3.14); - expect(parse("+a*1")).toBe(0.5); - }); - - test("parses strings", () => { - expect(parse(",")).toBe(""); - expect(parse("hello world,b")).toBe("hello world"); - expect(parse("foo bar,7")).toBe("foo bar"); - }); - - test("parses booleans, null, undefined", () => { - expect(parse("'t")).toBe(true); - expect(parse("'f")).toBe(false); - expect(parse("'n")).toBe(null); - expect(parse("'u")).toBe(undefined); - }); - - test("parses special numbers", () => { - expect(parse("'nan")).toBeNaN(); - expect(parse("'inf")).toBe(Infinity); - expect(parse("'nif")).toBe(-Infinity); - }); - }); - - describe("arrays", () => { - test("parses simple arrays", () => { - expect([...(parse("+6+4+2;6") as any[])]).toEqual([1, 2, 3]); - }); - - test("parses empty array", () => { - expect([...(parse(";") as any[])]).toEqual([]); - }); - }); - - describe("objects", () => { - test("parses simple objects", () => { - const obj = parse("+1ksize,4red,3color,5:l") as any; - expect(obj.color).toBe("red"); - expect(obj.size).toBe(42); - }); - - test("parses empty object", () => { - expect(Object.keys(parse(":") as any)).toEqual([]); - }); - }); - - test("resolves pointer references", () => { - const arr = parse("hello,5^;8") as any[]; - expect(arr[0]).toBe("hello"); - expect(arr[1]).toBe("hello"); - }); -}); - -// ── streaming ── - -describe("stringify streaming", () => { - test("onChunk receives chunks", () => { - const chunks: { offset: number; data: string }[] = []; - stringify( - { a: 1 }, - { - chunkSize: 4, // small chunks for deterministic splitting - onChunk: (data, offset) => chunks.push({ offset, data }), - }, - ); - // Each 
chunk starts at the right offset - expect(chunks[0]!.offset).toBe(0); - for (let i = 1; i < chunks.length; i++) { - expect(chunks[i]!.offset).toBe( - chunks[i - 1]!.offset + chunks[i - 1]!.data.length, - ); - } - // Reassembled output matches non-streaming - const reassembled = chunks.map((c) => c.data).join(""); - expect(reassembled).toBe(stringify({ a: 1 })); - }); - - test("onChunk offsets are increasing", () => { - const offsets: number[] = []; - stringify([1, 2, 3, "hello", { a: true }], { - onChunk: (_, offset) => offsets.push(offset), - }); - for (let i = 1; i < offsets.length; i++) { - expect(offsets[i]).toBeGreaterThanOrEqual(offsets[i - 1]!); - } - }); - - test("reassembled chunks match non-streaming output", () => { - const value = { items: [1, "two", true], name: "test" }; - const direct = stringify(value); - const chunks: string[] = []; - stringify(value, { - onChunk: (chunk) => chunks.push(chunk), - }); - const result = chunks.join(""); - expect(result).toBe(direct); - }); -}); - -// ── round-trip ── - -describe("round-trip", () => { - const roundTrip = ( - value: unknown, - opts?: Parameters[1], - ) => { - const buf = encode(value, opts); - return open(buf, opts?.refs); - }; - - test("round-trips primitives", () => { - expect(roundTrip(0)).toBe(0); - expect(roundTrip(1)).toBe(1); - expect(roundTrip(-1)).toBe(-1); - expect(roundTrip(42)).toBe(42); - expect(roundTrip(3.14)).toBe(3.14); - expect(roundTrip("hello")).toBe("hello"); - expect(roundTrip("hello world")).toBe("hello world"); - expect(roundTrip("")).toBe(""); - expect(roundTrip(true)).toBe(true); - expect(roundTrip(false)).toBe(false); - expect(roundTrip(null)).toBe(null); - expect(roundTrip(undefined)).toBe(undefined); - }); - - test("round-trips arrays", () => { - expect([...(roundTrip([]) as any[])]).toEqual([]); - expect([...(roundTrip([1, 2, 3]) as any[])]).toEqual([1, 2, 3]); - expect([...(roundTrip(["a", "b", "c"]) as any[])]).toEqual(["a", "b", "c"]); - const nested = roundTrip([[1, 2], 
[3, 4]]) as any[]; - expect([...(nested[0] as any[])]).toEqual([1, 2]); - expect([...(nested[1] as any[])]).toEqual([3, 4]); - }); - - test("round-trips objects", () => { - expect(Object.keys(roundTrip({}) as any)).toEqual([]); - const obj = roundTrip({ a: 1, b: 2 }) as any; - expect(obj.a).toBe(1); - expect(obj.b).toBe(2); - const nested = roundTrip({ name: "rex", nested: { ok: true } }) as any; - expect(nested.name).toBe("rex"); - expect(nested.nested.ok).toBe(true); - }); - - test("round-trips complex nested structures", () => { - const value = { - routes: [ - { path: "/api/users", handler: "getUsers", methods: ["GET"] }, - { path: "/api/users", handler: "createUser", methods: ["POST"] }, - ], - metadata: { version: 1, generated: true }, - }; - const result = roundTrip(value) as any; - expect(result.metadata.version).toBe(1); - expect(result.metadata.generated).toBe(true); - expect(result.routes[0].path).toBe("/api/users"); - expect(result.routes[0].handler).toBe("getUsers"); - expect([...(result.routes[0].methods as any[])]).toEqual(["GET"]); - expect(result.routes[1].handler).toBe("createUser"); - }); - - test("round-trips with path chains", () => { - const paths = [ - "/docs/api/v2/users", - "/docs/api/v2/teams", - "/docs/api/v2/billing", - ]; - const result = roundTrip({ paths, config: { retries: 3, timeout: 30 } }) as any; - expect([...(result.paths as any[])]).toEqual(paths); - expect(result.config.retries).toBe(3); - expect(result.config.timeout).toBe(30); - }); - - test("round-trips with duplicated values", () => { - const shared = { type: "page", status: 200 }; - const result = roundTrip([shared, shared, shared]) as any[]; - expect(result[0].type).toBe("page"); - expect(result[0].status).toBe(200); - expect(result[1].type).toBe("page"); - expect(result[2].status).toBe(200); - }); - - test("round-trips large indexed arrays", () => { - const arr = Array.from({ length: 100 }, (_, i) => i); - const result = roundTrip(arr, { indexThreshold: 10 }) as any[]; - 
expect([...result]).toEqual(arr); - }); - - test("round-trips large indexed objects", () => { - const obj: Record = {}; - for (let i = 0; i < 50; i++) obj[`key${i}`] = i; - const result = roundTrip(obj, { indexThreshold: 10 }) as any; - for (const [k, v] of Object.entries(obj)) { - expect(result[k]).toBe(v); - } - }); - - test("round-trips with schemas", () => { - const data = { - entries: { - "/data/people/alice": { name: "alice", age: 1 }, - "/data/people/bob": { name: "bob", age: 2 }, - "/data/people/charlie": { name: "charlie", age: 3 }, - } - }; - const result = roundTrip(data) as any; - expect(result.entries["/data/people/alice"].name).toBe("alice"); - expect(result.entries["/data/people/bob"].age).toBe(2); - expect(result.entries["/data/people/charlie"].name).toBe("charlie"); - }); - - test("round-trips objects with overlapping but distinct key sets", () => { - const data = [{ a: 1, b: 2 }, { a: 3 }]; - const result = roundTrip(data) as any[]; - expect(result[0].a).toBe(1); - expect(result[0].b).toBe(2); - expect(result[1].a).toBe(3); - }); - - test("round-trips objects where first key is a pointer", () => { - const data = [ - { contentType: "text/html", status: 200 }, - { contentType: "text/css" }, - ]; - const result = roundTrip(data) as any[]; - expect(result[0].contentType).toBe("text/html"); - expect(result[0].status).toBe(200); - expect(result[1].contentType).toBe("text/css"); - }); - - test("round-trips mixed key/value reuse across objects", () => { - const data = [ - { label: "type" }, - { type: "page", active: true }, - ]; - const result = roundTrip(data) as any[]; - expect(result[0].label).toBe("type"); - expect(result[1].type).toBe("page"); - expect(result[1].active).toBe(true); - }); - - test("round-trips string ref", () => { - const refs = { H: "hello" }; - // Verify encoder actually uses the ref shorthand - const encoded = stringify("hello", { refs }); - expect(encoded).toBe("'H"); - // Verify decode with refs resolves correctly - const result 
= roundTrip("hello", { refs }); - expect(result).toBe("hello"); - }); - - test("round-trips number ref", () => { - const refs = { X: 42 }; - const encoded = stringify(42, { refs }); - expect(encoded).toBe("'X"); - const result = roundTrip(42, { refs }); - expect(result).toBe(42); - }); - - test("round-trips refs inside arrays", () => { - const refs = { H: "hello" }; - // Verify the ref is used — "hello" should be 'H, not hello,5 - const encoded = stringify(["hello", "world"], { refs }); - expect(encoded).toContain("'H"); - expect(encoded).not.toContain("hello,5"); - // Verify round-trip - const result = roundTrip(["hello", "world"], { refs }) as any[]; - expect(result[0]).toBe("hello"); - expect(result[1]).toBe("world"); - }); - - test("round-trips multiple refs", () => { - const refs = { H: "hello", W: "world" }; - const encoded = stringify(["hello", "world", "hello"], { refs }); - expect(encoded).toContain("'H"); - expect(encoded).toContain("'W"); - const result = roundTrip(["hello", "world", "hello"], { refs }) as any[]; - expect(result[0]).toBe("hello"); - expect(result[1]).toBe("world"); - expect(result[2]).toBe("hello"); - }); - - test("round-trips object ref as value", () => { - const sharedObj = { x: 1, y: 2 }; - const refs = { S: sharedObj }; - // Encoding { x: 1, y: 2 } with ref S should produce 'S - const encoded = stringify(sharedObj, { refs }); - expect(encoded).toBe("'S"); - // Round-trip resolves back to the ref value - const result = roundTrip(sharedObj, { refs }) as any; - expect(result.x).toBe(1); - expect(result.y).toBe(2); - }); - - test("round-trips schema ref for repeated object shapes", () => { - const data = [ - { a: 1, b: 2 }, - { a: 3, b: 4 }, - ]; - const refs = { S: ["a", "b"] }; - // Verify encoder uses schema refs — both objects should reference 'S - const encoded = stringify(data, { refs }); - expect(encoded).toBe("+8+6'S:6+4+2'S:6;g"); - // Both objects use 'S as schema, no inline keys - expect(encoded.match(/'S/g)?.length).toBe(2); 
- // Verify round-trip - const result = roundTrip(data, { refs }) as any[]; - expect(result[0].a).toBe(1); - expect(result[0].b).toBe(2); - expect(result[1].a).toBe(3); - expect(result[1].b).toBe(4); - // Verify Object.keys works on schema-ref objects - expect(Object.keys(result[0]).sort()).toEqual(["a", "b"]); - expect(Object.keys(result[1]).sort()).toEqual(["a", "b"]); - }); - - test("round-trips object ref as schema target", () => { - const data = { a: 1, b: 2 }; - const refs = { O: { a: 3, b: 4 } }; - // Encoder should use 'O as schema - const encoded = stringify(data, { refs }); - expect(encoded).toContain("'O"); - const result = roundTrip(data, { refs }) as any; - expect(result.a).toBe(1); - expect(result.b).toBe(2); - expect(Object.keys(result).sort()).toEqual(["a", "b"]); - }); - - test("round-trips refs mixed with non-ref values", () => { - const refs = { T: true, N: null }; - const data = [true, false, null, undefined, 42]; - const encoded = stringify(data, { refs }); - // true and null should use refs, others should not - expect(encoded).toContain("'T"); - expect(encoded).toContain("'N"); - const result = roundTrip(data, { refs }) as any[]; - expect(result[0]).toBe(true); - expect(result[1]).toBe(false); - expect(result[2]).toBe(null); - expect(result[3]).toBe(undefined); - expect(result[4]).toBe(42); - }); - - test("ref value that also appears as a key (known limitation)", () => { - // The encoder applies refs to keys too, encoding key "shared" as 'K. - // The decoder can't currently resolve this because refs in key position - // make the object appear to have a schema (the ref). This is a known - // encoder bug — refs shouldn't match object keys. 
- const refs = { K: "shared" }; - const encoded = stringify({ shared: "shared" }, { refs }); - // Both key and value become 'K — verifying the encoder behavior - expect(encoded).toBe("'K'K:4"); - }); - - test("opaque non-serializable ref values round-trip", () => { - // Functions and symbols can be refs — matched by identity on encode, - // returned as-is on decode. - const fn = () => "hello"; - const sym = Symbol("test"); - const refs = { F: fn, S: sym }; - // Encoder matches by identity via makeKey - const encoded = stringify([fn, sym, 42], { refs }); - expect(encoded).toContain("'F"); - expect(encoded).toContain("'S"); - // Decoder returns opaque values - const result = roundTrip([fn, sym, 42], { refs }) as any[]; - expect(result[0]).toBe(fn); - expect(result[1]).toBe(sym); - expect(result[2]).toBe(42); - }); - - // ── Number edge cases ── - - test("negative zero", () => { - // -0 is tricky: Object.is(-0, 0) is false, but -0 === 0 is true - const result = roundTrip(-0); - expect(result).toBe(0); // zigzag can't distinguish -0 from 0 - }); - - test("large integers (within zigzag safe range)", () => { - // Zigzag doubles the magnitude, so max safe integer for zigzag is MAX_SAFE_INTEGER / 2 - const maxZigzag = Math.floor(Number.MAX_SAFE_INTEGER / 2); - expect(roundTrip(maxZigzag)).toBe(maxZigzag); - expect(roundTrip(-maxZigzag)).toBe(-maxZigzag); - expect(roundTrip(0x7FFFFFFFFF)).toBe(0x7FFFFFFFFF); - expect(roundTrip(-0x7FFFFFFFFF)).toBe(-0x7FFFFFFFFF); - }); - - test("powers of 10 (trailing zeroes use exponent form)", () => { - for (const n of [10, 100, 1000, 10000, 1e10, 1e15, 1e20]) { - expect(roundTrip(n)).toBe(n); - expect(roundTrip(-n)).toBe(-n); - } - }); - - test("very small floats", () => { - expect(roundTrip(1e-10)).toBe(1e-10); - expect(roundTrip(5e-324)).toBe(5e-324); // Number.MIN_VALUE - }); - - test("floats with moderate precision", () => { - // Encoder uses toPrecision(14), so ~14 significant digits survive - expect(roundTrip(0.1 + 
0.2)).toBeCloseTo(0.1 + 0.2, 14); - expect(roundTrip(1.23456789012345)).toBeCloseTo(1.23456789012345, 13); - expect(roundTrip(9.876543210987e12)).toBe(9.876543210987e12); - }); - - // ── String edge cases ── - - test("empty string in various positions", () => { - const result = roundTrip({ "": 1, a: "" }) as any; - expect(result[""]).toBe(1); - expect(result.a).toBe(""); - }); - - test("strings containing rexc tag characters", () => { - // These characters are tags in rexc: + , : ; ^ . ' # * - const tags = ["+", ",", ":", ";", "^", ".", "'", "#", "*"]; - for (const ch of tags) { - expect(roundTrip(ch)).toBe(ch); - } - expect([...(roundTrip(tags) as any[])]).toEqual(tags); - }); - - test("strings that look like b64 digits", () => { - // b64 charset: 0-9 a-z A-Z - _ - const tricky = ["0", "a", "Z", "-", "_", "abc123", "---___"]; - for (const s of tricky) { - expect(roundTrip(s)).toBe(s); - } - }); - - test("keys that are b64 or tag characters", () => { - const obj: Record = {}; - const keys = ["+", ",", ":", ";", "^", ".", "'", "#", "*", "0", "a", "_"]; - keys.forEach((k, i) => obj[k] = i); - const result = roundTrip(obj) as any; - keys.forEach((k, i) => expect(result[k]).toBe(i)); - }); - - test("strings with null bytes and control characters", () => { - expect(roundTrip("\0")).toBe("\0"); - expect(roundTrip("\x01\x02\x03")).toBe("\x01\x02\x03"); - expect(roundTrip("hello\0world")).toBe("hello\0world"); - expect(roundTrip("\n\r\t")).toBe("\n\r\t"); - }); - - test("unicode edge cases", () => { - expect(roundTrip("🏴‍☠️")).toBe("🏴‍☠️"); // ZWJ sequence - expect(roundTrip("👨‍👩‍👧‍👦")).toBe("👨‍👩‍👧‍👦"); // family emoji (long ZWJ) - expect(roundTrip("é")).toBe("é"); // precomposed - expect(roundTrip("é")).toBe("é"); // decomposed (e + combining accent) - expect(roundTrip("\u{10FFFF}")).toBe("\u{10FFFF}"); // max codepoint - expect(roundTrip("日本語テスト")).toBe("日本語テスト"); // CJK - }); - - test("long string (multi-digit b64 length)", () => { - const long = "x".repeat(10000); - 
expect(roundTrip(long)).toBe(long); - }); - - test("keys that are prefixes of each other", () => { - const obj = { a: 1, ab: 2, abc: 3, abcd: 4 }; - const result = roundTrip(obj) as any; - expect(result.a).toBe(1); - expect(result.ab).toBe(2); - expect(result.abc).toBe(3); - expect(result.abcd).toBe(4); - }); - - // ── Container edge cases ── - - test("nested empty containers", () => { - const result1 = roundTrip([[]]) as any[]; - expect([...(result1[0] as any[])]).toEqual([]); - - const result2 = roundTrip([{}]) as any[]; - expect(Object.keys(result2[0])).toEqual([]); - - const result3 = roundTrip({ a: [] }) as any; - expect([...(result3.a as any[])]).toEqual([]); - - const result4 = roundTrip({ a: {} }) as any; - expect(Object.keys(result4.a)).toEqual([]); - }); - - test("deeply nested structure", () => { - let value: any = 42; - for (let i = 0; i < 50; i++) value = { v: value }; - let result = roundTrip(value) as any; - for (let i = 0; i < 50; i++) result = result.v; - expect(result).toBe(42); - }); - - test("mixed types in single array", () => { - const mixed = [0, -1, 3.14, "", "hello", true, false, null, undefined, [], {}]; - const result = roundTrip(mixed) as any[]; - expect(result[0]).toBe(0); - expect(result[1]).toBe(-1); - expect(result[2]).toBe(3.14); - expect(result[3]).toBe(""); - expect(result[4]).toBe("hello"); - expect(result[5]).toBe(true); - expect(result[6]).toBe(false); - expect(result[7]).toBe(null); - expect(result[8]).toBe(undefined); - expect([...(result[9] as any[])]).toEqual([]); - expect(Object.keys(result[10])).toEqual([]); - }); - - test("single-element containers", () => { - expect([...(roundTrip([42]) as any[])]).toEqual([42]); - const obj = roundTrip({ only: "one" }) as any; - expect(obj.only).toBe("one"); - }); - - test("container at exact index threshold", () => { - // Exactly at threshold: should get indexed - const atThreshold = Array.from({ length: 32 }, (_, i) => i); - const result1 = roundTrip(atThreshold, { indexThreshold: 32 
}) as any[]; - expect([...result1]).toEqual(atThreshold); - - // One below threshold: should NOT get indexed - const belowThreshold = Array.from({ length: 31 }, (_, i) => i); - const result2 = roundTrip(belowThreshold, { indexThreshold: 32 }) as any[]; - expect([...result2]).toEqual(belowThreshold); - }); - - // ── Pointer / dedup edge cases ── - - test("same object at different nesting depths", () => { - const shared = { x: 1 }; - const data = { a: shared, b: { c: shared }, d: [shared] }; - const result = roundTrip(data) as any; - expect(result.a.x).toBe(1); - expect(result.b.c.x).toBe(1); - expect(result.d[0].x).toBe(1); - }); - - test("many identical small values (pointer cost vs inline cost)", () => { - // Small values like single chars may be cheaper inline than as pointers - const data = Array.from({ length: 100 }, () => "a"); - const result = roundTrip(data) as any[]; - expect([...result]).toEqual(data); - }); - - test("identical arrays are deduplicated", () => { - const shared = [1, 2, 3]; - const data = [shared, shared]; - const result = roundTrip(data) as any[]; - expect([...(result[0] as any[])]).toEqual([1, 2, 3]); - expect([...(result[1] as any[])]).toEqual([1, 2, 3]); - }); - - test("string appears as both key and value", () => { - const data = { hello: "hello", world: "world" }; - const result = roundTrip(data) as any; - expect(result.hello).toBe("hello"); - expect(result.world).toBe("world"); - }); - - test("object where all values are identical", () => { - const obj: Record = {}; - for (let i = 0; i < 20; i++) obj[`k${i}`] = 999; - const result = roundTrip(obj) as any; - for (let i = 0; i < 20; i++) expect(result[`k${i}`]).toBe(999); - }); - - // ── Chain / path edge cases ── - - test("path that is exactly the split character", () => { - expect(roundTrip("/")).toBe("/"); - expect(roundTrip("//")).toBe("//"); - }); - - test("paths with trailing slashes", () => { - const paths = ["/foo/bar/", "/foo/baz/"]; - const result = roundTrip(paths) as any[]; 
- expect(result[0]).toBe("/foo/bar/"); - expect(result[1]).toBe("/foo/baz/"); - }); - - test("paths with consecutive slashes", () => { - const paths = ["/foo//bar", "/foo//baz"]; - const result = roundTrip(paths) as any[]; - expect(result[0]).toBe("/foo//bar"); - expect(result[1]).toBe("/foo//baz"); - }); - - test("paths with no shared prefix despite containing slashes", () => { - const paths = ["/alpha/one", "/beta/two"]; - const result = roundTrip(paths) as any[]; - expect(result[0]).toBe("/alpha/one"); - expect(result[1]).toBe("/beta/two"); - }); - - test("chain splitting disabled preserves paths", () => { - const paths = ["/foo/bar/baz", "/foo/bar/qux"]; - const result = roundTrip(paths, { stringChainDelimiter: "" }) as any[]; - expect(result[0]).toBe("/foo/bar/baz"); - expect(result[1]).toBe("/foo/bar/qux"); - }); - - test("many paths sharing a deep prefix", () => { - const base = "/a/b/c/d/e"; - const paths = Array.from({ length: 10 }, (_, i) => `${base}/item${i}`); - const result = roundTrip(paths) as any[]; - expect([...result]).toEqual(paths); - }); - - // ── Schema edge cases ── - - test("three different object shapes interleaved", () => { - const data = [ - { a: 1 }, - { b: 2 }, - { a: 3 }, - { b: 4 }, - { c: 5 }, - ]; - const result = roundTrip(data) as any[]; - expect(result[0].a).toBe(1); - expect(result[1].b).toBe(2); - expect(result[2].a).toBe(3); - expect(result[3].b).toBe(4); - expect(result[4].c).toBe(5); - }); - - test("objects with same keys but different value types", () => { - const data = [ - { x: 1, y: "hello" }, - { x: "world", y: 2 }, - ]; - const result = roundTrip(data) as any[]; - expect(result[0].x).toBe(1); - expect(result[0].y).toBe("hello"); - expect(result[1].x).toBe("world"); - expect(result[1].y).toBe(2); - }); - - test("schema object with nested containers as values", () => { - const data = [ - { list: [1, 2], meta: { ok: true } }, - { list: [3, 4], meta: { ok: false } }, - ]; - const result = roundTrip(data) as any[]; - 
expect([...(result[0].list as any[])]).toEqual([1, 2]); - expect(result[0].meta.ok).toBe(true); - expect([...(result[1].list as any[])]).toEqual([3, 4]); - expect(result[1].meta.ok).toBe(false); - }); - - test("wide object (many keys)", () => { - const obj: Record = {}; - for (let i = 0; i < 200; i++) obj[`field_${String(i).padStart(3, "0")}`] = i; - const result = roundTrip(obj) as any; - for (let i = 0; i < 200; i++) { - expect(result[`field_${String(i).padStart(3, "0")}`]).toBe(i); - } - }); - - // ── Combination stress tests ── - - test("indexed objects with chain keys", () => { - const obj: Record = {}; - for (let i = 0; i < 50; i++) obj[`/api/v2/resource/${i}`] = i; - const result = roundTrip(obj, { indexThreshold: 10 }) as any; - for (let i = 0; i < 50; i++) { - expect(result[`/api/v2/resource/${i}`]).toBe(i); - } - }); - - test("schemas + indexes + chains combined", () => { - const data = Array.from({ length: 40 }, (_, i) => ({ - path: `/section/${i % 5}/item/${i}`, - value: i, - active: i % 2 === 0, - })); - const result = roundTrip(data, { indexThreshold: 10 }) as any[]; - for (let i = 0; i < 40; i++) { - expect(result[i].path).toBe(`/section/${i % 5}/item/${i}`); - expect(result[i].value).toBe(i); - expect(result[i].active).toBe(i % 2 === 0); - } - }); - - test("number string keys ('0', '1', '2') in objects", () => { - const obj = { "0": "zero", "1": "one", "10": "ten" }; - const result = roundTrip(obj) as any; - expect(result["0"]).toBe("zero"); - expect(result["1"]).toBe("one"); - expect(result["10"]).toBe("ten"); - }); -}); - -// ── inspect() tests ── - -function inspected(value: unknown, opts?: Parameters[1]) { - return inspect(encode(value, opts), opts?.refs); -} - -function childArray(node: ASTNode): ASTNode[] { - return [...node]; -} - -describe("inspect() node fields", () => { - test("integer", () => { - const node = inspected(42); - expect(node.tag).toBe("+"); - expect(node.b64).toBe(42); - expect(node.size).toBe(0); - 
expect(node.left).toBe(node.right - 1 - 2); // tag + b64 digits - expect(node.value).toBe(42); - expect(childArray(node)).toHaveLength(0); - }); - - test("negative integer", () => { - const node = inspected(-7); - expect(node.tag).toBe("+"); - expect(node.b64).toBe(-7); - expect(node.size).toBe(0); - expect(node.value).toBe(-7); - }); - - test("zero", () => { - const node = inspected(0); - expect(node.tag).toBe("+"); - expect(node.b64).toBe(0); - expect(node.value).toBe(0); - }); - - test("float (decimal)", () => { - const node = inspected(3.14); - expect(node.tag).toBe("*"); - expect(typeof node.b64).toBe("number"); // exponent - expect(node.size).toBeGreaterThan(0); // has integer child - expect(node.value).toBeCloseTo(3.14); - const children = childArray(node); - expect(children).toHaveLength(1); - expect(children[0].tag).toBe("+"); // integer base - }); - - test("string", () => { - const node = inspected("hello"); - expect(node.tag).toBe(","); - expect(node.b64).toBe(5); // byte length of "hello" - expect(node.size).toBe(5); - expect(node.value).toBe("hello"); - }); - - test("ref builtins", () => { - const n = inspected(null); - expect(n.tag).toBe("'"); - expect(n.b64).toBe("n"); - expect(n.size).toBe(0); - expect(n.value).toBe(null); - - const t = inspected(true); - expect(t.tag).toBe("'"); - expect(t.b64).toBe("t"); - expect(t.value).toBe(true); - - const f = inspected(false); - expect(f.tag).toBe("'"); - expect(f.b64).toBe("f"); - expect(f.value).toBe(false); - - const u = inspected(undefined); - expect(u.tag).toBe("'"); - expect(u.b64).toBe("u"); - expect(u.value).toBe(undefined); - }); - - test("special floats", () => { - const inf = inspected(Infinity); - expect(inf.tag).toBe("'"); - expect(inf.b64).toBe("inf"); - expect(inf.value).toBe(Infinity); - - const ninf = inspected(-Infinity); - expect(ninf.tag).toBe("'"); - expect(ninf.b64).toBe("nif"); - expect(ninf.value).toBe(-Infinity); - - const nan = inspected(NaN); - expect(nan.tag).toBe("'"); - 
expect(nan.b64).toBe("nan"); - expect(nan.value).toBeNaN(); - }); - - test("empty string", () => { - const node = inspected(""); - expect(node.tag).toBe(","); - expect(node.b64).toBe(0); - expect(node.size).toBe(0); - expect(node.value).toBe(""); - }); -}); - -describe("inspect() containers", () => { - test("empty array", () => { - const node = inspected([]); - expect(node.tag).toBe(";"); - expect(node.b64).toBe(0); - expect(node.size).toBe(0); - expect(childArray(node)).toHaveLength(0); - expect((node.value as any).length).toBe(0); - }); - - test("simple array", () => { - const node = inspected([1, 2, 3]); - expect(node.tag).toBe(";"); - expect(node.size).toBeGreaterThan(0); - const children = childArray(node); - expect(children).toHaveLength(3); - expect(children[0].tag).toBe("+"); - expect(children[0].b64).toBe(1); - expect(children[1].b64).toBe(2); - expect(children[2].b64).toBe(3); - }); - - test("empty object", () => { - const node = inspected({}); - expect(node.tag).toBe(":"); - expect(node.b64).toBe(0); - expect(node.size).toBe(0); - expect(childArray(node)).toHaveLength(0); - }); - - test("simple object — interleaved key/value children", () => { - const node = inspected({ a: 1 }); - expect(node.tag).toBe(":"); - const children = childArray(node); - // Should have key and value as children - expect(children.length).toBe(2); - // First child (rightmost in buffer = key "a") - expect(children[0].tag).toBe(","); // string key - expect(children[0].value).toBe("a"); - // Second child = value 1 - expect(children[1].tag).toBe("+"); - expect(children[1].b64).toBe(1); - }); - - test("chain", () => { - const node = inspected("/foo/bar/baz", { stringChainDelimiter: "/", stringChainThreshold: 0 }); - // Depending on dedup, might be a plain string or a chain - if (node.tag === ".") { - expect(node.size).toBeGreaterThan(0); - const children = childArray(node); - expect(children.length).toBeGreaterThan(0); - } - }); -}); - -describe("inspect() indexed containers", () => { 
- test("large array has # index child", () => { - const arr = Array.from({ length: 50 }, (_, i) => i); - const node = inspected(arr, { indexThreshold: 32 }); - expect(node.tag).toBe(";"); - const children = childArray(node); - // First child should be the # index node - const indexNode = children.find(c => c.tag === "#"); - expect(indexNode).toBeDefined(); - expect(typeof indexNode!.b64).toBe("object"); - const { count, width } = indexNode!.b64 as { count: number; width: number }; - expect(count).toBe(50); - expect(width).toBeGreaterThanOrEqual(1); - // Rest are element nodes - const elements = children.filter(c => c.tag !== "#"); - expect(elements).toHaveLength(50); - }); - - test("large object has # index child", () => { - const obj: Record = {}; - for (let i = 0; i < 50; i++) obj[`key${String(i).padStart(3, "0")}`] = i; - const node = inspected(obj, { indexThreshold: 32 }); - expect(node.tag).toBe(":"); - const children = childArray(node); - const indexNode = children.find(c => c.tag === "#"); - expect(indexNode).toBeDefined(); - }); -}); - -describe("inspect() pointers", () => { - test("pointer node", () => { - // Encode something that creates pointers (repeated values) - const data = encode(["hello", "hello"]); - const root = inspect(data); - expect(root.tag).toBe(";"); - const children = childArray(root); - // One should be a string, the other a pointer - const ptr = children.find(c => c.tag === "^"); - const str = children.find(c => c.tag === ","); - expect(ptr).toBeDefined(); - expect(str).toBeDefined(); - expect(ptr!.size).toBe(0); - expect(typeof ptr!.b64).toBe("number"); // delta - expect(ptr!.value).toBe("hello"); - }); -}); - -describe("inspect() value resolution", () => { - test("value matches open() for primitives", () => { - expect(inspected(42).value).toBe(42); - expect(inspected("hi").value).toBe("hi"); - expect(inspected(true).value).toBe(true); - expect(inspected(null).value).toBe(null); - expect(inspected(undefined).value).toBe(undefined); - 
}); - - test("value returns open() proxy for containers", () => { - const node = inspected({ x: 1, y: 2 }); - const val = node.value as any; - expect(val.x).toBe(1); - expect(val.y).toBe(2); - }); - - test("value on array proxy", () => { - const node = inspected([10, 20, 30]); - const val = node.value as any; - expect(val[0]).toBe(10); - expect(val[1]).toBe(20); - expect(val[2]).toBe(30); - expect(val.length).toBe(3); - }); - - test("value with refs", () => { - const myRef = { a: 1, b: 2 }; - const data = encode(myRef, { refs: { MYREF: myRef } }); - // The ref itself resolves to the original object - const root = inspect(data, { MYREF: myRef }); - expect(root.value).toBe(myRef); - }); -}); - -describe("inspect() semantic utilities", () => { - test("entries() on simple object", () => { - const node = inspected({ x: 1, y: 2 }); - const entries = [...node.entries()]; - expect(entries).toHaveLength(2); - // Keys should be string nodes - expect(entries[0][0].value).toBe("x"); - expect(entries[0][1].value).toBe(1); - expect(entries[1][0].value).toBe("y"); - expect(entries[1][1].value).toBe(2); - }); - - test("keys() on object", () => { - const node = inspected({ a: 1, b: 2, c: 3 }); - const keys = [...node.keys()].map(k => k.value); - expect(keys).toEqual(["a", "b", "c"]); - }); - - test("values() on object", () => { - const node = inspected({ a: 10, b: 20 }); - const vals = [...node.values()].map(v => v.value); - expect(vals).toEqual([10, 20]); - }); - - test("values() on array", () => { - const node = inspected([10, 20, 30]); - const vals = [...node.values()].map(v => v.value); - expect(vals).toEqual([10, 20, 30]); - }); - - test("index() on array", () => { - const node = inspected([10, 20, 30]); - expect(node.index(0)?.value).toBe(10); - expect(node.index(1)?.value).toBe(20); - expect(node.index(2)?.value).toBe(30); - expect(node.index(3)).toBeUndefined(); - expect(node.index(-1)).toBeUndefined(); - }); - - test("index() on object", () => { - const node = inspected({ 
foo: 1, bar: 2 }); - expect(node.index("foo")?.value).toBe(1); - expect(node.index("bar")?.value).toBe(2); - expect(node.index("baz")).toBeUndefined(); - }); - - test("index() on large indexed array", () => { - const arr = Array.from({ length: 50 }, (_, i) => i * 10); - const node = inspected(arr, { indexThreshold: 32 }); - expect(node.index(0)?.value).toBe(0); - expect(node.index(25)?.value).toBe(250); - expect(node.index(49)?.value).toBe(490); - expect(node.index(50)).toBeUndefined(); - }); - - test("index() on large indexed object", () => { - const obj: Record = {}; - for (let i = 0; i < 50; i++) obj[`k${String(i).padStart(3, "0")}`] = i; - const node = inspected(obj, { indexThreshold: 32 }); - expect(node.index("k000")?.value).toBe(0); - expect(node.index("k025")?.value).toBe(25); - expect(node.index("k049")?.value).toBe(49); - expect(node.index("missing")).toBeUndefined(); - }); - - test("filteredKeys() on indexed object", () => { - const obj: Record = {}; - for (let i = 0; i < 50; i++) obj[`k${String(i).padStart(3, "0")}`] = i; - const node = inspected(obj, { indexThreshold: 32 }); - const matches = [...node.filteredKeys("k00")]; - // k000..k009 = 10 matches - expect(matches).toHaveLength(10); - expect(matches[0][0].value).toBe("k000"); - expect(matches[0][1].value).toBe(0); - }); - - test("filteredKeys() on non-indexed object", () => { - const node = inspected({ apple: 1, apricot: 2, banana: 3 }); - const matches = [...node.filteredKeys("ap")]; - expect(matches).toHaveLength(2); - const keys = matches.map(([k]) => k.value); - expect(keys).toContain("apple"); - expect(keys).toContain("apricot"); - }); -}); - -describe("inspect() lazy iteration", () => { - test("partial children iteration", () => { - const arr = Array.from({ length: 100 }, (_, i) => i); - const node = inspected(arr, { indexThreshold: 32 }); - let count = 0; - for (const _child of node) { - count++; - if (count >= 3) break; - } - expect(count).toBe(3); - }); - - test("data property is not in 
ownKeys", () => { - const node = inspected(42); - expect(Object.keys(node)).not.toContain("data"); - // But it's still accessible - expect(node.data).toBeInstanceOf(Uint8Array); - }); -}); - -describe("inspect() schema objects", () => { - test("entries() on schema object", () => { - // Encode multiple objects with the same shape to trigger schema dedup - const data = encode([ - { name: "alice", age: 30 }, - { name: "bob", age: 25 }, - ]); - const root = inspect(data); - const children = [...root.values()]; - // Both should be object nodes - expect(children[0].tag).toBe(":"); - expect(children[1].tag).toBe(":"); - // The second object should use a schema (pointer to first) - // entries() should still work on both - const e0 = [...children[0].entries()]; - const e1 = [...children[1].entries()]; - expect(e0).toHaveLength(2); - expect(e1).toHaveLength(2); - expect(e0[0][0].value).toBe("name"); - expect(e0[0][1].value).toBe("alice"); - expect(e1[0][0].value).toBe("name"); - expect(e1[0][1].value).toBe("bob"); - }); -}); - -describe("inspect() array-like behavior", () => { - test("numeric index access", () => { - const node = inspected([10, 20, 30]); - expect(node[0].tag).toBe("+"); - expect(node[0].b64).toBe(10); - expect(node[1].b64).toBe(20); - expect(node[2].b64).toBe(30); - expect(node[3]).toBeUndefined(); - }); - - test(".length returns child count", () => { - const node = inspected([10, 20, 30]); - expect(node.length).toBe(3); - }); - - test(".length on leaf node is 0", () => { - const node = inspected(42); - expect(node.length).toBe(0); - }); - - test("for...of iteration", () => { - const node = inspected([1, 2, 3]); - const values: number[] = []; - for (const child of node) { - values.push(child.b64 as number); - } - expect(values).toEqual([1, 2, 3]); - }); - - test("spread into array", () => { - const node = inspected([1, 2, 3]); - const arr = [...node]; - expect(arr).toHaveLength(3); - expect(arr[0].b64).toBe(1); - }); - - test("incremental parsing — accessing 
[5] parses 0..5, not all", () => { - // Use 10 elements (below INDEX_THRESHOLD) to avoid a # index child - const arr = Array.from({ length: 10 }, (_, i) => i); - const node = inspected(arr); - // Access index 5 — should parse children 0-5 - const child5 = node[5]; - expect(child5!.b64).toBe(5); - // Now access index 2 — should be cached, no re-parsing - const child2 = node[2]; - expect(child2!.b64).toBe(2); - // Access beyond — parses more - const child8 = node[8]; - expect(child8!.b64).toBe(8); - }); -}); - -describe("inspect() JSON.stringify", () => { - test("leaf node serializes with tag and b64", () => { - const node = inspected(42); - const json = JSON.parse(JSON.stringify(node)); - expect(json.tag).toBe("+"); - expect(json.b64).toBe(42); - expect(json.left).toBeDefined(); - expect(json.right).toBeDefined(); - }); - - test("container serializes with children array", () => { - const node = inspected([1, 2, 3]); - const json = JSON.parse(JSON.stringify(node)); - expect(json.tag).toBe(";"); - expect(json.children).toHaveLength(3); - expect(json.children[0].tag).toBe("+"); - expect(json.children[0].b64).toBe(1); - expect(json.children[1].b64).toBe(2); - expect(json.children[2].b64).toBe(3); - }); - - test("nested structure serializes recursively", () => { - const node = inspected({ items: [1, 2] }); - const json = JSON.parse(JSON.stringify(node)); - expect(json.tag).toBe(":"); - expect(json.children.length).toBeGreaterThan(0); - // Should have nested children - const arrChild = json.children.find((c: any) => c.tag === ";"); - expect(arrChild).toBeDefined(); - expect(arrChild.children).toHaveLength(2); - }); - - test("pointer serializes as leaf", () => { - const data = encode(["hello", "hello"]); - const root = inspect(data); - const json = JSON.parse(JSON.stringify(root)); - const ptr = json.children.find((c: any) => c.tag === "^"); - expect(ptr).toBeDefined(); - expect(typeof ptr.b64).toBe("number"); - // Pointer has no children - 
expect(ptr.children).toBeUndefined(); - }); -}); - -// ── Regression: cursor corruption after strEquals/strHasPrefix/resolveKeyStr ── - -describe("cursor corruption regressions", () => { - // To trigger these bugs, keys must be encoded as pointers (^) — which - // happens when the same string was already written as a value earlier. - // We include key strings as values in an "_index" entry so the encoder - // deduplicates the keys as pointers. 20+ keys ensures indexing is used. - - /** Build an indexed object whose keys are pointer-deduplicated. - * The "_index" array contains the key strings as values so they're - * written first; when the encoder later writes them as keys, it emits - * pointer (^) nodes instead of inline strings. */ - function buildPointerKeyObject(keys: string[], valueFn: (k: string, i: number) => unknown) { - const obj: Record = {}; - for (let i = 0; i < keys.length; i++) obj[keys[i]!] = valueFn(keys[i]!, i); - obj["_index"] = keys; // forces key strings to be written as values first - return obj; - } - - test("findKey on indexed object with pointer keys returns correct value", () => { - const keys = Array.from({ length: 20 }, (_, i) => `/blog/post-${i}`); - keys.push("/blog/[slug]"); - const obj = buildPointerKeyObject(keys, (k, i) => - k === "/blog/[slug]" ? { id: 99, content: "hello" } : { id: i }); - - const data = encode(obj); - const c = makeCursor(data); - read(c); - expect(c.ixWidth).toBeGreaterThan(0); // confirm indexed - - const v = makeCursor(data); - expect(findKey(v, c, "/blog/[slug]")).toBe(true); - expect(v.tag).toBe("object"); // value is an object, not a stray string - const inner = makeCursor(data); - expect(findKey(inner, v, "id")).toBe(true); - expect(inner.val).toBe(99); - }); - - test("Proxy open() on indexed object with pointer keys", () => { - const keys = Array.from({ length: 20 }, (_, i) => `/blog/post-${i}`); - keys.push("/blog/[slug]"); - const obj = buildPointerKeyObject(keys, (k, i) => - k === "/blog/[slug]" ? 
{ id: 42, content: "hello" } : { id: i }); - - const data = encode(obj); - const root = open(data) as Record; - - expect(root["/blog/[slug]"]).toBeDefined(); - expect(root["/blog/[slug]"].id).toBe(42); - expect(root["/blog/[slug]"].content).toBe("hello"); - expect(root["/blog/post-3"].id).toBe(3); - }); - - test("findByPrefix on indexed object with pointer keys returns correct values", () => { - const keys = Array.from({ length: 20 }, (_, i) => `/api/route-${i}`); - const obj = buildPointerKeyObject(keys, (_, i) => i); - - const data = encode(obj); - const c = makeCursor(data); - read(c); - expect(c.ixWidth).toBeGreaterThan(0); - - const results: [string, number][] = []; - const v = makeCursor(data); - findByPrefix(v, c, "/api/route-", (key, value) => { - results.push([resolveStr(key), value.val]); - }); - expect(results).toHaveLength(20); - for (let i = 0; i < 20; i++) { - expect(results.find(([k]) => k === `/api/route-${i}`)?.[1]).toBe(i); - } - }); - - test("filteredKeys on indexed inspect node with pointer keys", () => { - const keys = Array.from({ length: 20 }, (_, i) => `/page/item-${i}`); - const obj = buildPointerKeyObject(keys, (_, i) => i * 10); - - const data = encode(obj); - const node = inspect(data); - - const matches = [...node.filteredKeys("/page/item-")]; - expect(matches).toHaveLength(20); - for (const [keyNode, valNode] of matches) { - const key = keyNode.value as string; - expect(key).toMatch(/^\/page\/item-\d+$/); - const idx = parseInt(key.replace("/page/item-", "")); - expect(valNode.value).toBe(idx * 10); - } - }); - - test("ensureKeyMap with array schema and pointer keys resolves correctly", () => { - // Multiple objects sharing a schema with path-like keys. - // The encoder deduplicates repeated key sets as schemas. 
- const items = []; - for (let i = 0; i < 4; i++) { - items.push({ "/data/alpha": i, "/data/beta": i * 10, "/data/gamma": i * 100 }); - } - - const data = encode(items); - const root = open(data) as any[]; - - for (let i = 0; i < 4; i++) { - expect(root[i]["/data/alpha"]).toBe(i); - expect(root[i]["/data/beta"]).toBe(i * 10); - expect(root[i]["/data/gamma"]).toBe(i * 100); - } +describe("encoder options", () => { + test("minIndexDepth=1 forces root index, blocks nested", () => { + const v = { a: 1, b: { c: 2, d: 3, e: 4, f: 5, g: 6, h: 7 } }; + // With both depths set to 1: only root has an index, nothing nested + const out = stringify(v, { minIndexDepth: 1, maxIndexDepth: 1 }); + const decoded = parse(out); + expect(decoded).toEqual(v); + // Root should have a `#` index tag near the end (rough sanity check) + expect(out.includes("#")).toBe(true); }); }); diff --git a/rx.ts b/rx.ts index 6661753..7e8e77c 100644 --- a/rx.ts +++ b/rx.ts @@ -19,51 +19,75 @@ // const text = stringify(myData); // string // // For decoding / random-access reading, see rx-read.ts. -// For the binary variant (smaller output), see rxb.ts / rxb-read.ts. // For the format specification, see docs/rx-format.md. // /////////////////////////////////////////////////////////////////// -// TUNE AS NEEDED CONSTANTS -export let INDEX_THRESHOLD = 16; // Objects and Arrays with more values than this are indexed -export let STRING_CHAIN_THRESHOLD = 24; // Strings longer than this are eligible for splitting into chains -export let STRING_CHAIN_DELIMITER = "/."; // Delimiter chars for splitting long strings into chains -export let DEDUP_COMPLEXITY_LIMIT = 32; // Max recursive node count for structural dedup via JSON.stringify - -// Tag byte constants (ASCII codes of the tag characters) -export const TAG_COMMA = 44; // ',' -export const TAG_DOT = 46; // '.' 
-export const TAG_COLON = 58; // ':' -export const TAG_SEMI = 59; // ';' -export const TAG_HASH = 35; // '#' -export const TAG_CARET = 94; // '^' -export const TAG_PLUS = 43; // '+' -export const TAG_STAR = 42; // '*' - -export function tune(options: Partial<{ +// ── Tunable defaults ── +// Same option set is accepted by `tune()` (sets the module-wide defaults) and +// by `encode()` / `stringify()` (per-call overrides). Per-call options take +// precedence over tuned defaults; if neither is set, the values below are used. +export interface TuneOptions { + /** Container body byte threshold for emitting an index. R-to-L skipping is + * byte-proportional, so the cost we care about is total body bytes. */ indexThreshold?: number; + /** Containers shallower than this depth always carry an index (overrides + * the byte heuristic). Root is depth 0. */ + minIndexDepth?: number; + /** Containers at this depth or deeper never carry an index. */ + maxIndexDepth?: number; + /** Strings longer than this are eligible for splitting into chains. */ stringChainThreshold?: number; + /** Delimiter characters used for splitting long strings into chains. Empty + * string disables chain splitting. */ stringChainDelimiter?: string; + /** Max recursive node count for structural dedup via JSON.stringify. 0 = disable. */ dedupComplexityLimit?: number; -}>): void { - if (options.indexThreshold !== undefined) INDEX_THRESHOLD = options.indexThreshold; - if (options.stringChainThreshold !== undefined) STRING_CHAIN_THRESHOLD = options.stringChainThreshold; - if (options.stringChainDelimiter !== undefined) STRING_CHAIN_DELIMITER = options.stringChainDelimiter; - if (options.dedupComplexityLimit !== undefined) DEDUP_COMPLEXITY_LIMIT = options.dedupComplexityLimit; } +// Module-wide active values. Mutated by `tune()`, read in `encode()` as the +// fallback when a per-call option is undefined. 
+const tuned: Required = { + indexThreshold: 64, + minIndexDepth: 0, + maxIndexDepth: Infinity, + stringChainThreshold: 24, + stringChainDelimiter: "/.", + dedupComplexityLimit: 32, +}; + +/** Override the module-wide encoding defaults. Per-call `EncodeOptions` always + * win over tuned values; tuned values win over the built-in defaults. + * Returns the new active defaults. */ +export function tune(options: TuneOptions): Required { + for (const k of Object.keys(options) as (keyof TuneOptions)[]) { + const v = options[k]; + if (v !== undefined) (tuned as Record)[k] = v; + } + return { ...tuned }; +} + +// Tag byte constants (internal — ASCII codes of the tag characters) +const TAG_COMMA = 44; // ',' +const TAG_DOT = 46; // '.' +const TAG_HASH = 35; // '#' +const TAG_CARET = 94; // '^' +const TAG_PLUS = 43; // '+' +const TAG_STAR = 42; // '*' + // ── Base64 numeric system ── // Numbers are written big-endian with the most significant digit on the left // There is no padding, not even for zero, which is an empty string -export const b64chars = +const b64chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_"; // char-code -> digit-value (0xff = invalid) +/** @internal — exported for use by rx-read.ts only */ export const b64decodeTable = new Uint8Array(256).fill(0xff); // digit-value -> char-code -export const b64encodeTable = new Uint8Array(64); +const b64encodeTable = new Uint8Array(64); for (let i = 0; i < 64; i++) { const code = b64chars.charCodeAt(i); @@ -72,79 +96,13 @@ for (let i = 0; i < 64; i++) { } // Return true if byte is 0-9, a-z, A-Z, '-' or '_' +/** @internal — exported for use by rx-read.ts only */ export function isB64(byte: number): boolean { return b64decodeTable[byte] !== 0xff; } -// Encode a number as b64 string -export function b64Stringify(num: number): string { - if (!Number.isSafeInteger(num) || num < 0) { - throw new Error(`Cannot stringify ${num} as base64`); - } - let result = ""; - while (num > 0) { - result = 
b64chars[num % 64] + result; - num = Math.floor(num / 64); - } - return result; -} - -// Decode a b64 string to a number -export function b64Parse(str: string): number { - let result = 0; - for (let i = 0; i < str.length; i++) { - const digit = b64decodeTable[str.charCodeAt(i)]!; - if (digit === 0xff) { - throw new Error(`Invalid base64 character: ${str[i]}`); - } - result = result * 64 + digit; - } - return result; -} - -// Read a b64 number from a byte range -export function b64Read( - data: Uint8Array, - left: number, - right: number, -): number { - let result = 0; - for (let i = left; i < right; i++) { - const digit = b64decodeTable[data[i]!]! - if (digit === 0xff) { - throw new Error(`Invalid base64 character code: ${data[i]}`); - } - result = result * 64 + digit; - } - return result; -} - -// Return the number of b64 digits needed to encode num -export function b64Sizeof(num: number): number { - if (!Number.isSafeInteger(num) || num < 0) { - throw new Error(`Cannot calculate size of ${num} as base64`); - } - return Math.ceil(Math.log(num + 1) / Math.log(64)); -} - -export function b64Write( - data: Uint8Array, - left: number, - right: number, - num: number, -) { - let offset = right - 1; - while (offset >= left) { - data[offset--] = b64encodeTable[num % 64]!; - num = Math.floor(num / 64); - } - if (num > 0) { - throw new Error(`Cannot write ${num} as base64`); - } -} - // Encode a signed integer as an unsigned zigzag value -export function toZigZag(num: number): number { +function toZigZag(num: number): number { if (num >= -0x80000000 && num <= 0x7fffffff) { return ((num << 1) ^ (num >> 31)) >>> 0; } @@ -152,6 +110,7 @@ export function toZigZag(num: number): number { } // Decode an unsigned zigzag value back to a signed integer +/** @internal — exported for use by rx-read.ts only */ export function fromZigZag(num: number): number { if (num <= 0xffffffff) { return (num >>> 1) ^ -(num & 1); @@ -166,19 +125,11 @@ const textDecoder = new TextDecoder(); export 
type Refs = Record; -export interface EncodeOptions { - /** Stream chunks instead of returning a buffer */ +export interface EncodeOptions extends TuneOptions { + /** Stream chunks instead of returning a buffer. */ onChunk?: (chunk: Uint8Array, offset: number) => void; - /** External dictionary of known values (UPPERCASE KEYS) */ + /** External dictionary of known values (UPPERCASE KEYS). */ refs?: Refs; - /** Override INDEX_THRESHOLD for this encode call. 0 = always index, Infinity = never index. */ - indexThreshold?: number; - /** Override STRING_CHAIN_THRESHOLD. 0 = always split on delimiter, Infinity = never split. */ - stringChainThreshold?: number; - /** Override STRING_CHAIN_DELIMITER. Empty string disables chain splitting. */ - stringChainDelimiter?: string; - /** Override DEDUP_COMPLEXITY_LIMIT. Objects/arrays with recursive node count below this are structurally deduped. 0 = disable. */ - dedupComplexityLimit?: number; /** Buffer chunk size in bytes. Chunks are flushed when full. Default 65536. */ chunkSize?: number; } @@ -201,7 +152,7 @@ function trimZeroes(str: string): [number, number] { return [parseInt(trimmed, 10), str.length - end]; } -export function splitNumber(val: number): [number, number] { +function splitNumber(val: number): [number, number] { if (Number.isInteger(val)) { if (Math.abs(val) < 10) return [val, 0]; if (Math.abs(val) < 9.999999999999999e20) return trimZeroes(val.toString()); @@ -234,7 +185,7 @@ function entryValue(e: [string, unknown]): unknown { } // Compare two strings in UTF-8 byte order (code point order preserves UTF-8 ordering) -export function utf8Sort(a: string, b: string): number { +function utf8Sort(a: string, b: string): number { const len = Math.min(a.length, b.length); for (let i = 0; i < len;) { const cpA = a.codePointAt(i) ?? 0; @@ -250,7 +201,7 @@ export function utf8Sort(a: string, b: string): number { // Generates a stable cache key for ref lookups. // Primitives get a type-tagged string. 
Objects use JSON.stringify (cached). const KeyMap = new WeakMap(); -export function makeKey(rootVal: unknown): unknown { +function makeKey(rootVal: unknown): unknown { if (rootVal === null || rootVal === undefined) return String(rootVal); switch (typeof rootVal) { case "string": return '"' + rootVal; @@ -293,9 +244,19 @@ export function encode( export function encode(value: unknown, options?: EncodeOptions): Uint8Array; export function encode(rootValue: unknown, options?: EncodeOptions): Uint8Array | undefined { const opts = { ...ENCODE_DEFAULTS, ...options }; - const indexThreshold = opts.indexThreshold ?? INDEX_THRESHOLD; - const chainThreshold = opts.stringChainThreshold ?? STRING_CHAIN_THRESHOLD; - const chainDelimiter = opts.stringChainDelimiter ?? STRING_CHAIN_DELIMITER; + const indexThreshold = opts.indexThreshold ?? tuned.indexThreshold; + const chainThreshold = opts.stringChainThreshold ?? tuned.stringChainThreshold; + const chainDelimiter = opts.stringChainDelimiter ?? tuned.stringChainDelimiter; + const minIndexDepth = opts.minIndexDepth ?? tuned.minIndexDepth; + const maxIndexDepth = opts.maxIndexDepth ?? tuned.maxIndexDepth; + // Container depth: root container is depth 0, its container children are 1, etc. + let depth = 0; + // Resolve index decision for a container at depth `d` with body size `bodySize`. + // Returns true (force on), false (force off), or applies the byte heuristic. + const wantsIndex = (d: number, bodySize: number) => + d < minIndexDepth ? true : + d >= maxIndexDepth ? false : + bodySize >= indexThreshold; // Build a fast delimiter lookup set for chain splitting const chainDelimSet = new Uint8Array(128); @@ -314,9 +275,15 @@ export function encode(rootValue: unknown, options?: EncodeOptions): Uint8Array const seen = new Map(); const seenBig = new Map(); // Schema trie: nested objects keyed by individual key names, avoids join() allocation. - // Terminal nodes store the offset under a Symbol key to avoid conflicts with real keys. 
+ // Terminal nodes store metadata under Symbol keys to avoid conflicts with real keys. + // SCHEMA_OFFSET: byte position of the inline schema node (set on first encoding). + // SCHEMA_COUNT: number of times this shape appears in the input (filled by prescan). const SCHEMA_OFFSET: unique symbol = Symbol(); - type SchemaTrie = { [key: string]: SchemaTrie } & { [SCHEMA_OFFSET]?: number | string }; + const SCHEMA_COUNT: unique symbol = Symbol(); + type SchemaTrie = { [key: string]: SchemaTrie } & { + [SCHEMA_OFFSET]?: number | string; + [SCHEMA_COUNT]?: number; + }; const schemaTrie: SchemaTrie = Object.create(null); // Traverses the trie, creating nodes as needed, and returns the leaf. @@ -440,7 +407,7 @@ export function encode(rootValue: unknown, options?: EncodeOptions): Uint8Array // Pre-scan: mark objects/arrays with complexity below COMPLEXITY_LIMIT as // eligible for structural dedup via JSON.stringify. Only simple values are // stored in the set — complex values are skipped during encoding. - const complexityLimit = opts.dedupComplexityLimit ?? DEDUP_COMPLEXITY_LIMIT; + const complexityLimit = opts.dedupComplexityLimit ?? tuned.dedupComplexityLimit; const simpleValues = new WeakSet(); (function prescan(val: unknown): number { @@ -451,6 +418,15 @@ export function encode(rootValue: unknown, options?: EncodeOptions): Uint8Array for (let i = 0; i < val.length; i++) c += prescan(val[i]); } else { const keys = Object.keys(val); + // Count shape occurrences for schema sharing — only shapes that appear + // more than once will end up using a schema in the main pass. + if (keys.length >= 2) { + let leaf: SchemaTrie = schemaTrie; + for (let i = 0; i < keys.length; i++) { + leaf = leaf[keys[i]!] ??= Object.create(null); + } + leaf[SCHEMA_COUNT] = (leaf[SCHEMA_COUNT] ?? 
0) + 1; + } for (let i = 0; i < keys.length; i++) c += 1 + prescan((val as any)[keys[i]!]); } if (c < complexityLimit) simpleValues.add(val); @@ -589,10 +565,11 @@ export function encode(rootValue: unknown, options?: EncodeOptions): Uint8Array if (prefixLengths!.has(offset)) { const prefix = value.slice(0, offset); if (knownPrefixes.has(prefix)) { - const before = pos; + pushASCII("<"); writeAny(value.substring(offset)); writeAny(prefix); - return emitUnsigned(TAG_DOT, pos - before); + pushASCII(">"); + return pos; } } // find next delimiter to the left @@ -663,9 +640,22 @@ export function encode(rootValue: unknown, options?: EncodeOptions): Uint8Array } function writeArray(value: unknown[]) { - const start = pos; - writeValues(value); - return emitUnsigned(TAG_SEMI, pos - start); + pushASCII("["); + const length = value.length; + if (length > 0) { + const myDepth = depth; + const offsets = new Array(length); + const bodyStart = pos; + depth = myDepth + 1; + for (let i = length - 1; i >= 0; i--) { + writeAny(value[i]); + offsets[i] = pos; + } + depth = myDepth; + if (wantsIndex(myDepth, pos - bodyStart)) writeIndex(offsets, length); + } + pushASCII("]"); + return pos; } // Write a b64-encoded number of exactly `width` digits into buf at `offset`. @@ -686,100 +676,158 @@ export function encode(rootValue: unknown, options?: EncodeOptions): Uint8Array if (width > 8) throw new Error(`Index width exceeds maximum of 8 characters: ${width}`); const totalBytes = count * width; ensureCapacity(totalBytes + 16); + // Entries are stored in REVERSE natural order so R-to-L scanning yields + // them forward — the rightmost entry holds the delta for element 0. 
for (let i = 0; i < count; i++) { - writeB64Fixed(buf, off + i * width, pos - offsets[i]!, width); + writeB64Fixed(buf, off + i * width, pos - offsets[count - 1 - i]!, width); } pos += totalBytes; off += totalBytes; emitUnsigned(TAG_HASH, (count << 3) | (width - 1)); } - function writeValues(values: unknown[]) { - const length = values.length; - if (length > indexThreshold) { - const offsets = new Array(length); - for (let i = length - 1; i >= 0; i--) { - writeAny(values[i]); - offsets[i] = pos; + function writeObject(value: Record, keys?: string[]) { + if (!keys) keys = Object.keys(value); + const length = keys.length; + if (length === 0) { + pushASCII("{}"); + return pos; + } + + // If this container's depth forces an index (depth < minIndexDepth), + // schema-sharing is disallowed because schemas can't carry an index. + if (depth < minIndexDepth) { + return writePlainObject(value, keys); + } + + // Schemas only earn their keep when a shape is shared (count > 1) AND has + // 2+ keys (1-key objects don't benefit). 0/1-key objects, singleton shapes, + // and shapes whose keys contain the schema delimiter all use inline keys. + if (length >= 2) { + let hasComma = false; + for (let i = 0; i < length; i++) { + if (keys[i]!.indexOf(",") !== -1) { hasComma = true; break; } } - writeIndex(offsets, length); - } else { - for (let i = length - 1; i >= 0; i--) { - writeAny(values[i]); + if (!hasComma) { + // Walk the trie to find the prescan-counted leaf for this shape. + let schemaLeaf: SchemaTrie = schemaTrie; + for (let i = 0; i < length; i++) { + schemaLeaf = schemaLeaf[keys[i]!] ??= Object.create(null); + } + const count = schemaLeaf[SCHEMA_COUNT] ?? 0; + if (count > 1) { + const schemaTarget = schemaLeaf[SCHEMA_OFFSET]; + if (schemaTarget !== undefined) { + // Subsequent occurrence: emit values + pointer to existing schema node. 
+ return writeSchemaSharedObject(value, schemaTarget, keys); + } + // First occurrence of a shared shape: emit values + inline schema, + // record schema right-edge so subsequent occurrences can point at it. + const schemaEnd = writeFirstSchemaObject(value, keys); + schemaLeaf[SCHEMA_OFFSET] = schemaEnd; + return pos; + } } } + + return writePlainObject(value, keys); } - function writeObject(value: Record, keys?: string[]) { - if (!keys) keys = Object.keys(value); + // Object with inline keys. May carry an index for O(log n) key lookup if the + // body is large enough to make linear R-to-L scanning expensive, or if the + // depth thresholds force one. + function writePlainObject(value: Record, keys: string[]) { + pushASCII("{"); const length = keys.length; - if (length === 0) return pushASCII(":"); - // Inline schemaUpsert: walk/create trie nodes for this key sequence. - let schemaLeaf: SchemaTrie = schemaTrie; - for (let i = 0; i < length; i++) { - const k = keys[i]!; - schemaLeaf = schemaLeaf[k] ??= Object.create(null); + const myDepth = depth; + // Record key offsets unconditionally; we decide on the index after seeing the body. + const keyOffsets = new Array(length); + const bodyStart = pos; + depth = myDepth + 1; + for (let i = length - 1; i >= 0; i--) { + const key = keys[i]!; + writeAny(value[key]); + writeAny(key); + keyOffsets[i] = pos; } - const schemaTarget = schemaLeaf[SCHEMA_OFFSET]; - if (schemaTarget !== undefined) return writeSchemaObject(value, schemaTarget, keys); - - const before = pos; - const needsIndex = length > indexThreshold; + depth = myDepth; - if (needsIndex) { - // Pre-compute sorted order for index: sort key indices by UTF-8 order + if (wantsIndex(myDepth, pos - bodyStart)) { + // Sort key indices by UTF-8 order for binary-search lookup. 
const sortedIndices = new Array(length); for (let i = 0; i < length; i++) sortedIndices[i] = i; - sortedIndices.sort((a, b) => utf8Sort(keys![a]!, keys![b]!)); - - // Write entries in reverse insertion order, recording offset per key index - const keyOffsets = new Array(length); - for (let i = length - 1; i >= 0; i--) { - const key = keys[i]!; - writeAny(value[key]); - writeAny(key); - keyOffsets[i] = pos; - } - - // Build sorted offsets array for index + sortedIndices.sort((a, b) => utf8Sort(keys[a]!, keys[b]!)); const sortedOffsets = new Array(length); for (let i = 0; i < length; i++) { sortedOffsets[i] = keyOffsets[sortedIndices[i]!]!; } writeIndex(sortedOffsets, length); - } else { - // Small object — no index needed; iterate keys directly (no Object.entries tuple alloc) - for (let i = length - 1; i >= 0; i--) { - const key = keys[i]!; - writeAny(value[key]); - writeAny(key); - } } - const ret = emitUnsigned(TAG_COLON, pos - before); - schemaLeaf[SCHEMA_OFFSET] = pos; - return ret; + pushASCII("}"); + return pos; } - function writeSchemaObject(value: Record, target: string | number, keys: string[]) { - const before = pos; + // First occurrence of a shape: emit values + inline schema node. + // Returns the schema's right-edge (one past its varint), which subsequent + // objects of the same shape use as their pointer target. + function writeFirstSchemaObject(value: Record, keys: string[]): number { + pushASCII("{"); const length = keys.length; - // Inline writeValues logic to avoid building Object.values() array - if (length > indexThreshold) { - const offsets = new Array(length); - for (let i = length - 1; i >= 0; i--) { - writeAny(value[keys[i]!]); - offsets[i] = pos; - } - writeIndex(offsets, length); + const myDepth = depth; + + // Schema objects can't carry an index; emit values in reverse natural order. 
+ depth = myDepth + 1; + for (let i = length - 1; i >= 0; i--) { + writeAny(value[keys[i]!]); + } + depth = myDepth; + + // Schema body: keys joined by ',' in REVERSE natural order so R-to-L + // scanning yields keys in natural order alongside R-to-L value parsing. + const reversedKeys = new Array(length); + for (let i = 0; i < length; i++) reversedKeys[i] = keys[length - 1 - i]!; + const schemaText = reversedKeys.join(","); + const schemaBody = textEncoder.encode(schemaText); + const bodyLen = schemaBody.length; + + ensureCapacity(bodyLen + 16); + buf.set(schemaBody, off); + pos += bodyLen; + off += bodyLen; + + emitUnsigned(TAG_DOT, bodyLen); + const schemaEnd = pos; + + pushASCII("}"); + return schemaEnd; + } + + // Subsequent occurrence of a shape: emit values + pointer to existing schema. + function writeSchemaSharedObject( + value: Record, + schemaPos: string | number, + keys: string[], + ) { + pushASCII("{"); + const length = keys.length; + const myDepth = depth; + + depth = myDepth + 1; + for (let i = length - 1; i >= 0; i--) { + writeAny(value[keys[i]!]); + } + depth = myDepth; + + if (typeof schemaPos === "string") { + // External ref name (from refs dictionary) — pre-existing fast path. + pushASCII(`'${schemaPos}`); } else { - for (let i = length - 1; i >= 0; i--) { - writeAny(value[keys[i]!]); - } + emitUnsigned(TAG_CARET, pos - schemaPos); } - if (typeof target === "string") pushASCII(`'${target}`); - else emitUnsigned(TAG_CARET, pos - target); - return emitUnsigned(TAG_COLON, pos - before); + + pushASCII("}"); + return pos; } } diff --git a/rxb-read.ts b/rxb-read.ts deleted file mode 100644 index 4b2c6ac..0000000 --- a/rxb-read.ts +++ /dev/null @@ -1,891 +0,0 @@ -/////////////////////////////////////////////////////////////////// -// -// RXB Reader — cursor-based decoder and Proxy API for RXB binary data. 
-// -// Provides zero-copy random access into RXB-encoded buffers: -// read() — parse one node at a byte offset -// findKey() — O(log n) key lookup on indexed objects -// open() — returns a read-only Proxy that looks like plain JS -// decode() — alias for open() -// -// Import the encoder from "./rxb.ts". -// -/////////////////////////////////////////////////////////////////// - -import { - tagVarintRead, - hexDecode, - b64sDecode, - TAG_INT, - TAG_DECIMAL, - TAG_STRING, - TAG_HEXSTR, - TAG_B64STR, - TAG_REF, - TAG_LIST, - TAG_MAP, - TAG_POINTER, - TAG_CHAIN, - TAG_INDEX, - REF_NULL, - REF_TRUE, - REF_FALSE, - REF_UNDEF, - REF_INF, - REF_NINF, - REF_NAN, - REF_EXTERNAL_BASE, -} from "./rxb.ts"; - -import { fromZigZag } from "./rx.ts"; - -const textEncoder = new TextEncoder(); -const textDecoder = new TextDecoder(); - -// ── Tags (semantic) ── - -export type Tag = - | "int" - | "float" - | "str" - | "hexstr" - | "b64str" - | "ref" - | "true" - | "false" - | "null" - | "undef" - | "array" - | "object" - | "ptr" - | "chain"; - -// ── Cursor ── - -export interface Cursor { - data: Uint8Array; - left: number; - right: number; - tag: Tag; - val: number; - ixWidth: number; - ixCount: number; - schema: number; -} - -export function makeCursor(data: Uint8Array): Cursor { - return { - data, - left: 0, - right: data.length, - tag: "null", - val: 0, - ixWidth: 0, - ixCount: 0, - schema: 0, - }; -} - -// Internal scratch cursors -const _empty = new Uint8Array(0); -const _k: Cursor = makeCursor(_empty); -const _s: Cursor = makeCursor(_empty); -const _cc: Cursor = makeCursor(_empty); -const _cmp: Cursor = makeCursor(_empty); - -// ── Core parsing ── - -/** Unpack index metadata: low 3 bits = width-1, rest = count */ -function unpackIndex(c: Cursor, packed: number): void { - c.ixWidth = (packed & 0b111) + 1; - c.ixCount = packed >> 3; -} - -/** Read one node ending at c.right. Fills all cursor fields. Returns the tag. 
*/ -export function read(c: Cursor): Tag { - const { data } = c; - - // Reset container fields - c.ixWidth = 0; - c.ixCount = 0; - c.schema = 0; - - const { tag, value, left } = tagVarintRead(data, c.right); - c.left = left; - - switch (tag) { - case TAG_INT: - c.val = fromZigZag(value); - return c.tag = "int"; - - case TAG_DECIMAL: { - const exp = fromZigZag(value); - const savedRight = c.right; - c.right = left; - read(c); - c.val = parseFloat(`${c.val}e${exp}`); - c.right = savedRight; - return c.tag = "float"; - } - - case TAG_STRING: - c.left = left - value; - c.val = value; - return c.tag = "str"; - - case TAG_HEXSTR: - c.left = left - Math.ceil(value / 2); - c.val = value; - return c.tag = "hexstr"; - - case TAG_B64STR: - c.left = left - Math.ceil(value * 6 / 8); - c.val = value; - return c.tag = "b64str"; - - case TAG_REF: { - if (value === REF_TRUE) { c.val = 0; return c.tag = "true"; } - if (value === REF_FALSE) { c.val = 0; return c.tag = "false"; } - if (value === REF_NULL) { c.val = 0; return c.tag = "null"; } - if (value === REF_UNDEF) { c.val = 0; return c.tag = "undef"; } - if (value === REF_INF) { c.val = Infinity; return c.tag = "float"; } - if (value === REF_NINF) { c.val = -Infinity; return c.tag = "float"; } - if (value === REF_NAN) { c.val = NaN; return c.tag = "float"; } - c.val = value - REF_EXTERNAL_BASE; - return c.tag = "ref"; - } - - case TAG_MAP: { - let content = left; - c.left = left - value; - if (content > c.left) { - const { tag: innerTag, value: innerVal, left: innerLeft } = tagVarintRead(data, content); - if (innerTag === TAG_REF || innerTag === TAG_POINTER) { - let isSchema = true; - if (innerTag === TAG_POINTER) { - const target = innerLeft - innerVal; - const { tag: targetTag } = tagVarintRead(data, target); - isSchema = targetTag === TAG_LIST || targetTag === TAG_MAP; - } - if (isSchema) { - c.schema = content; - content = innerLeft; - } - } - if (content > c.left) { - const { tag: ixTag, value: ixVal, left: ixLeft } = 
tagVarintRead(data, content); - if (ixTag === TAG_INDEX) { - unpackIndex(c, ixVal); - content = ixLeft - c.ixWidth * c.ixCount; - } - } - } - c.val = content; - return c.tag = "object"; - } - - case TAG_LIST: { - let content = left; - c.left = left - value; - if (content > c.left) { - const { tag: ixTag, value: ixVal, left: ixLeft } = tagVarintRead(data, content); - if (ixTag === TAG_INDEX) { - unpackIndex(c, ixVal); - content = ixLeft - c.ixWidth * c.ixCount; - } - } - c.val = content; - return c.tag = "array"; - } - - case TAG_POINTER: - c.val = left - value; - return c.tag = "ptr"; - - case TAG_CHAIN: - c.left = left - value; - c.val = left; - return c.tag = "chain"; - - default: - throw new SyntaxError(`Unknown tag: 0x${tag.toString(16)}`); - } -} - -// ── String handling ── - -/** Decode the string at cursor position to a JS string. */ -export function readStr(c: Cursor): string { - if (c.tag === "hexstr") { - const byteLen = Math.ceil(c.val / 2); - return hexDecode(c.data, c.left, byteLen, c.val); - } - if (c.tag === "b64str") { - const byteLen = Math.ceil(c.val * 6 / 8); - return b64sDecode(c.data, c.left, byteLen, c.val); - } - return textDecoder.decode(c.data.subarray(c.left, c.left + c.val)); -} - -/** Resolve a node to a string, following pointers and concatenating chains. 
*/ -export function resolveStr(c: Cursor): string { - const savedLeft = c.left, savedRight = c.right, savedTag = c.tag, savedVal = c.val; - const result = _resolveStr(c); - c.left = savedLeft; c.right = savedRight; c.tag = savedTag; c.val = savedVal; - return result; -} - -function _resolveStr(c: Cursor): string { - while (c.tag === "ptr") { c.right = c.val; read(c); } - if (c.tag === "str" || c.tag === "hexstr" || c.tag === "b64str") return readStr(c); - if (c.tag === "chain") { - const parts: string[] = []; - let right = c.val; - const left = c.left; - while (right > left) { - c.right = right; - read(c); - right = c.left; - parts.push(_resolveStr(c)); - } - return parts.join(""); - } - throw new TypeError(`resolveStr: expected str, hexstr, ptr, or chain, got ${c.tag}`); -} - -/** Encode a string to UTF-8 bytes for use with strEquals/strCompare. */ -export function prepareKey(target: string): Uint8Array { - return textEncoder.encode(target); -} - -/** Compare a node's string bytes against key bytes starting at offset. */ -function nodeCompare(c: Cursor, key: Uint8Array, offset: number): { cmp: number; offset: number } { - while (c.tag === "ptr") { c.right = c.val; read(c); } - - if (c.tag === "str") { - const start = c.left; - const byteLen = c.val; - const { data } = c; - const len = Math.min(byteLen, key.length - offset); - for (let i = 0; i < len; i++) { - const diff = data[start + i]! - key[offset + i]!; - if (diff !== 0) return { cmp: diff, offset: offset + i }; - } - if (byteLen > key.length - offset) return { cmp: 1, offset: key.length }; - return { cmp: 0, offset: offset + byteLen }; - } - - if (c.tag === "hexstr" || c.tag === "b64str") { - const str = readStr(c); - const strBytes = textEncoder.encode(str); - const len = Math.min(strBytes.length, key.length - offset); - for (let i = 0; i < len; i++) { - const diff = strBytes[i]! 
- key[offset + i]!; - if (diff !== 0) return { cmp: diff, offset: offset + i }; - } - if (strBytes.length > key.length - offset) return { cmp: 1, offset: key.length }; - return { cmp: 0, offset: offset + strBytes.length }; - } - - if (c.tag === "ref") { - return { cmp: NaN, offset }; - } - - if (c.tag === "chain") { - let right = c.val; - const left = c.left; - while (right > left) { - c.right = right; - read(c); - right = c.left; - const result = nodeCompare(c, key, offset); - if (result.cmp !== 0) return result; - offset = result.offset; - } - return { cmp: 0, offset }; - } - - return { cmp: NaN, offset }; -} - -/** Compare cursor's string against target. Returns <0, 0, >0, or NaN. */ -export function strCompare(c: Cursor, target: Uint8Array): number { - _cmp.data = c.data; _cmp.left = c.left; _cmp.right = c.right; _cmp.tag = c.tag; _cmp.val = c.val; - const { cmp, offset } = nodeCompare(_cmp, target, 0); - if (cmp !== 0) return cmp; - return offset < target.length ? -1 : 0; -} - -/** Zero-alloc equality check. */ -export function strEquals(c: Cursor, target: Uint8Array): boolean { - return strCompare(c, target) === 0; -} - -/** Zero-alloc prefix check. */ -export function strHasPrefix(c: Cursor, prefix: Uint8Array): boolean { - if (prefix.length === 0) return true; - _cmp.data = c.data; _cmp.left = c.left; _cmp.right = c.right; _cmp.tag = c.tag; _cmp.val = c.val; - const { offset } = nodeCompare(_cmp, prefix, 0); - return offset === prefix.length; -} - -// ── Container access ── - -/** Read a big-endian unsigned integer from data[left..left+width). */ -function readBinaryFixed(data: Uint8Array, left: number, width: number): number { - let result = 0; - for (let i = 0; i < width; i++) { - result = result * 256 + data[left + i]!; - } - return result; -} - -/** Jump to the Nth child of an indexed container. O(1). 
*/ -export function seekChild(c: Cursor, container: Cursor, index: number): void { - if (container.ixWidth === 0) { - throw new Error("seekChild requires an indexed container"); - } - if (index < 0 || index >= container.ixCount) { - throw new RangeError(`seekChild: index ${index} out of range [0, ${container.ixCount})`); - } - const { data } = container; - const { val: ixBase, ixWidth } = container; - const entryLeft = ixBase + index * ixWidth; - const delta = readBinaryFixed(data, entryLeft, ixWidth); - c.data = data; - c.right = ixBase - delta; - read(c); -} - -/** Collect child right-boundaries into caller-owned array. Returns count. */ -export function collectChildren(container: Cursor, offsets: number[]): number { - _cc.data = container.data; - let right = container.val; - const end = container.left; - let count = 0; - while (right > end) { - if (count >= offsets.length) offsets.push(right); - else offsets[count] = right; - count++; - _cc.right = right; - read(_cc); - right = _cc.left; - } - return count; -} - -function keyEquals(target: Uint8Array): boolean { - return strEquals(_k, target); -} - -/** Find a key in an object. Fills c with the value node if found. 
*/ -export function findKey(c: Cursor, container: Cursor, target: string | Uint8Array): boolean { - if (container.tag !== "object") return false; - if (typeof target === "string") target = prepareKey(target); - - const { data } = container; - _k.data = data; - - if (container.ixWidth > 0 && container.ixCount > 0 && container.schema === 0) { - let lo = 0, hi = container.ixCount; - while (lo < hi) { - const mid = (lo + hi) >>> 1; - seekChild(c, container, mid); - const cmp = strCompare(c, target); - if (cmp < 0) lo = mid + 1; - else hi = mid; - } - if (lo < container.ixCount) { - seekChild(c, container, lo); - if (strEquals(c, target)) { - c.data = data; - c.right = c.left; - read(c); - return true; - } - } - return false; - } - - let right = container.val; - const end = container.left; - - if (container.schema !== 0) { - _s.data = data; - _s.right = container.schema; - read(_s); - - if (_s.tag === "ptr") { - _s.right = _s.val; - read(_s); - } - - let keyRight = _s.val; - const keyEnd = _s.left; - let valRight = container.val; - - if (_s.tag === "object") { - while (keyRight > keyEnd && valRight > end) { - _k.right = keyRight; - read(_k); - const matched = keyEquals(target); - _s.data = data; - _s.right = _k.left; - read(_s); - keyRight = _s.left; - - if (matched) { - c.data = data; - c.right = valRight; - read(c); - return true; - } - - c.data = data; - c.right = valRight; - read(c); - valRight = c.left; - } - } - - if (_s.tag === "array") { - while (keyRight > keyEnd && valRight > end) { - _k.right = keyRight; - read(_k); - keyRight = _k.left; - - if (keyEquals(target)) { - c.data = data; - c.right = valRight; - read(c); - return true; - } - - c.data = data; - c.right = valRight; - read(c); - valRight = c.left; - } - } - - return false; - } - - while (right > end) { - _k.right = right; - read(_k); - if (keyEquals(target)) { - c.data = data; - c.right = _k.left; - read(c); - return true; - } - c.data = data; - c.right = _k.left; - read(c); - right = c.left; - } - 
return false; -} - -/** Find all keys matching a prefix in an object. */ -export function findByPrefix( - c: Cursor, - container: Cursor, - prefix: string | Uint8Array, - visitor: (key: Cursor, value: Cursor) => boolean | void, -): void { - if (container.tag !== "object") return; - if (typeof prefix === "string") prefix = prepareKey(prefix); - - const { data } = container; - - if (container.schema !== 0) return; - - if (container.ixWidth > 0 && container.ixCount > 0) { - let lo = 0, hi = container.ixCount; - while (lo < hi) { - const mid = (lo + hi) >>> 1; - seekChild(c, container, mid); - const cmp = strCompare(c, prefix); - if (cmp < 0) lo = mid + 1; - else hi = mid; - } - for (let i = lo; i < container.ixCount; i++) { - seekChild(c, container, i); - const keyRight = c.right; - if (!strHasPrefix(c, prefix)) break; - _cc.data = data; _cc.right = keyRight; read(_cc); - c.data = data; c.right = c.left; read(c); - if (visitor(_cc, c) === false) return; - } - return; - } - - _k.data = data; - let right = container.val; - const end = container.left; - while (right > end) { - _k.right = right; - read(_k); - const keyRight = right; - if (strHasPrefix(_k, prefix)) { - _cc.data = data; _cc.right = keyRight; read(_cc); - c.data = data; c.right = _k.left; read(c); - if (visitor(_cc, c) === false) return; - } else { - c.data = data; c.right = _k.left; read(c); - } - right = c.left; - } -} - -// ── Raw bytes ── - -export function rawBytes(c: Cursor): Uint8Array { - return c.data.subarray(c.left, c.right); -} - -export type Refs = Record; - -// ── High-level Proxy API ── - -const HANDLE = Symbol("rxb.handle"); - -type NodeInfo = { - data: Uint8Array; - right: number; - tag: Tag; - val: number; - left: number; - ixWidth: number; - ixCount: number; - schema: number; - _count?: number; - _offsets?: number[]; - _keys?: string[]; - _keyMap?: Map; -}; - -type OpenContext = { - root: unknown; - resolve(right: number): unknown; -}; - -function _openContext(buffer: Uint8Array, refs?: 
Refs, refNames?: string[]): OpenContext { - const nodeMap = new WeakMap(); - const proxyCache = new Map(); - const scratch = makeCursor(buffer); - - function snap(c: Cursor): NodeInfo { - return { - data: c.data, right: c.right, tag: c.tag, val: c.val, - left: c.left, ixWidth: c.ixWidth, ixCount: c.ixCount, schema: c.schema, - }; - } - - function resolveRef(refIndex: number): unknown { - if (!refs || !refNames) return undefined; - const name = refNames[refIndex]; - if (name === undefined) return undefined; - return name in refs ? refs[name] : undefined; - } - - function resolveKeyStr(c: Cursor): string { - const savedLeft = c.left, savedRight = c.right, savedTag = c.tag, savedVal = c.val; - while (c.tag === "ptr") { c.right = c.val; read(c); } - let result: string; - if (c.tag === "ref" && refs && refNames) { - const val = resolveRef(c.val); - result = typeof val === "string" ? val : resolveStr(c); - } else { - result = resolveStr(c); - } - c.left = savedLeft; c.right = savedRight; c.tag = savedTag; c.val = savedVal; - return result; - } - - function wrap(c: Cursor): unknown { - while (c.tag === "ptr") { c.right = c.val; read(c); } - if (c.tag === "ref") return resolveRef(c.val); - const cached = proxyCache.get(c.right); - if (cached !== undefined) return cached; - switch (c.tag) { - case "int": case "float": return c.val; - case "str": case "hexstr": case "b64str": return readStr(c); - case "chain": return resolveStr(c); - case "true": return true; - case "false": return false; - case "null": return null; - case "undef": return undefined; - } - const info = snap(c); - const target: object = c.tag === "array" ? 
[] : Object.create(null); - nodeMap.set(target, info); - const proxy = new Proxy(target, handler); - proxyCache.set(c.right, proxy); - return proxy; - } - - function childCount(info: NodeInfo): number { - if (info._count !== undefined) return info._count; - if (info.ixCount > 0) return info._count = info.ixCount; - if (info.tag === "array") { - ensureOffsets(info); - return info._count!; - } - let right = info.val, n = 0; - while (right > info.left) { - scratch.data = info.data; scratch.right = right; - read(scratch); right = scratch.left; n++; - } - return info._count = info.schema !== 0 ? n : n / 2; - } - - function ensureOffsets(info: NodeInfo): number[] { - if (!info._offsets) { - info._offsets = []; - info._count = collectChildren(info as unknown as Cursor, info._offsets); - } - return info._offsets; - } - - function getChild(info: NodeInfo, index: number): unknown { - if (index < 0 || index >= childCount(info)) return undefined; - if (info.ixWidth > 0) { - seekChild(scratch, info as unknown as Cursor, index); - return wrap(scratch); - } - const offsets = ensureOffsets(info); - scratch.data = info.data; - scratch.right = offsets[index]!; - read(scratch); - return wrap(scratch); - } - - function getValue(info: NodeInfo, key: string): unknown { - if (!info._keyMap && info.schema !== 0) ensureKeyMap(info); - if (info._keyMap) { - const valRight = info._keyMap.get(key); - if (valRight === undefined) return undefined; - scratch.data = info.data; - scratch.right = valRight; - read(scratch); - return wrap(scratch); - } - scratch.data = info.data; - if (findKey(scratch, info as unknown as Cursor, key)) return wrap(scratch); - return undefined; - } - - function ensureKeyMap(info: NodeInfo): { keys: string[]; map: Map } { - if (info._keyMap) { - return { keys: info._keys!, map: info._keyMap }; - } - const keys: string[] = []; - const map = new Map(); - const kc = makeCursor(info.data); - if (info.schema !== 0) { - const sc = makeCursor(info.data); - sc.right = 
info.schema; read(sc); - while (sc.tag === "ptr") { sc.right = sc.val; read(sc); } - if (sc.tag === "ref" && refs && refNames) { - const refVal = resolveRef(sc.val); - let valRight = info.val; - const keyStrings: string[] = Array.isArray(refVal) - ? refVal as string[] - : (refVal && typeof refVal === "object" ? Object.keys(refVal) : []); - for (const name of keyStrings) { - keys.push(name); - map.set(name, valRight); - scratch.data = info.data; scratch.right = valRight; read(scratch); - valRight = scratch.left; - } - } else { - kc.data = sc.data; - let valRight = info.val; - if (sc.tag === "object") { - let keyRight = sc.val; - const keyEnd = sc.left; - while (keyRight > keyEnd) { - kc.right = keyRight; read(kc); - const nextRight = kc.left; - const name = resolveKeyStr(kc); - keys.push(name); - map.set(name, valRight); - scratch.data = info.data; scratch.right = valRight; read(scratch); - valRight = scratch.left; - sc.right = nextRight; read(sc); - keyRight = sc.left; - } - } else if (sc.tag === "array") { - let keyRight = sc.val; - const keyEnd = sc.left; - while (keyRight > keyEnd) { - kc.right = keyRight; read(kc); - const name = resolveKeyStr(kc); - keys.push(name); - map.set(name, valRight); - scratch.data = info.data; scratch.right = valRight; read(scratch); - valRight = scratch.left; - keyRight = kc.left; - } - } - } - } else { - let right = info.val; - while (right > info.left) { - kc.data = info.data; kc.right = right; read(kc); - const keyLeft = kc.left; - const name = resolveKeyStr(kc); - keys.push(name); - map.set(name, keyLeft); - kc.data = info.data; kc.right = keyLeft; read(kc); - right = kc.left; - } - } - info._keys = keys; - info._keyMap = map; - return { keys, map }; - } - - const handler: ProxyHandler = { - get(target, prop) { - const info = nodeMap.get(target)!; - if (prop === HANDLE) return { data: info.data, right: info.right }; - - if (prop === Symbol.iterator) { - if (info.tag === "array") { - return function* () { - const n = 
childCount(info); - for (let i = 0; i < n; i++) yield getChild(info, i); - }; - } - if (info.tag === "object") { - return function* () { - const ks = ensureKeyMap(info).keys; - for (const k of ks) yield [k, getValue(info, k)] as [string, unknown]; - }; - } - return undefined; - } - - if (typeof prop === "symbol") return undefined; - if (prop === "length") return childCount(info); - - if (info.tag === "array") { - const idx = Number(prop); - if (Number.isInteger(idx) && idx >= 0) return getChild(info, idx); - const method = (Array.prototype as any)[prop]; - if (typeof method === "function") { - return function (...args: unknown[]) { - const n = childCount(info); - const arr: unknown[] = new Array(n); - for (let i = 0; i < n; i++) arr[i] = getChild(info, i); - return method.apply(arr, args); - }; - } - return undefined; - } - - if (info.tag === "object") return getValue(info, prop); - return undefined; - }, - - has(target, prop) { - const info = nodeMap.get(target)!; - if (prop === HANDLE) return true; - if (typeof prop === "symbol") return false; - if (prop === "length") return true; - if (info.tag === "array") { - const idx = Number(prop); - return Number.isInteger(idx) && idx >= 0 && idx < childCount(info); - } - if (info.tag === "object") { - if (!info._keyMap && info.schema !== 0) ensureKeyMap(info); - if (info._keyMap) return info._keyMap.has(prop); - scratch.data = info.data; - return findKey(scratch, info as unknown as Cursor, prop); - } - return false; - }, - - ownKeys(target) { - const info = nodeMap.get(target)!; - if (info.tag === "array") { - const n = childCount(info); - const ks: string[] = []; - for (let i = 0; i < n; i++) ks.push(String(i)); - ks.push("length"); - return ks; - } - return ensureKeyMap(info).keys; - }, - - getOwnPropertyDescriptor(target, prop) { - if (typeof prop === "symbol") return undefined; - const info = nodeMap.get(target)!; - if (info.tag === "array") { - if (prop === "length") { - return { configurable: false, enumerable: 
false, value: childCount(info), writable: true }; - } - const idx = Number(prop); - if (typeof prop === "string" && Number.isInteger(idx) && idx >= 0 && idx < childCount(info)) { - return { configurable: true, enumerable: true, value: getChild(info, idx) }; - } - return undefined; - } - if (info.tag === "object" && typeof prop === "string") { - if (!info._keyMap && info.schema !== 0) ensureKeyMap(info); - if (info._keyMap) { - if (info._keyMap.has(prop)) { - return { configurable: true, enumerable: true, value: getValue(info, prop) }; - } - } else { - scratch.data = info.data; - if (findKey(scratch, info as unknown as Cursor, prop)) { - return { configurable: true, enumerable: true, value: wrap(scratch) }; - } - } - } - return undefined; - }, - - set() { throw new TypeError("rxb data is read-only"); }, - deleteProperty() { throw new TypeError("rxb data is read-only"); }, - }; - - function resolve(right: number): unknown { - scratch.data = buffer; - scratch.right = right; - read(scratch); - return wrap(scratch); - } - - const root = resolve(buffer.length); - return { root, resolve }; -} - -/** Open an rxb buffer and return a Proxy-wrapped root value. */ -export function open(buffer: Uint8Array, refs?: Refs): unknown { - const refNames = refs ? Object.keys(refs).sort() : undefined; - return _openContext(buffer, refs, refNames).root; -} - -/** Get the raw handle from a Proxy-wrapped value. 
*/ -export function handle(proxy: unknown): { data: Uint8Array; right: number } | undefined { - if (proxy && typeof proxy === "object" && HANDLE in proxy) { - return (proxy as any)[HANDLE]; - } - return undefined; -} - -// ── Decode ── - -export interface DecodeOptions { - refs?: Refs; -} - -export function decode(input: Uint8Array, options?: DecodeOptions): unknown { - return open(input, options?.refs); -} diff --git a/rxb.test.ts b/rxb.test.ts deleted file mode 100644 index 557cd82..0000000 --- a/rxb.test.ts +++ /dev/null @@ -1,703 +0,0 @@ -import { describe, it, expect } from "vitest"; -import { - encode, - tagVarintSize, - tagVarintWrite, - tagVarintRead, - classifyString, - hexEncodeInto, - hexDecode, - TAG_INT, - TAG_STRING, - TAG_HEXSTR, - TAG_REF, - TAG_LIST, - TAG_MAP, - TAG_POINTER, - TAG_CHAIN, - TAG_INDEX, - TAG_DECIMAL, - REF_NULL, - REF_TRUE, - REF_FALSE, - REF_UNDEF, - REF_INF, - REF_NINF, - REF_NAN, -} from "./rxb.ts"; -import { - decode, - open, - read, - readStr, - makeCursor, - findKey, - seekChild, - collectChildren, - resolveStr, - strCompare, - strEquals, - strHasPrefix, - prepareKey, -} from "./rxb-read.ts"; - -import { encode as rxEncode } from "./rx.ts"; -import { parse as rxParse } from "./rx-read.ts"; - -// ── Helper ── - -function cur(value: unknown, options?: Parameters[1]) { - const buf = encode(value, options); - const c = makeCursor(buf); - read(c); - return c; -} - -function roundtrip(value: unknown, options?: Parameters[1]) { - const buf = encode(value, options); - return decode(buf, options); -} - -// ── Tag+Varint encoding ── - -describe("tagVarint", () => { - it("encodes tag with value 0 in 1 byte", () => { - const buf = new Uint8Array(8); - const n = tagVarintWrite(buf, 0, TAG_INT, 0); - expect(n).toBe(1); - expect(buf[0]).toBe(TAG_INT); // 0x00 - const { tag, value, left } = tagVarintRead(buf, 1); - expect(tag).toBe(TAG_INT); - expect(value).toBe(0); - expect(left).toBe(0); - }); - - it("encodes tag with value 1-7 in 1 byte", 
() => { - for (let v = 1; v <= 7; v++) { - const buf = new Uint8Array(8); - const n = tagVarintWrite(buf, 0, TAG_STRING, v); - expect(n).toBe(1); - const { tag, value } = tagVarintRead(buf, 1); - expect(tag).toBe(TAG_STRING); - expect(value).toBe(v); - } - }); - - it("encodes tag with value 8 in 2 bytes", () => { - const buf = new Uint8Array(8); - const n = tagVarintWrite(buf, 0, TAG_STRING, 8); - expect(n).toBe(2); - const { tag, value, left } = tagVarintRead(buf, 2); - expect(tag).toBe(TAG_STRING); - expect(value).toBe(8); - expect(left).toBe(0); - }); - - it("roundtrips various values", () => { - const values = [0, 1, 7, 8, 63, 64, 127, 128, 255, 1023, 1024, 16383, 16384, 131071, 131072, 1000000]; - for (const tag of [TAG_INT, TAG_STRING, TAG_LIST, TAG_MAP]) { - for (const v of values) { - const buf = new Uint8Array(16); - const n = tagVarintWrite(buf, 0, tag, v); - expect(n).toBe(tagVarintSize(tag, v)); - const result = tagVarintRead(buf, n); - expect(result.tag).toBe(tag); - expect(result.value).toBe(v); - expect(result.left).toBe(0); - } - } - }); - - it("tagVarintSize is correct", () => { - expect(tagVarintSize(0, 0)).toBe(1); - expect(tagVarintSize(0, 7)).toBe(1); - expect(tagVarintSize(0, 8)).toBe(2); - expect(tagVarintSize(0, 1023)).toBe(2); - expect(tagVarintSize(0, 1024)).toBe(3); - expect(tagVarintSize(0, 131071)).toBe(3); - expect(tagVarintSize(0, 131072)).toBe(4); - }); -}); - -// ── Hexstring helpers ── - -describe("hexstring helpers", () => { - it("classifyString detects hex, b64, and regular", () => { - expect(classifyString("deadbeef")).toBe(1); // hex - expect(classifyString("0123456789abcdef")).toBe(1); // hex - expect(classifyString("abc")).toBe(0); // too short - expect(classifyString("")).toBe(0); // too short - expect(classifyString("DEADBEEF")).toBe(2); // uppercase = b64, not hex - expect(classifyString("hello")).toBe(2); // all b64 - expect(classifyString("abcg")).toBe(2); // g is b64 but not hex - expect(classifyString("hello 
world")).toBe(0); // space = regular - expect(classifyString("path/to")).toBe(0); // / = regular - }); - - it("hexEncodeInto/hexDecode roundtrip even length", () => { - const hex = "deadbeef"; - const packed = new Uint8Array(4); - hexEncodeInto(hex, packed, 0); - expect(packed[0]).toBe(0xDE); - expect(packed[1]).toBe(0xAD); - expect(packed[2]).toBe(0xBE); - expect(packed[3]).toBe(0xEF); - const decoded = hexDecode(packed, 0, packed.length, hex.length); - expect(decoded).toBe(hex); - }); - - it("hexEncodeInto/hexDecode roundtrip odd length", () => { - const hex = "abcde"; - const packed = new Uint8Array(3); - hexEncodeInto(hex, packed, 0); - const decoded = hexDecode(packed, 0, packed.length, hex.length); - expect(decoded).toBe(hex); - }); -}); - -// ── Primitives ── - -describe("primitives", () => { - it("encodes integers", () => { - expect(cur(0).tag).toBe("int"); - expect(cur(0).val).toBe(0); - expect(cur(1).val).toBe(1); - expect(cur(-1).val).toBe(-1); - expect(cur(42).val).toBe(42); - expect(cur(-42).val).toBe(-42); - expect(cur(255).val).toBe(255); - expect(cur(1000).val).toBe(1000); - expect(cur(1000000).val).toBe(1000000); - }); - - it("encodes large integers", () => { - expect(cur(Number.MAX_SAFE_INTEGER).val).toBe(Number.MAX_SAFE_INTEGER); - // MIN_SAFE_INTEGER loses precision in zigzag (same as rx.ts) — test a large negative that works - expect(cur(-4503599627370495).val).toBe(-4503599627370495); - }); - - it("encodes floats", () => { - expect(cur(3.14).tag).toBe("float"); - expect(cur(3.14).val).toBeCloseTo(3.14); - expect(cur(0.5).val).toBeCloseTo(0.5); - expect(cur(-0.5).val).toBeCloseTo(-0.5); - expect(cur(99.9).val).toBeCloseTo(99.9); - }); - - it("encodes special floats", () => { - expect(cur(NaN).val).toBeNaN(); - expect(cur(Infinity).val).toBe(Infinity); - expect(cur(-Infinity).val).toBe(-Infinity); - }); - - it("encodes booleans", () => { - expect(cur(true).tag).toBe("true"); - expect(cur(false).tag).toBe("false"); - }); - - it("encodes null", () 
=> { - expect(cur(null).tag).toBe("null"); - }); - - it("encodes undefined", () => { - expect(cur(undefined).tag).toBe("undef"); - }); - - it("encodes strings", () => { - const c = cur("hello"); - expect(c.tag).toBe("b64str"); // all b64 chars → b64str encoding - expect(readStr(c)).toBe("hello"); - // non-b64 chars → regular string - const c2 = cur("hello world"); // space is not b64 - expect(c2.tag).toBe("str"); - expect(readStr(c2)).toBe("hello world"); - }); - - it("encodes empty string", () => { - const c = cur(""); - expect(c.tag).toBe("str"); - expect(readStr(c)).toBe(""); - }); - - it("encodes unicode strings", () => { - expect(readStr(cur("café"))).toBe("café"); - expect(readStr(cur("🎉"))).toBe("🎉"); - expect(readStr(cur("🏴‍☠️"))).toBe("🏴‍☠️"); - }); - - it("encodes hex strings as hexstr type", () => { - const c = cur("deadbeef"); - expect(c.tag).toBe("hexstr"); - expect(readStr(c)).toBe("deadbeef"); - }); - - it("encodes short hex strings as regular strings", () => { - // "abc" is too short for hex encoding (< 4 chars) - const c = cur("abc"); - expect(c.tag).toBe("str"); - expect(readStr(c)).toBe("abc"); - }); - - it("hex strings save space", () => { - const hex = "0123456789abcdef0123456789abcdef"; // 32 chars - const rxbBuf = encode(hex); - const rxBuf = rxEncode(hex); - // rxb hex: 16 bytes body + 2 bytes tag+varint = ~18 bytes - // rx text: 32 bytes body + 2 bytes tag+length = ~34 bytes - expect(rxbBuf.length).toBeLessThan(rxBuf.length); - }); -}); - -// ── Roundtrip ── - -describe("roundtrip", () => { - it("roundtrips integers", () => { - for (const v of [0, 1, -1, 42, -42, 255, 1000, -1000, Number.MAX_SAFE_INTEGER, -4503599627370495]) { - expect(roundtrip(v)).toBe(v); - } - }); - - it("roundtrips floats", () => { - for (const v of [3.14, 0.5, -0.5, 99.9, 1e100, 1e-100]) { - expect(roundtrip(v)).toBeCloseTo(v, 10); - } - }); - - it("roundtrips special floats", () => { - expect(roundtrip(NaN)).toBeNaN(); - expect(roundtrip(Infinity)).toBe(Infinity); - 
expect(roundtrip(-Infinity)).toBe(-Infinity); - }); - - it("roundtrips strings", () => { - for (const v of ["", "hello", "café", "🎉", "hello world this is a test"]) { - expect(roundtrip(v)).toBe(v); - } - }); - - it("roundtrips hex strings", () => { - for (const v of ["deadbeef", "0123456789abcdef", "abcde"]) { - expect(roundtrip(v)).toBe(v); - } - }); - - it("roundtrips booleans", () => { - expect(roundtrip(true)).toBe(true); - expect(roundtrip(false)).toBe(false); - }); - - it("roundtrips null", () => { - expect(roundtrip(null)).toBe(null); - }); - - it("roundtrips arrays", () => { - expect(JSON.stringify(roundtrip([]))).toBe("[]"); - expect(JSON.stringify(roundtrip([1, 2, 3]))).toBe("[1,2,3]"); - expect(JSON.stringify(roundtrip(["a", "b", "c"]))).toBe('["a","b","c"]'); - }); - - it("roundtrips objects", () => { - expect(JSON.stringify(roundtrip({}))).toBe("{}"); - expect(JSON.stringify(roundtrip({ a: 1 }))).toBe('{"a":1}'); - expect(JSON.stringify(roundtrip({ a: 1, b: 2 }))).toBe('{"a":1,"b":2}'); - }); - - it("roundtrips nested structures", () => { - const data = { - users: [ - { name: "alice", age: 30 }, - { name: "bob", age: 25 }, - ], - version: 3, - }; - expect(JSON.stringify(roundtrip(data))).toBe(JSON.stringify(data)); - }); - - it("roundtrips deeply nested", () => { - const data = { a: { b: { c: { d: [1, [2, [3]]] } } } }; - expect(JSON.stringify(roundtrip(data))).toBe(JSON.stringify(data)); - }); -}); - -// ── Containers ── - -describe("containers", () => { - it("arrays have correct tag and children", () => { - const c = cur([1, 2, 3]); - expect(c.tag).toBe("array"); - }); - - it("objects have correct tag", () => { - const c = cur({ a: 1, b: 2 }); - expect(c.tag).toBe("object"); - }); - - it("indexed arrays support seekChild", () => { - const buf = encode([10, 20, 30, 40, 50], { indexThreshold: 0 }); - const c = makeCursor(buf); - read(c); - expect(c.tag).toBe("array"); - expect(c.ixCount).toBe(5); - - const child = makeCursor(buf); - seekChild(child, 
c, 0); - expect(child.val).toBe(10); - seekChild(child, c, 2); - expect(child.val).toBe(30); - seekChild(child, c, 4); - expect(child.val).toBe(50); - }); - - it("indexed objects support findKey", () => { - const obj: Record = {}; - for (let i = 0; i < 20; i++) obj[`key${String(i).padStart(2, "0")}`] = i; - const buf = encode(obj, { indexThreshold: 0 }); - const c = makeCursor(buf); - read(c); - expect(c.tag).toBe("object"); - expect(c.ixCount).toBe(20); - - const result = makeCursor(buf); - expect(findKey(result, c, "key00")).toBe(true); - expect(result.val).toBe(0); - expect(findKey(result, c, "key19")).toBe(true); - expect(result.val).toBe(19); - expect(findKey(result, c, "key10")).toBe(true); - expect(result.val).toBe(10); - expect(findKey(result, c, "missing")).toBe(false); - }); - - it("collectChildren returns correct count", () => { - const buf = encode([1, 2, 3]); - const c = makeCursor(buf); - read(c); - const offsets: number[] = []; - const count = collectChildren(c, offsets); - expect(count).toBe(3); - }); -}); - -// ── Dedup (pointers) ── - -describe("dedup", () => { - it("deduplicates repeated strings", () => { - const data = ["hello world!!", "hello world!!", "hello world!!"]; - const buf = encode(data); - // Should be smaller than encoding 3 copies - const noDedupBuf = encode(data, { dedupComplexityLimit: 0 }); - // With dedup, repeated strings become pointers - expect(buf.length).toBeLessThanOrEqual(noDedupBuf.length); - }); - - it("deduplicates repeated objects", () => { - const obj = { x: 1, y: 2, z: 3 }; - const data = [obj, obj, obj]; - const buf = encode(data); - const decoded = decode(buf) as any[]; - expect(JSON.stringify(decoded[0])).toBe(JSON.stringify(obj)); - expect(JSON.stringify(decoded[1])).toBe(JSON.stringify(obj)); - expect(JSON.stringify(decoded[2])).toBe(JSON.stringify(obj)); - }); -}); - -// ── String chains ── - -describe("chains", () => { - it("splits long strings with shared prefixes", () => { - const data = [ - 
"/docs/getting-started/installation", - "/docs/getting-started/quickstart", - "/docs/encoding/overview", - ]; - const buf = encode(data, { stringChainThreshold: 4 }); - const decoded = decode(buf) as string[]; - expect(decoded[0]).toBe(data[0]); - expect(decoded[1]).toBe(data[1]); - expect(decoded[2]).toBe(data[2]); - }); -}); - -// ── Schema objects ── - -describe("schema", () => { - it("shares keys across repeated object shapes", () => { - const data = [ - { name: "alice", age: 30, city: "NYC" }, - { name: "bob", age: 25, city: "LA" }, - { name: "carol", age: 35, city: "SF" }, - ]; - const buf = encode(data); - const decoded = decode(buf) as any[]; - expect(decoded[0]!.name).toBe("alice"); - expect(decoded[1]!.name).toBe("bob"); - expect(decoded[2]!.name).toBe("carol"); - expect(decoded[0]!.age).toBe(30); - expect(decoded[1]!.age).toBe(25); - }); -}); - -// ── Proxy API ── - -describe("proxy API", () => { - it("array indexing", () => { - const arr = open(encode([10, 20, 30])) as number[]; - expect(arr[0]).toBe(10); - expect(arr[1]).toBe(20); - expect(arr[2]).toBe(30); - expect(arr.length).toBe(3); - }); - - it("object property access", () => { - const obj = open(encode({ x: 1, y: "hello" })) as any; - expect(obj.x).toBe(1); - expect(obj.y).toBe("hello"); - }); - - it("nested access", () => { - const data = { users: [{ name: "alice" }, { name: "bob" }] }; - const obj = open(encode(data)) as any; - expect(obj.users[0].name).toBe("alice"); - expect(obj.users[1].name).toBe("bob"); - }); - - it("Object.keys works", () => { - const obj = open(encode({ a: 1, b: 2, c: 3 })) as any; - expect(Object.keys(obj)).toEqual(["a", "b", "c"]); - }); - - it("JSON.stringify works", () => { - const data = { a: 1, b: [2, 3] }; - const obj = open(encode(data)) as any; - expect(JSON.stringify(obj)).toBe(JSON.stringify(data)); - }); - - it("for...of on arrays", () => { - const arr = open(encode([1, 2, 3])) as number[]; - const result: number[] = []; - for (const item of arr) 
result.push(item); - expect(result).toEqual([1, 2, 3]); - }); - - it("spread on arrays", () => { - const arr = open(encode([1, 2, 3])) as number[]; - expect([...arr]).toEqual([1, 2, 3]); - }); - - it("array methods", () => { - const arr = open(encode([1, 2, 3])) as number[]; - expect(arr.map(x => x * 2)).toEqual([2, 4, 6]); - expect(arr.filter(x => x > 1)).toEqual([2, 3]); - }); - - it("hex strings read correctly through proxy", () => { - const data = { hash: "deadbeef01234567" }; - const obj = open(encode(data)) as any; - expect(obj.hash).toBe("deadbeef01234567"); - }); -}); - -// ── String operations ── - -describe("string operations", () => { - it("strCompare works on regular strings", () => { - const buf = encode("hello"); - const c = makeCursor(buf); - read(c); - expect(strCompare(c, prepareKey("hello"))).toBe(0); - expect(strCompare(c, prepareKey("hell"))).toBeGreaterThan(0); - expect(strCompare(c, prepareKey("helloz"))).toBeLessThan(0); - }); - - it("strEquals works", () => { - const buf = encode("test"); - const c = makeCursor(buf); - read(c); - expect(strEquals(c, prepareKey("test"))).toBe(true); - expect(strEquals(c, prepareKey("other"))).toBe(false); - }); - - it("strHasPrefix works", () => { - const buf = encode("hello world"); - const c = makeCursor(buf); - read(c); - expect(strHasPrefix(c, prepareKey("hello"))).toBe(true); - expect(strHasPrefix(c, prepareKey("world"))).toBe(false); - expect(strHasPrefix(c, prepareKey(""))).toBe(true); - }); -}); - -// ── Cross-format verification ── - -describe("cross-format", () => { - it("rxb is smaller than rx for hex-heavy data", () => { - const data = { - sha256: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", - md5: "d41d8cd98f00b204e9800998ecf8427e", - }; - const rxBuf = rxEncode(data); - const rxbBuf = encode(data); - expect(rxbBuf.length).toBeLessThan(rxBuf.length); - }); - - it("rxb produces same values as rx for common data", () => { - const data = { - name: "test", - version: 42, - 
items: [1, 2, 3], - nested: { a: true, b: null, c: "hello" }, - }; - const rxVal = JSON.stringify(rxParse(new TextDecoder().decode(rxEncode(data)))); - const rxbVal = JSON.stringify(decode(encode(data))); - expect(rxbVal).toBe(rxVal); - }); - - it("rxb is generally smaller than rx", () => { - const data = { - users: Array.from({ length: 5 }, (_, i) => ({ - id: i, - name: `user${i}`, - email: `user${i}@example.com`, - active: i % 2 === 0, - })), - }; - const rxBuf = rxEncode(data); - const rxbBuf = encode(data); - // Binary format should be at least as compact - expect(rxbBuf.length).toBeLessThanOrEqual(rxBuf.length); - }); -}); - -// ── Hexstring specific ── - -describe("hexstring encoding", () => { - it("UUID-like hex", () => { - const uuid = "550e8400e29b41d4a716446655440000"; - expect(roundtrip(uuid)).toBe(uuid); - const c = cur(uuid); - expect(c.tag).toBe("hexstr"); - // 32 hex chars → 16 packed bytes + tag+varint - const buf = encode(uuid); - expect(buf.length).toBeLessThan(uuid.length); // much less than 32 - }); - - it("SHA-256 hash", () => { - const sha = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; - expect(roundtrip(sha)).toBe(sha); - const buf = encode(sha); - // 64 hex chars → 32 packed bytes + tag+varint ≈ 34 bytes - expect(buf.length).toBeLessThan(40); - }); - - it("odd-length hex", () => { - const hex = "abcde"; - expect(roundtrip(hex)).toBe(hex); - }); - - it("all zeros", () => { - const hex = "0000000000000000"; - expect(roundtrip(hex)).toBe(hex); - }); - - it("all f's", () => { - const hex = "ffffffffffffffff"; - expect(roundtrip(hex)).toBe(hex); - }); -}); - -// ── Base64 string specific ── - -describe("b64str encoding", () => { - it("encodes b64-safe strings as b64str", () => { - const c = cur("hello-world_123"); - expect(c.tag).toBe("b64str"); - expect(readStr(c)).toBe("hello-world_123"); - }); - - it("saves space vs regular string", () => { - const s = "abcdefghijklmnopqrstuvwxyz012345"; // 32 chars - const buf = 
encode(s); - // b64str: ceil(32*6/8)=24 bytes body + 2 bytes tag+varint = ~26 bytes - // regular: 32 bytes body + 2 bytes tag+varint = ~34 bytes - expect(buf.length).toBeLessThan(30); - }); - - it("roundtrips b64 strings", () => { - for (const v of ["abcd", "Hello-World_42", "0123456789abcdefghijklmnopqrstuvwxyz", "a-b_c"]) { - expect(roundtrip(v)).toBe(v); - } - }); - - it("short b64 strings stay as regular strings", () => { - const c = cur("abc"); // only 3 chars, below threshold - expect(c.tag).toBe("str"); - }); - - it("strings with spaces are not b64str", () => { - const c = cur("hello world"); - expect(c.tag).toBe("str"); - }); - - it("hex strings still use hexstr (more compact)", () => { - const c = cur("deadbeef"); - expect(c.tag).toBe("hexstr"); // hex is subset of b64 but more compact - }); -}); - -// ── External refs ── - -describe("external refs", () => { - it("encodes and decodes with refs", () => { - const refs = { myType: [1, 2, 3] }; - const data = { items: [1, 2, 3], other: "hello" }; - const buf = encode(data, { refs }); - const decoded = decode(buf, { refs }) as any; - expect(JSON.stringify(decoded.items)).toBe(JSON.stringify([1, 2, 3])); - expect(decoded.other).toBe("hello"); - }); -}); - -// ── Edge cases ── - -describe("edge cases", () => { - it("empty array", () => { - expect(JSON.stringify(roundtrip([]))).toBe("[]"); - }); - - it("empty object", () => { - expect(JSON.stringify(roundtrip({}))).toBe("{}"); - }); - - it("single-element array", () => { - expect(JSON.stringify(roundtrip([42]))).toBe("[42]"); - }); - - it("nested empty", () => { - expect(JSON.stringify(roundtrip({ a: [], b: {} }))).toBe('{"a":[],"b":{}}'); - }); - - it("mixed types in array", () => { - const data = [1, "hello", true, null, 3.14, [1], { a: 1 }]; - expect(JSON.stringify(roundtrip(data))).toBe(JSON.stringify(data)); - }); - - it("string with special characters", () => { - expect(roundtrip("hello\nworld")).toBe("hello\nworld"); - 
expect(roundtrip("tab\there")).toBe("tab\there"); - expect(roundtrip("null\0byte")).toBe("null\0byte"); - }); - - it("large array with index", () => { - const arr = Array.from({ length: 100 }, (_, i) => i); - const decoded = roundtrip(arr, { indexThreshold: 0 }) as number[]; - expect(JSON.stringify(decoded)).toBe(JSON.stringify(arr)); - }); - - it("large object with index", () => { - const obj: Record = {}; - for (let i = 0; i < 50; i++) obj[`k${i}`] = i; - const decoded = roundtrip(obj, { indexThreshold: 0 }) as any; - for (let i = 0; i < 50; i++) { - expect(decoded[`k${i}`]).toBe(i); - } - }); -}); diff --git a/rxb.ts b/rxb.ts deleted file mode 100644 index 1ec2010..0000000 --- a/rxb.ts +++ /dev/null @@ -1,667 +0,0 @@ -/////////////////////////////////////////////////////////////////// -// -// RXB Encoder — compact binary encoding for JSON-shaped data. -// -// Encodes JS values (objects, arrays, strings, numbers, booleans, null) -// into a binary format optimized for: -// - Small encoded size (97% smaller than JSON on real-world data) -// - O(log n) key lookup and O(1) array access without full parsing -// - Structural deduplication (repeated values stored once) -// - Schema sharing (repeated object shapes share key layout) -// -// The format is a binary variant of RX (rx.ts) using base-128 varints -// and compact string encodings (hex-packed, base64-packed). -// -// Usage: -// import { encode } from "./rxb.ts"; -// const buffer = encode(myData); -// -// For decoding / random-access reading, see rxb-read.ts. -// For the text-based RX format, see rx.ts. -// For the format specification, see docs/rxb-format.md. 
-// -/////////////////////////////////////////////////////////////////// - -import { - toZigZag, - splitNumber, - makeKey, - INDEX_THRESHOLD, - STRING_CHAIN_THRESHOLD, - DEDUP_COMPLEXITY_LIMIT, -} from "./rx.ts"; - -// ── Tag constants (4-bit, packed into low nibble of varint) ── - -export const TAG_INT = 0x0; -export const TAG_DECIMAL = 0x1; -export const TAG_STRING = 0x2; -export const TAG_HEXSTR = 0x3; -export const TAG_REF = 0x4; -export const TAG_LIST = 0x5; -export const TAG_MAP = 0x6; -export const TAG_POINTER = 0x7; -export const TAG_CHAIN = 0x8; -export const TAG_INDEX = 0x9; -export const TAG_B64STR = 0xA; - -// ── Ref code constants ── - -export const REF_NULL = 0; -export const REF_TRUE = 1; -export const REF_FALSE = 2; -export const REF_UNDEF = 3; -export const REF_INF = 4; -export const REF_NINF = 5; -export const REF_NAN = 6; -export const REF_EXTERNAL_BASE = 7; - -// ── Combined tag+varint encoding ── -// -// Each node ends with a variable-length byte sequence packing tag + value: -// Tag byte (leftmost, MSB=0): [0][value_low:3][tag:4] -// Extension bytes (MSB=1): [1][value:7], big-endian -// -// When scanning right-to-left, extension bytes (MSB=1) are consumed first, -// then the tag byte (MSB=0) terminates the scan. Body bytes to the left -// of the tag byte are never reached. - -/** Compute the number of bytes needed to encode tag+value. */ -export function tagVarintSize(tag: number, value: number): number { - if (value < 8) return 1; - let n = 1; - value = Math.floor(value / 8); - while (value > 0) { n++; value = Math.floor(value / 128); } - return n; -} - -/** Write tag+value into data starting at offset. Returns bytes written. 
*/ -export function tagVarintWrite( - data: Uint8Array, - offset: number, - tag: number, - value: number, -): number { - const tagBits = value & 0x07; - const tagByte = (tagBits << 4) | (tag & 0x0F); - let remaining = Math.floor(value / 8); - - if (remaining === 0) { - data[offset] = tagByte; - return 1; - } - - const extBytes: number[] = []; - while (remaining > 0) { - extBytes.push((remaining & 0x7F) | 0x80); - remaining = Math.floor(remaining / 128); - } - - let pos = offset; - data[pos++] = tagByte; - for (let i = extBytes.length - 1; i >= 0; i--) { - data[pos++] = extBytes[i]!; - } - - return pos - offset; -} - -/** Read tag+value from data ending at `right`. Used by the decoder. */ -export function tagVarintRead( - data: Uint8Array, - right: number, -): { tag: number; value: number; left: number } { - let pos = right - 1; - let extValue = 0; - let shift = 1; - - while (pos >= 0 && (data[pos]! & 0x80) !== 0) { - extValue = extValue + (data[pos]! & 0x7F) * shift; - shift *= 128; - pos--; - } - - if (pos < 0) throw new SyntaxError("tagVarintRead: no tag byte found"); - - const tagByte = data[pos]!; - const tag = tagByte & 0x0F; - const tagBits = (tagByte >> 4) & 0x07; - const value = tagBits + extValue * 8; - - return { tag, value, left: pos }; -} - -// ── String encoding helpers ── - -// Hex lookup: charCode → nibble value (0xFF = not a hex char) -const hexChars = new Uint8Array(256); -for (let i = 0; i < 256; i++) hexChars[i] = 0xFF; -for (let i = 0; i < 10; i++) hexChars[0x30 + i] = i; // '0'-'9' -for (let i = 0; i < 6; i++) hexChars[0x61 + i] = 10 + i; // 'a'-'f' - -const hexDigits = "0123456789abcdef"; - -// B64 lookup: charCode → 6-bit value (0xFF = not a b64 char) -// Alphabet: 0-9 a-z A-Z - _ (URL-safe, same as RX base64) -const b64sChars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_"; -const b64sDecodeTable = new Uint8Array(128).fill(0xFF); -for (let i = 0; i < 64; i++) b64sDecodeTable[b64sChars.charCodeAt(i)] = i; - -/** Classify 
a string as hex, b64, or regular in a single pass. - * Returns 0 (regular), 1 (hex), or 2 (b64). */ -export function classifyString(s: string): 0 | 1 | 2 { - if (s.length < 4) return 0; - let allHex = true; - for (let i = 0; i < s.length; i++) { - const c = s.charCodeAt(i); - if (c > 127 || b64sDecodeTable[c]! === 0xFF) return 0; - if (allHex && hexChars[c]! === 0xFF) allHex = false; - } - return allHex ? 1 : 2; -} - -/** Pack hex string directly into target buffer. Returns bytes written. */ -export function hexEncodeInto(hex: string, target: Uint8Array, targetOff: number): number { - const byteLen = Math.ceil(hex.length / 2); - const pad = hex.length % 2; - for (let i = 0; i < hex.length; i++) { - const nibble = hexChars[hex.charCodeAt(i)]!; - const byteIdx = (i + pad) >> 1; - if ((i + pad) % 2 === 0) target[targetOff + byteIdx] = nibble << 4; - else target[targetOff + byteIdx]! |= nibble; - } - return byteLen; -} - -/** Unpack bytes back to a hex string. Used by the decoder. */ -export function hexDecode(data: Uint8Array, start: number, byteLen: number, charCount: number): string { - let result = ""; - for (let i = 0; i < byteLen; i++) { - const b = data[start + i]!; - result += hexDigits[b >> 4]; - result += hexDigits[b & 0x0F]; - } - return charCount % 2 === 0 ? result : result.slice(1); -} - -/** Pack b64 string directly into target buffer (6 bits per char). Returns bytes written. */ -export function b64sEncodeInto(s: string, target: Uint8Array, targetOff: number): number { - const byteLen = Math.ceil(s.length * 6 / 8); - let bitBuf = 0; - let bitCount = 0; - let byteIdx = targetOff; - for (let i = 0; i < s.length; i++) { - bitBuf = (bitBuf << 6) | b64sDecodeTable[s.charCodeAt(i)]!; - bitCount += 6; - while (bitCount >= 8) { - bitCount -= 8; - target[byteIdx++] = (bitBuf >> bitCount) & 0xFF; - } - } - if (bitCount > 0) { - target[byteIdx] = (bitBuf << (8 - bitCount)) & 0xFF; - } - return byteLen; -} - -/** Unpack bytes back to a b64 string. 
Used by the decoder. */ -export function b64sDecode(data: Uint8Array, start: number, byteLen: number, charCount: number): string { - let result = ""; - let bitBuf = 0; - let bitCount = 0; - let byteIdx = start; - for (let i = 0; i < charCount; i++) { - while (bitCount < 6 && byteIdx < start + byteLen) { - bitBuf = (bitBuf << 8) | data[byteIdx++]!; - bitCount += 8; - } - bitCount -= 6; - result += b64sChars[(bitBuf >> bitCount) & 0x3F]; - } - return result; -} - -// ── Encoder ── - -const textEncoder = new TextEncoder(); - -export interface EncodeOptions { - onChunk?: (chunk: Uint8Array, offset: number) => void; - refs?: Record; - indexThreshold?: number; - stringChainThreshold?: number; - stringChainDelimiter?: string; - dedupComplexityLimit?: number; - chunkSize?: number; -} - -const ENCODE_DEFAULTS = { - refs: {}, -} as const satisfies Partial; - -export function encode( - value: unknown, - options: EncodeOptions & { onChunk: (chunk: Uint8Array, offset: number) => void }, -): undefined; -export function encode(value: unknown, options?: EncodeOptions): Uint8Array; -export function encode(rootValue: unknown, options?: EncodeOptions): Uint8Array | undefined { - const opts = { ...ENCODE_DEFAULTS, ...options }; - const indexThreshold = opts.indexThreshold ?? INDEX_THRESHOLD; - const chainThreshold = opts.stringChainThreshold ?? Math.min(STRING_CHAIN_THRESHOLD, 24); - const chainDelimiter = opts.stringChainDelimiter ?? "/."; - - // Build a fast delimiter lookup set for chain splitting - const chainDelimSet = new Uint8Array(128); - for (let i = 0; i < chainDelimiter.length; i++) chainDelimSet[chainDelimiter.charCodeAt(i)] = 1; - - function hasDelimiter(s: string, from: number): boolean { - for (let i = from; i < s.length; i++) { - const c = s.charCodeAt(i); - if (c < 128 && chainDelimSet[c]!) 
return true; - } - return false; - } - - function lastDelimiterPos(s: string, before: number): number { - for (let i = before; i >= 0; i--) { - const c = s.charCodeAt(i); - if (c < 128 && chainDelimSet[c]!) return i; - } - return -1; - } - - function nextDelimiterPos(s: string, after: number): number { - for (let i = after; i < s.length; i++) { - const c = s.charCodeAt(i); - if (c < 128 && chainDelimSet[c]!) return i; - } - return -1; - } - - // Build ref name table (sorted for deterministic index assignment) - const refEntries = Object.entries({ ...opts.refs }); - const sortedRefNames = refEntries.map(([k]) => k).sort(); - const refNameToIndex = new Map(); - for (let i = 0; i < sortedRefNames.length; i++) { - refNameToIndex.set(sortedRefNames[i]!, i); - } - - const refsByKey = new Map(); - for (const [key, val] of refEntries) { - const idx = refNameToIndex.get(key)!; - refsByKey.set(makeKey(val), idx); - } - - const seenOffsets = new Map(); - const SCHEMA_OFFSET: unique symbol = Symbol(); - type SchemaTrie = { [key: string]: SchemaTrie } & { [SCHEMA_OFFSET]?: number | string }; - const schemaTrie: SchemaTrie = Object.create(null); - - function schemaUpsert(keys: string[]): SchemaTrie { - let node = schemaTrie; - for (let i = 0; i < keys.length; i++) { - node = node[keys[i]!] ??= Object.create(null); - } - return node; - } - - const seenCosts = new Map(); - - // ── Chunked buffer ── - const CHUNK_SIZE = opts.chunkSize ?? 
65536; - const onChunk = opts.onChunk; - const parts: Uint8Array[] = []; - let buf = new Uint8Array(CHUNK_SIZE); - let pos = 0; - let off = 0; - - function flush() { - if (off === 0) return; - const chunk = buf.subarray(0, off); - if (onChunk) onChunk(chunk, pos - off); - else parts.push(chunk); - buf = new Uint8Array(CHUNK_SIZE); - off = 0; - } - - function ensureCapacity(needed: number) { - if (off + needed <= buf.length) return; - flush(); - if (needed > CHUNK_SIZE) buf = new Uint8Array(needed); - } - - function emitTagVarint(tag: number, value: number) { - const size = tagVarintSize(tag, value); - ensureCapacity(size); - tagVarintWrite(buf, off, tag, value); - pos += size; - off += size; - return pos; - } - - function emitSigned(tag: number, value: number) { - return emitTagVarint(tag, toZigZag(value)); - } - - // Pre-scan refs for schema keys - for (const [key, val] of refEntries) { - if (typeof val === "object" && val !== null) { - const schemaKeys = Array.isArray(val) ? val : Object.keys(val); - schemaUpsert(schemaKeys)[SCHEMA_OFFSET] = key; - } - } - - // Lazy prefix tracking for string chains - const knownPrefixes = chainDelimiter ? new Set() : undefined; - const prefixLengths = chainDelimiter ? new Set() : undefined; - - const hasRefs = refsByKey.size > 0; - - // Pre-scan: mark simple objects for structural dedup - const complexityLimit = opts.dedupComplexityLimit ?? 
DEDUP_COMPLEXITY_LIMIT; - const simpleValues = new WeakSet(); - - (function prescan(val: unknown): number { - if (typeof val !== "object" || val === null) return 1; - if (simpleValues.has(val)) return 1; - let c = 1; - if (Array.isArray(val)) { - for (let i = 0; i < val.length; i++) c += prescan(val[i]); - } else { - for (const k in val) c += 1 + prescan((val as any)[k]); - } - if (c < complexityLimit) simpleValues.add(val); - return c; - })(rootValue); - - writeAny(rootValue); - flush(); - - if (onChunk) return undefined; - const output = new Uint8Array(pos); - let outOff = 0; - for (const part of parts) { - output.set(part, outOff); - outOff += part.byteLength; - } - return output; - - function isCheap(value: unknown): boolean { - if (value === null || value === undefined || typeof value === "boolean") return true; - if (typeof value === "number") { - if (Number.isInteger(value) && value >= -2048 && value <= 2048) return true; - return false; - } - if (typeof value === "string") { - return value.length <= 1; - } - return false; - } - - function tryDedup(key: unknown): boolean { - const seenOffset = seenOffsets.get(key); - if (seenOffset === undefined) return false; - const delta = pos - seenOffset; - const seenCost = seenCosts.get(key) ?? 0; - if (tagVarintSize(TAG_POINTER, delta) < seenCost) { - emitTagVarint(TAG_POINTER, delta); - return true; - } - return false; - } - - function recordDedup(key: unknown, before: number) { - seenOffsets.set(key, pos); - seenCosts.set(key, pos - before); - } - - function writeAny(value: unknown) { - if (!hasRefs && isCheap(value)) return writeAnyInner(value); - - if (hasRefs) { - const refKey = refsByKey.get(typeof value === "string" ? '"' + value - : typeof value === "number" ? 
String(value) - : makeKey(value)); - if (refKey !== undefined) return emitTagVarint(TAG_REF, refKey + REF_EXTERNAL_BASE); - if (typeof value !== "string" && typeof value !== "number" - && (typeof value !== "object" || value === null)) return writeAnyInner(value); - } - - if (typeof value === "string") { - if (tryDedup(value)) return pos; - const before = pos; - writeString(value); - recordDedup(value, before); - return pos; - } - if (typeof value === "number") { - if (tryDedup(value)) return pos; - const before = pos; - writeNumber(value); - recordDedup(value, before); - return pos; - } - - const isArr = Array.isArray(value); - if (simpleValues.has(value as object)) { - const key = JSON.stringify(value); - if (tryDedup(key)) return pos; - const before = pos; - isArr ? writeArray(value) : writeObject(value as Record); - recordDedup(key, before); - return pos; - } - return isArr ? writeArray(value) : writeObject(value as Record); - } - - function writeAnyInner(value: unknown) { - switch (typeof value) { - case "string": return writeString(value); - case "number": return writeNumber(value); - case "boolean": return emitTagVarint(TAG_REF, value ? 
REF_TRUE : REF_FALSE); - case "undefined": return emitTagVarint(TAG_REF, REF_UNDEF); - case "object": - if (value === null) return emitTagVarint(TAG_REF, REF_NULL); - if (Array.isArray(value)) return writeArray(value); - return writeObject(value as Record); - default: - throw new TypeError(`Unsupported value type: ${typeof value}`); - } - } - - function writeString(value: string) { - // Chain splitting: share common prefixes across similar strings - if (knownPrefixes && value.length > chainThreshold && hasDelimiter(value, 1)) { - let offset = value.length; - while (offset > 0) { - offset = lastDelimiterPos(value, offset - 1); - if (offset <= 0) break; - if (prefixLengths!.has(offset)) { - const prefix = value.slice(0, offset); - if (knownPrefixes.has(prefix)) { - const before = pos; - writeAny(value.substring(offset)); - writeAny(prefix); - return emitTagVarint(TAG_CHAIN, pos - before); - } - } - } - offset = 0; - while (offset < value.length) { - const next = nextDelimiterPos(value, offset + 1); - if (next === -1) break; - const prefix = value.slice(0, next); - knownPrefixes.add(prefix); - prefixLengths!.add(next); - offset = next; - } - } - - // Single-pass classification: hex (50% savings), b64 (25%), or regular - const cls = classifyString(value); - if (cls === 1) { - const byteLen = Math.ceil(value.length / 2); - ensureCapacity(byteLen + 16); - hexEncodeInto(value, buf, off); - pos += byteLen; - off += byteLen; - return emitTagVarint(TAG_HEXSTR, value.length); - } - if (cls === 2) { - const byteLen = Math.ceil(value.length * 6 / 8); - ensureCapacity(byteLen + 16); - b64sEncodeInto(value, buf, off); - pos += byteLen; - off += byteLen; - return emitTagVarint(TAG_B64STR, value.length); - } - - // Regular string - const len = value.length; - if (len < 128) { - let ascii = true; - for (let i = 0; i < len; i++) { - if (value.charCodeAt(i) > 127) { ascii = false; break; } - } - if (ascii) { - ensureCapacity(len + 16); - for (let i = 0; i < len; i++) { - buf[off + i] 
= value.charCodeAt(i); - } - pos += len; - off += len; - return emitTagVarint(TAG_STRING, len); - } - } - - const maxBytes = len * 3; - ensureCapacity(maxBytes + 16); - const result = textEncoder.encodeInto(value, buf.subarray(off)); - pos += result.written; - off += result.written; - return emitTagVarint(TAG_STRING, result.written); - } - - function writeNumber(value: number) { - if (Number.isNaN(value)) return emitTagVarint(TAG_REF, REF_NAN); - if (value === Infinity) return emitTagVarint(TAG_REF, REF_INF); - if (value === -Infinity) return emitTagVarint(TAG_REF, REF_NINF); - const [base, exp] = splitNumber(value); - if (exp >= 0 && exp < 5 && Number.isInteger(base) && Number.isSafeInteger(base)) { - return emitSigned(TAG_INT, value); - } - emitSigned(TAG_INT, base); - return emitSigned(TAG_DECIMAL, exp); - } - - function writeArray(value: unknown[]) { - const start = pos; - writeValues(value); - return emitTagVarint(TAG_LIST, pos - start); - } - - function writeBinaryFixed(target: Uint8Array, offset: number, num: number, width: number) { - for (let i = width - 1; i >= 0; i--) { - target[offset + i] = num & 0xFF; - num = (num / 256) | 0; - } - } - - function binaryWidth(maxValue: number): number { - if (maxValue <= 0xFF) return 1; - if (maxValue <= 0xFFFF) return 2; - if (maxValue <= 0xFFFFFF) return 3; - if (maxValue <= 0xFFFFFFFF) return 4; - let w = 5; - let limit = 0x100_0000_0000; - while (maxValue >= limit && w < 8) { w++; limit *= 256; } - return Math.min(w, 8); - } - - function writeIndex(offsets: number[], count: number) { - let maxDelta = 0; - for (let i = 0; i < count; i++) { - const delta = pos - offsets[i]!; - if (delta > maxDelta) maxDelta = delta; - } - const width = binaryWidth(maxDelta); - if (width > 8) throw new Error(`Index width exceeds maximum of 8 bytes: ${width}`); - const totalBytes = count * width; - ensureCapacity(totalBytes + 16); - for (let i = 0; i < count; i++) { - writeBinaryFixed(buf, off + i * width, pos - offsets[i]!, width); - 
} - pos += totalBytes; - off += totalBytes; - emitTagVarint(TAG_INDEX, (count << 3) | (width - 1)); - } - - function writeValues(values: unknown[]) { - const length = values.length; - const offsets = length > indexThreshold ? new Array(length) : undefined; - for (let i = length - 1; i >= 0; i--) { - writeAny(values[i]); - if (offsets) offsets[i] = pos; - } - if (offsets) { - writeIndex(offsets, length); - } - } - - function writeObject(value: Record, keys?: string[]) { - if (!keys) keys = Object.keys(value); - const length = keys.length; - if (length === 0) return emitTagVarint(TAG_MAP, 0); - - const schemaLeaf = length > 1 ? schemaUpsert(keys) : undefined; - if (schemaLeaf) { - const schemaTarget = schemaLeaf[SCHEMA_OFFSET]; - if (schemaTarget !== undefined) return writeSchemaObject(value, schemaTarget); - } - - const before = pos; - const needsIndex = length > indexThreshold; - - if (needsIndex) { - const sortedIndices = new Array(length); - for (let i = 0; i < length; i++) sortedIndices[i] = i; - sortedIndices.sort((a, b) => keys![a]! < keys![b]! ? -1 : keys![a]! > keys![b]! ? 
1 : 0); - - const keyOffsets = new Array(length); - for (let i = length - 1; i >= 0; i--) { - const key = keys[i]!; - writeAny(value[key]); - writeAny(key); - keyOffsets[i] = pos; - } - - const sortedOffsets = new Array(length); - for (let i = 0; i < length; i++) { - sortedOffsets[i] = keyOffsets[sortedIndices[i]!]!; - } - writeIndex(sortedOffsets, length); - } else { - for (let i = length - 1; i >= 0; i--) { - const key = keys[i]!; - writeAny(value[key]); - writeAny(key); - } - } - - const ret = emitTagVarint(TAG_MAP, pos - before); - if (schemaLeaf) schemaLeaf[SCHEMA_OFFSET] = pos; - return ret; - } - - function writeSchemaObject(value: Record, target: string | number) { - const before = pos; - writeValues(Object.values(value)); - if (typeof target === "string") { - const idx = refNameToIndex.get(target); - if (idx !== undefined) emitTagVarint(TAG_REF, idx + REF_EXTERNAL_BASE); - } else { - emitTagVarint(TAG_POINTER, pos - target); - } - return emitTagVarint(TAG_MAP, pos - before); - } -} diff --git a/zig-cli/.gitignore b/zig-cli/.gitignore new file mode 100644 index 0000000..7eada8a --- /dev/null +++ b/zig-cli/.gitignore @@ -0,0 +1,3 @@ +.zig-cache/ +zig-out/ +main diff --git a/zig-cli/build.zig b/zig-cli/build.zig new file mode 100644 index 0000000..a7670dd --- /dev/null +++ b/zig-cli/build.zig @@ -0,0 +1,26 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const exe_mod = b.createModule(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + const exe = b.addExecutable(.{ + .name = "rx-encode", + .root_module = exe_mod, + }); + + b.installArtifact(exe); + + const run_cmd = b.addRunArtifact(exe); + run_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| run_cmd.addArgs(args); + + const run_step = b.step("run", "Run rx-encode"); + run_step.dependOn(&run_cmd.step); +} diff --git 
a/zig-cli/src/main.zig b/zig-cli/src/main.zig new file mode 100644 index 0000000..0e70021 --- /dev/null +++ b/zig-cli/src/main.zig @@ -0,0 +1,704 @@ +// rx-encode: convert JSON to RX format. +// +// Algorithm (from rx-format.md design discussion): +// +// Pass 1: parse JSON into a tape (flat node array). For each node, record: +// kind, source byte range (in input), parsed value, and a bottom-up +// Merkle hash that uniquely identifies the structure. +// +// Pass 2: walk the tape in DFS post-order, emitting RX bytes. Before emitting +// a node, check the dedup table by hash. On hash hit, verify by +// byte-comparing source ranges (cheap, conservative — pretty-printed +// variants miss but that's accepted). On confirmed match, emit a +// backward pointer instead of re-emitting bytes. +// +// The encoder is comptime-generic over a Source type so that the JSON tape is +// just one implementation. A future LiveValueSource backed by Zig structs (or +// any other in-memory representation) plugs into the same encoder by exposing +// the required methods (rootIdx, kind, intValue, stringBytes, childCount, +// childAt, nodeHash, verify). +// +// Not yet implemented: schema sharing, string chains, container indexes. +// These are pure-output optimizations; correctness holds without them. 
+ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +// ============================================================================= +// Common types +// ============================================================================= + +pub const NodeKind = enum(u8) { + null_v, + true_v, + false_v, + int_v, + float_v, + string_v, + array_v, + object_v, +}; + +const b64_chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_"; + +// ============================================================================= +// JSON tokenizer +// ============================================================================= + +const Token = union(enum) { + object_open, + object_close, + array_open, + array_close, + colon, + comma, + string: struct { start: u32, end: u32 }, // includes quotes + number: struct { start: u32, end: u32 }, + true_lit, + false_lit, + null_lit, + eof, +}; + +const Tokenizer = struct { + input: []const u8, + pos: u32 = 0, + + fn skipWs(self: *Tokenizer) void { + while (self.pos < self.input.len) { + const c = self.input[self.pos]; + if (c == ' ' or c == '\t' or c == '\n' or c == '\r') { + self.pos += 1; + } else break; + } + } + + fn peek(self: *Tokenizer) ?u8 { + if (self.pos >= self.input.len) return null; + return self.input[self.pos]; + } + + fn next(self: *Tokenizer) !Token { + self.skipWs(); + if (self.pos >= self.input.len) return .eof; + const c = self.input[self.pos]; + switch (c) { + '{' => { + self.pos += 1; + return .object_open; + }, + '}' => { + self.pos += 1; + return .object_close; + }, + '[' => { + self.pos += 1; + return .array_open; + }, + ']' => { + self.pos += 1; + return .array_close; + }, + ':' => { + self.pos += 1; + return .colon; + }, + ',' => { + self.pos += 1; + return .comma; + }, + '"' => return try self.scanString(), + 't' => return try self.scanLit("true", .true_lit), + 'f' => return try self.scanLit("false", .false_lit), + 'n' => return try self.scanLit("null", .null_lit), + '-', '0'...'9' 
=> return self.scanNumber(), + else => return error.UnexpectedChar, + } + } + + fn scanString(self: *Tokenizer) !Token { + const start = self.pos; + self.pos += 1; // opening quote + while (self.pos < self.input.len) { + const c = self.input[self.pos]; + if (c == '\\') { + if (self.pos + 1 >= self.input.len) return error.UnterminatedString; + self.pos += 2; + continue; + } + if (c == '"') { + self.pos += 1; + return Token{ .string = .{ .start = start, .end = self.pos } }; + } + self.pos += 1; + } + return error.UnterminatedString; + } + + fn scanNumber(self: *Tokenizer) Token { + const start = self.pos; + if (self.input[self.pos] == '-') self.pos += 1; + while (self.pos < self.input.len) { + const c = self.input[self.pos]; + const valid = (c >= '0' and c <= '9') or c == '.' or + c == 'e' or c == 'E' or c == '+' or c == '-'; + if (!valid) break; + self.pos += 1; + } + return Token{ .number = .{ .start = start, .end = self.pos } }; + } + + fn scanLit(self: *Tokenizer, lit: []const u8, tok: Token) !Token { + if (self.pos + lit.len > self.input.len) return error.UnexpectedEnd; + if (!std.mem.eql(u8, self.input[self.pos .. self.pos + lit.len], lit)) { + return error.BadLiteral; + } + self.pos += @intCast(lit.len); + return tok; + } +}; + +// ============================================================================= +// JsonTape — concrete Source implementation +// ============================================================================= + +const Node = struct { + kind: NodeKind, + src_start: u32, + src_end: u32, + hash: u64 = 0, + // For composites: range into children array + child_start: u32 = 0, + child_count: u32 = 0, + // For primitives: parsed value + int_val: i64 = 0, + float_val: f64 = 0, + // For strings: child_start/child_count are reused as offset/length into str_pool. 
+};
+
+/// Flat ("tape") representation of a parsed JSON document.
+///
+/// Every JSON value becomes one `Node` in `nodes`. Containers reference their
+/// children through a contiguous run of node indices in `children`; strings
+/// reference their decoded bytes through a contiguous run in `str_pool`.
+/// In both cases the run is described by the node's `child_start` /
+/// `child_count` fields.
+pub const JsonTape = struct {
+    /// Allocator used for all three lists below.
+    alloc: Allocator,
+    /// JSON source text. Nodes keep byte offsets into it, so it must stay
+    /// valid for as long as `verify` may be called.
+    input: []const u8,
+    /// One entry per parsed value, in parse order (a container is appended
+    /// before any of its children).
+    nodes: std.ArrayList(Node) = .{},
+    /// Child node indices for arrays (elements) and objects (alternating
+    /// key, value).
+    children: std.ArrayList(u32) = .{},
+    /// Escape-decoded bytes of every string value, back to back.
+    str_pool: std.ArrayList(u8) = .{},
+    /// Index of the root value; set by `parse`.
+    root_idx: u32 = 0,
+
+    /// Release the node, child and string-pool storage.
+    pub fn deinit(self: *JsonTape) void {
+        self.nodes.deinit(self.alloc);
+        self.children.deinit(self.alloc);
+        self.str_pool.deinit(self.alloc);
+    }
+
+    /// Parse `input` into the tape and record the root node index.
+    ///
+    /// Only a single value is parsed; input after the root value is not
+    /// examined here.
+    /// NOTE(review): trailing garbage is therefore accepted silently —
+    /// confirm whether that is intended.
+    pub fn parse(self: *JsonTape) !void {
+        var tok = Tokenizer{ .input = self.input };
+        self.root_idx = try self.parseValue(&tok);
+    }
+
+    /// Parse one JSON value starting at the tokenizer's current position and
+    /// return the index of the node created for it.
+    ///
+    /// Errors: `ExpectedComma`, `ExpectedColon`, `NonStringKey`,
+    /// `UnexpectedToken`, plus whatever the tokenizer, number parsing,
+    /// string decoding or allocation return.
+    ///
+    /// NOTE(review): recursion depth follows input nesting depth with no
+    /// limit.
+    fn parseValue(self: *JsonTape, tok: *Tokenizer) anyerror!u32 {
+        tok.skipWs();
+        const start = tok.pos;
+        const t = try tok.next();
+        // The node for this value is always the next one appended, so its
+        // index is known before the switch.
+        const idx: u32 = @intCast(self.nodes.items.len);
+
+        switch (t) {
+            .null_lit => try self.nodes.append(self.alloc, .{
+                .kind = .null_v,
+                .src_start = start,
+                .src_end = tok.pos,
+            }),
+            .true_lit => try self.nodes.append(self.alloc, .{
+                .kind = .true_v,
+                .src_start = start,
+                .src_end = tok.pos,
+            }),
+            .false_lit => try self.nodes.append(self.alloc, .{
+                .kind = .false_v,
+                .src_start = start,
+                .src_end = tok.pos,
+            }),
+            .number => |r| {
+                const text = self.input[r.start..r.end];
+                // No '.', 'e' or 'E' means the literal is written as an integer.
+                const looks_int = std.mem.indexOfAny(u8, text, ".eE") == null;
+                if (looks_int) {
+                    if (std.fmt.parseInt(i64, text, 10)) |n| {
+                        try self.nodes.append(self.alloc, .{
+                            .kind = .int_v,
+                            .src_start = start,
+                            .src_end = tok.pos,
+                            .int_val = n,
+                        });
+                        return idx;
+                    } else |_| {
+                        // Did not parse as i64 (e.g. out of range): fall
+                        // through and store it as a float instead.
+                    }
+                }
+                const f = try std.fmt.parseFloat(f64, text);
+                try self.nodes.append(self.alloc, .{
+                    .kind = .float_v,
+                    .src_start = start,
+                    .src_end = tok.pos,
+                    .float_val = f,
+                });
+            },
+            .string => |r| {
+                // Drop the first and last byte of the token range
+                // (presumably the surrounding quotes — confirm against Tokenizer).
+                const raw = self.input[r.start + 1 .. r.end - 1];
+                const pool_start: u32 = @intCast(self.str_pool.items.len);
+                try decodeJsonString(self.alloc, &self.str_pool, raw);
+                const pool_len: u32 = @intCast(self.str_pool.items.len - pool_start);
+                // For strings, child_start/child_count address str_pool
+                // rather than children.
+                try self.nodes.append(self.alloc, .{
+                    .kind = .string_v,
+                    .src_start = start,
+                    .src_end = tok.pos,
+                    .child_start = pool_start,
+                    .child_count = pool_len,
+                });
+            },
+            .array_open => {
+                // src_end and the child range are patched in once the
+                // closing bracket has been consumed.
+                try self.nodes.append(self.alloc, .{ .kind = .array_v, .src_start = start, .src_end = 0 });
+                // Use a local list so nested containers don't pollute our child range.
+                var local: std.ArrayList(u32) = .{};
+                defer local.deinit(self.alloc);
+                var first = true;
+                while (true) {
+                    tok.skipWs();
+                    if (tok.peek() == @as(u8, ']')) {
+                        _ = try tok.next();
+                        break;
+                    }
+                    if (!first) {
+                        const t2 = try tok.next();
+                        if (t2 != .comma) return error.ExpectedComma;
+                    }
+                    first = false;
+                    const ci = try self.parseValue(tok);
+                    try local.append(self.alloc, ci);
+                }
+                const cstart: u32 = @intCast(self.children.items.len);
+                try self.children.appendSlice(self.alloc, local.items);
+                // Index through nodes.items again: the list may have been
+                // reallocated while the children were parsed.
+                self.nodes.items[idx].child_start = cstart;
+                self.nodes.items[idx].child_count = @intCast(local.items.len);
+                self.nodes.items[idx].src_end = tok.pos;
+            },
+            .object_open => {
+                try self.nodes.append(self.alloc, .{ .kind = .object_v, .src_start = start, .src_end = 0 });
+                // Collects alternating key/value node indices for this object.
+                var local: std.ArrayList(u32) = .{};
+                defer local.deinit(self.alloc);
+                var first = true;
+                while (true) {
+                    tok.skipWs();
+                    if (tok.peek() == @as(u8, '}')) {
+                        _ = try tok.next();
+                        break;
+                    }
+                    if (!first) {
+                        const t2 = try tok.next();
+                        if (t2 != .comma) return error.ExpectedComma;
+                    }
+                    first = false;
+                    // Keys are parsed as ordinary values and then required
+                    // to be strings.
+                    const ki = try self.parseValue(tok);
+                    if (self.nodes.items[ki].kind != .string_v) return error.NonStringKey;
+                    const t3 = try tok.next();
+                    if (t3 != .colon) return error.ExpectedColon;
+                    const vi = try self.parseValue(tok);
+                    try local.append(self.alloc, ki);
+                    try local.append(self.alloc, vi);
+                }
+                const cstart: u32 = @intCast(self.children.items.len);
+                try self.children.appendSlice(self.alloc, local.items);
+                self.nodes.items[idx].child_start = cstart;
+                self.nodes.items[idx].child_count = @intCast(local.items.len);
+                self.nodes.items[idx].src_end = tok.pos;
+            },
+            else => return error.UnexpectedToken,
+        }
+        return idx;
+    }
+
+    /// Compute and store the structural hash of every node reachable from
+    /// the root. Call after `parse`.
+    pub fn computeHashes(self: *JsonTape) void {
+        _ = self.hashNode(self.root_idx);
+    }
+
+    /// Hash node `idx`, store the result in the node's `hash` field and
+    /// return it.
+    ///
+    /// The hash is Wyhash seeded with the node kind, fed with the scalar
+    /// value, the decoded string bytes, or the hashes of the children in
+    /// stored order (so object pair order affects the hash).
+    fn hashNode(self: *JsonTape, idx: u32) u64 {
+        const n = &self.nodes.items[idx];
+        var h = std.hash.Wyhash.init(@intFromEnum(n.kind));
+        switch (n.kind) {
+            .null_v, .true_v, .false_v => {},
+            .int_v => h.update(std.mem.asBytes(&n.int_val)),
+            .float_v => h.update(std.mem.asBytes(&n.float_val)),
+            .string_v => h.update(self.stringBytes(idx)),
+            .array_v, .object_v => {
+                var i: u32 = 0;
+                while (i < n.child_count) : (i += 1) {
+                    const ci = self.children.items[n.child_start + i];
+                    const ch = self.hashNode(ci);
+                    h.update(std.mem.asBytes(&ch));
+                }
+            },
+        }
+        n.hash = h.final();
+        return n.hash;
+    }
+
+    // ---- Source interface -------------------------------------------------
+
+    /// Index of the root value.
+    pub fn rootIdx(self: *const JsonTape) u32 {
+        return self.root_idx;
+    }
+
+    /// Kind of node `idx`.
+    pub fn kind(self: *const JsonTape, idx: u32) NodeKind {
+        return self.nodes.items[idx].kind;
+    }
+
+    /// Stored `int_val` of node `idx`; set by the parser for `int_v` nodes.
+    pub fn intValue(self: *const JsonTape, idx: u32) i64 {
+        return self.nodes.items[idx].int_val;
+    }
+
+    /// Stored `float_val` of node `idx`; set by the parser for `float_v` nodes.
+    pub fn floatValue(self: *const JsonTape, idx: u32) f64 {
+        return self.nodes.items[idx].float_val;
+    }
+
+    /// Decoded bytes of string node `idx`, as a slice into `str_pool`.
+    /// The slice is invalidated if `str_pool` grows.
+    pub fn stringBytes(self: *const JsonTape, idx: u32) []const u8 {
+        const n = self.nodes.items[idx];
+        return self.str_pool.items[n.child_start .. n.child_start + n.child_count];
+    }
+
+    /// Stored `child_count` of node `idx`: element count for arrays, twice
+    /// the pair count for objects, decoded byte length for strings.
+    pub fn childCount(self: *const JsonTape, idx: u32) u32 {
+        return self.nodes.items[idx].child_count;
+    }
+
+    /// Node index of the `i`-th child of container `idx`. No bounds check
+    /// against `child_count` is made here.
+    pub fn childAt(self: *const JsonTape, idx: u32, i: u32) u32 {
+        const n = self.nodes.items[idx];
+        return self.children.items[n.child_start + i];
+    }
+
+    /// Hash stored for node `idx` by `computeHashes`.
+    pub fn nodeHash(self: *const JsonTape, idx: u32) u64 {
+        return self.nodes.items[idx].hash;
+    }
+
+    /// Verify two nodes are structurally equal by comparing JSON source bytes.
+    /// Conservative: bytes-equal → guaranteed value-equal. Bytes-different →
+    /// might still be value-equal (pretty-print variance) but we treat as
+    /// not-equal and skip the dedup. Lossless either way.
+    pub fn verify(self: *const JsonTape, idx_a: u32, idx_b: u32) bool {
+        const a = self.nodes.items[idx_a];
+        const b = self.nodes.items[idx_b];
+        const a_src = self.input[a.src_start..a.src_end];
+        const b_src = self.input[b.src_start..b.src_end];
+        return std.mem.eql(u8, a_src, b_src);
+    }
+};
+
+/// Read exactly four hexadecimal digits from `raw` starting at `at`.
+///
+/// Returns `error.BadEscape` if fewer than four bytes remain or any byte is
+/// not a hex digit (signs and `_` separators are rejected).
+fn parseHex4(raw: []const u8, at: usize) !u21 {
+    if (at + 4 > raw.len) return error.BadEscape;
+    var v: u21 = 0;
+    for (raw[at .. at + 4]) |d| {
+        const digit = std.fmt.charToDigit(d, 16) catch return error.BadEscape;
+        v = (v << 4) | @as(u21, digit);
+    }
+    return v;
+}
+
+/// Decode the body of a JSON string literal (the text between the quotes)
+/// and append the resulting UTF-8 bytes to `out`.
+///
+/// Bytes other than `\` are copied unchanged. The two-character escapes
+/// `\" \\ \/ \b \f \n \r \t` and `\uXXXX` are supported. A `\uXXXX` high
+/// surrogate (D800–DBFF) must be followed immediately by a `\uXXXX` low
+/// surrogate (DC00–DFFF); the pair is combined into one code point.
+///
+/// Errors: `error.BadEscape` for a truncated or unknown escape, malformed
+/// hex digits, or an unpaired surrogate; allocation errors from `out`.
+/// On error, bytes appended so far remain in `out`.
+fn decodeJsonString(alloc: Allocator, out: *std.ArrayList(u8), raw: []const u8) !void {
+    var i: usize = 0;
+    while (i < raw.len) {
+        const c = raw[i];
+        if (c != '\\') {
+            try out.append(alloc, c);
+            i += 1;
+            continue;
+        }
+        if (i + 1 >= raw.len) return error.BadEscape;
+        const e = raw[i + 1];
+        switch (e) {
+            '"' => try out.append(alloc, '"'),
+            '\\' => try out.append(alloc, '\\'),
+            '/' => try out.append(alloc, '/'),
+            'b' => try out.append(alloc, 0x08),
+            'f' => try out.append(alloc, 0x0c),
+            'n' => try out.append(alloc, '\n'),
+            'r' => try out.append(alloc, '\r'),
+            't' => try out.append(alloc, '\t'),
+            'u' => {
+                var cp: u21 = try parseHex4(raw, i + 2);
+                i += 6;
+                if (cp >= 0xD800 and cp <= 0xDBFF) {
+                    // High surrogate: code points above U+FFFF are written
+                    // in JSON as two consecutive \u escapes.
+                    if (i + 1 >= raw.len or raw[i] != '\\' or raw[i + 1] != 'u') return error.BadEscape;
+                    const lo = try parseHex4(raw, i + 2);
+                    if (lo < 0xDC00 or lo > 0xDFFF) return error.BadEscape;
+                    cp = 0x10000 + ((cp - 0xD800) << 10) + (lo - 0xDC00);
+                    i += 6;
+                } else if (cp >= 0xDC00 and cp <= 0xDFFF) {
+                    // Low surrogate without a preceding high surrogate.
+                    return error.BadEscape;
+                }
+                var buf: [4]u8 = undefined;
+                const n = try std.unicode.utf8Encode(cp, &buf);
+                try out.appendSlice(alloc, buf[0..n]);
+                // `i` already points past the escape(s).
+                continue;
+            },
+            else => return error.BadEscape,
+        }
+        i += 2;
+    }
+}
+
+// =============================================================================
+// Encoder helpers
+// =============================================================================
+
+/// Number of base-64 digits `writeB64` emits for `value` (0 for value 0).
+fn b64Width(value: u64) u32 {
+    if (value == 0) return 0;
+    var n: u32 = 0;
+    var v = value;
+    while (v > 0) : (n += 1) v /= 64;
+    return n;
+}
+
+/// Append `value` to `out` as base-64 digits taken from `b64_chars`, most
+/// significant digit first. Zero is written as no digits at all.
+fn writeB64(out: *std.ArrayList(u8), alloc: Allocator, value: u64) !void {
+    if (value == 0) return;
+    // A u64 needs at most 11 base-64 digits, so 16 is ample.
+    var temp: [16]u8 = undefined;
+    var i: usize = 0;
+    var v = value;
+    // Digits come out least significant first...
+    while (v > 0) : (i += 1) {
+        temp[i] = b64_chars[v % 64];
+        v /= 64;
+    }
+    // ...so write them back in reverse.
+    while (i > 0) {
+        i -= 1;
+        try out.append(alloc, temp[i]);
+    }
+}
+
+/// ZigZag-map a signed integer to unsigned: 0 → 0, -1 → 1, 1 → 2, -2 → 3, ...
+fn zigzagEncode(n: i64) u64 {
+    const shifted = @as(u64, @bitCast(n)) << 1;
+    // Arithmetic shift: all ones for negative n, all zeros otherwise.
+    const sign = @as(u64, @bitCast(n >> 63));
+    return shifted ^ sign;
+}
+
+// =============================================================================
+// Encoder — generic over Source type (comptime dispatch, zero overhead)
+// =============================================================================
+
+/// Record of a value that has already been written in full, so that a later
+/// identical value can be replaced by a back-pointer.
+const DedupEntry = struct {
+    src_idx: u32, // node index in source — used for verify()
+    /// Offset in the encoder output at which the value's encoding starts.
+    output_offset: u32,
+    /// Length in bytes of the value's encoding.
+    output_size: u32,
+};
+
+/// Build an encoder type for a given `Source`.
+///
+/// `Source` must provide the methods called below: `rootIdx`, `kind`,
+/// `nodeHash`, `verify`, `intValue`, `floatValue`, `stringBytes`,
+/// `childCount` and `childAt` (see `JsonTape`).
+pub fn Encoder(comptime Source: type) type {
+    return struct {
+        /// Allocator for `output` and `dedup`.
+        alloc: Allocator,
+        /// Value tree being encoded; not owned.
+        source: *const Source,
+        /// Encoded bytes accumulated so far.
+        output: std.ArrayList(u8) = .{},
+        /// Node hash → first full emission of a value with that hash.
+        dedup: std.AutoHashMap(u64, DedupEntry),
+
+        const Self = @This();
+
+        /// Create an encoder over `source`. Nothing is encoded until `encode`.
+        pub fn init(alloc: Allocator, source: *const Source) Self {
+            return .{
+                .alloc = alloc,
+                .source = source,
+                .dedup = std.AutoHashMap(u64, DedupEntry).init(alloc),
+            };
+        }
+
+        /// Free the output buffer and the dedup table.
+        pub fn deinit(self: *Self) void {
+            self.output.deinit(self.alloc);
+            self.dedup.deinit();
+        }
+
+        /// Encode the whole source starting at its root and return the
+        /// output bytes. The slice is owned by the encoder and is
+        /// invalidated by `deinit`.
+        pub fn encode(self: *Self) ![]const u8 {
+            try self.emitNode(self.source.rootIdx());
+            return self.output.items;
+        }
+
+        /// Emit node `idx`, either in full or — when an identical value was
+        /// already emitted and a pointer is shorter — as `^` followed by the
+        /// base-64 distance from the current output position back to the
+        /// start of the earlier copy.
+        fn emitNode(self: *Self, idx: u32) anyerror!void {
+            const k = self.source.kind(idx);
+            const node_hash = self.source.nodeHash(idx);
+
+            // Cheap leaves never benefit from a pointer (^ + at-least-1-byte = 2+ bytes).
+            const skip_dedup = switch (k) {
+                .null_v, .true_v, .false_v => true,
+                else => false,
+            };
+
+            if (!skip_dedup) {
+                if (self.dedup.get(node_hash)) |existing| {
+                    if (self.source.verify(idx, existing.src_idx)) {
+                        const here: u32 = @intCast(self.output.items.len);
+                        const delta: u32 = here - existing.output_offset;
+                        // '^' plus the digits of delta.
+                        const ptr_size = b64Width(@intCast(delta)) + 1;
+                        if (ptr_size < existing.output_size) {
+                            try self.output.append(self.alloc, '^');
+                            try writeB64(&self.output, self.alloc, @intCast(delta));
+                            return;
+                        }
+                    }
+                    // Hash hit but verify failed (collision) or pointer not profitable.
+                    // Either way, fall through to fresh emit.
+                }
+            }
+
+            const before: u32 = @intCast(self.output.items.len);
+            try self.emitFresh(idx);
+            const size: u32 = @intCast(self.output.items.len - before);
+
+            // Only the first emission for a given hash is remembered, and
+            // only if it is longer than the shortest possible pointer.
+            if (!skip_dedup and !self.dedup.contains(node_hash) and size > 2) {
+                try self.dedup.put(node_hash, .{
+                    .src_idx = idx,
+                    .output_offset = before,
+                    .output_size = size,
+                });
+            }
+        }
+
+        /// Emit the full encoding of node `idx` (children go back through
+        /// `emitNode`, so they may still be deduplicated).
+        ///
+        /// Encodings written here:
+        ///   null / true / false  `'n` / `'t` / `'f`
+        ///   integer              `+` then base-64 of the zigzag value
+        ///   float                see `emitFloat`
+        ///   string               the bytes, then `,`, then base-64 length
+        ///   array                `[`, elements last-to-first, `]`
+        ///   object               `{`, pairs last-to-first (value, key), `}`
+        fn emitFresh(self: *Self, idx: u32) anyerror!void {
+            const k = self.source.kind(idx);
+            switch (k) {
+                .null_v => try self.output.appendSlice(self.alloc, "'n"),
+                .true_v => try self.output.appendSlice(self.alloc, "'t"),
+                .false_v => try self.output.appendSlice(self.alloc, "'f"),
+                .int_v => {
+                    const v = self.source.intValue(idx);
+                    try self.output.append(self.alloc, '+');
+                    try writeB64(&self.output, self.alloc, zigzagEncode(v));
+                },
+                .float_v => try self.emitFloat(self.source.floatValue(idx)),
+                .string_v => {
+                    const body = self.source.stringBytes(idx);
+                    try self.output.appendSlice(self.alloc, body);
+                    try self.output.append(self.alloc, ',');
+                    try writeB64(&self.output, self.alloc, body.len);
+                },
+                .array_v => {
+                    try self.output.append(self.alloc, '[');
+                    const n = self.source.childCount(idx);
+                    // Walk the elements from last to first.
+                    var i = n;
+                    while (i > 0) {
+                        i -= 1;
+                        const ci = self.source.childAt(idx, i);
+                        try self.emitNode(ci);
+                    }
+                    try self.output.append(self.alloc, ']');
+                },
+                .object_v => {
+                    try self.output.append(self.alloc, '{');
+                    const n = self.source.childCount(idx);
+                    // Children are alternating (key, value, key, value, ...) in input order.
+                    // Emit pairs in REVERSE order (so R-to-L parse yields natural).
+                    // Within a pair, value first L-to-R then key.
+                    const pairs = n / 2;
+                    var pi = pairs;
+                    while (pi > 0) {
+                        pi -= 1;
+                        const ki = self.source.childAt(idx, pi * 2);
+                        const vi = self.source.childAt(idx, pi * 2 + 1);
+                        try self.emitNode(vi);
+                        try self.emitNode(ki);
+                    }
+                    try self.output.append(self.alloc, '}');
+                },
+            }
+        }
+
+        /// Emit a float as a decimal mantissa/exponent pair:
+        /// `+<zigzag base>` optionally followed by `*<zigzag exp>` when the
+        /// power-of-ten exponent is non-zero, where value = base × 10^exp.
+        /// NaN, +inf and -inf are written as `'nan`, `'inf` and `'nif`.
+        fn emitFloat(self: *Self, val: f64) !void {
+            if (std.math.isNan(val)) {
+                try self.output.appendSlice(self.alloc, "'nan");
+                return;
+            }
+            if (val == std.math.inf(f64)) {
+                try self.output.appendSlice(self.alloc, "'inf");
+                return;
+            }
+            if (val == -std.math.inf(f64)) {
+                try self.output.appendSlice(self.alloc, "'nif");
+                return;
+            }
+
+            // Decompose val ≈ base × 10^exp via scientific notation parsing.
+            // The text is read as: [-]<int digits>[.<frac digits>][e[-]<exp digits>]
+            var buf: [64]u8 = undefined;
+            const text = try std.fmt.bufPrint(&buf, "{e}", .{val});
+
+            var i: usize = 0;
+            var sign_neg = false;
+            if (text.len > 0 and text[0] == '-') {
+                sign_neg = true;
+                i = 1;
+            }
+
+            const int_start = i;
+            while (i < text.len and text[i] >= '0' and text[i] <= '9') : (i += 1) {}
+            const int_part = text[int_start..i];
+
+            var frac_part: []const u8 = "";
+            if (i < text.len and text[i] == '.') {
+                i += 1;
+                const frac_start = i;
+                while (i < text.len and text[i] >= '0' and text[i] <= '9') : (i += 1) {}
+                frac_part = text[frac_start..i];
+            }
+
+            var exp10: i64 = 0;
+            if (i < text.len and (text[i] == 'e' or text[i] == 'E')) {
+                i += 1;
+                exp10 = try std.fmt.parseInt(i64, text[i..], 10);
+            }
+
+            // Combine integer and fractional digit sequences, adjusting exponent.
+            var digits_buf: [64]u8 = undefined;
+            const total = int_part.len + frac_part.len;
+            if (total == 0 or total > digits_buf.len) {
+                // Fallback: truncate to integer
+                // NOTE(review): @intFromFloat is only valid when val fits in
+                // i64 — confirm this path cannot see larger magnitudes.
+                try self.output.append(self.alloc, '+');
+                try writeB64(&self.output, self.alloc, zigzagEncode(@intFromFloat(val)));
+                return;
+            }
+            std.mem.copyForwards(u8, digits_buf[0..int_part.len], int_part);
+            std.mem.copyForwards(u8, digits_buf[int_part.len .. int_part.len + frac_part.len], frac_part);
+            const all_digits = digits_buf[0..total];
+
+            // Trim trailing zeros (folding back into exponent)
+            var trim_end = all_digits.len;
+            var trailing: i64 = 0;
+            while (trim_end > 1 and all_digits[trim_end - 1] == '0') {
+                trim_end -= 1;
+                trailing += 1;
+            }
+            const final_digits = all_digits[0..trim_end];
+            // Moving the decimal point past the fraction digits lowers the
+            // exponent by their count; each trimmed zero raises it by one.
+            const final_exp = exp10 - @as(i64, @intCast(frac_part.len)) + trailing;
+
+            var base = std.fmt.parseInt(i64, final_digits, 10) catch {
+                // Digits do not fit in i64: same integer-truncation fallback
+                // as above (same NOTE(review) applies).
+                try self.output.append(self.alloc, '+');
+                try writeB64(&self.output, self.alloc, zigzagEncode(@intFromFloat(val)));
+                return;
+            };
+            // NOTE(review): when all digits are zero (e.g. -0.0) negating has
+            // no effect, so the sign of a negative zero is not preserved.
+            if (sign_neg) base = -base;
+
+            try self.output.append(self.alloc, '+');
+            try writeB64(&self.output, self.alloc, zigzagEncode(base));
+            if (final_exp != 0) {
+                try self.output.append(self.alloc, '*');
+                try writeB64(&self.output, self.alloc, zigzagEncode(final_exp));
+            }
+        }
+    };
+}
+
+// =============================================================================
+// Main
+// =============================================================================
+
+/// Command-line entry point: read JSON from the file named by the first
+/// argument (or from stdin when no argument is given), encode it, and write
+/// the encoded bytes to stdout. Input is limited to 1 GiB.
+pub fn main() !void {
+    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .{};
+    defer _ = gpa.deinit();
+    const alloc = gpa.allocator();
+
+    const args = try std.process.argsAlloc(alloc);
+    defer std.process.argsFree(alloc, args);
+
+    const input: []u8 = blk: {
+        if (args.len > 1) {
+            const file = try std.fs.cwd().openFile(args[1], .{});
+            defer file.close();
+            break :blk try file.readToEndAlloc(alloc, 1 << 30);
+        } else {
+            const stdin = std.fs.File.stdin();
+            break :blk try stdin.readToEndAlloc(alloc, 1 << 30);
+        }
+    };
+    defer alloc.free(input);
+
+    // The tape keeps offsets into `input`, so `input` must outlive it
+    // (defers run in reverse order, so it does).
+    var tape = JsonTape{ .alloc = alloc, .input = input };
+    defer tape.deinit();
+    try tape.parse();
+    tape.computeHashes();
+
+    var encoder = Encoder(JsonTape).init(alloc, &tape);
+    defer encoder.deinit();
+    const out = try encoder.encode();
+
+    const stdout = std.fs.File.stdout();
+    try stdout.writeAll(out);
+}