From fe04ddbad03654b9c086d4fbcc81a4380a3ad7f3 Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 10 Mar 2026 01:29:29 +0100 Subject: [PATCH 1/3] feat: adaptive budget scaling, proportional entity caps, T2/T3 docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the binary budget step function (200/400) with linear 30% scaling clamped to 200–600. Entity cap now scales with content length (3–15) instead of a fixed 10. Both changes improve information preservation on long content and reduce noise on short content. Add JSDoc for confidence field, inferProseTier, and T2/T3 treatment in the compression pipeline. Update docs and benchmark baseline. --- bench/baselines/current.json | 106 +++++----- bench/baselines/history/v1.0.0.json | 106 +++++----- docs/benchmark-results.md | 302 ++++++++++++++-------------- docs/compression-pipeline.md | 6 +- docs/preservation-rules.md | 6 +- src/classify.ts | 21 ++ src/compress.ts | 16 +- tests/compress.test.ts | 59 +++++- 8 files changed, 351 insertions(+), 271 deletions(-) diff --git a/bench/baselines/current.json b/bench/baselines/current.json index 77bfa0d..2211d13 100644 --- a/bench/baselines/current.json +++ b/bench/baselines/current.json @@ -1,23 +1,23 @@ { "version": "1.0.0", - "generated": "2026-02-26T05:31:42.406Z", + "generated": "2026-03-10T00:15:20.299Z", "results": { "basic": { "Coding assistant": { - "ratio": 1.6812907904278462, - "tokenRatio": 1.6729559748427674, + "ratio": 1.9385451505016722, + "tokenRatio": 1.9275362318840579, "compressed": 5, "preserved": 8 }, "Long Q&A": { - "ratio": 6.158536585365853, - "tokenRatio": 6.114164904862579, + "ratio": 4.902912621359223, + "tokenRatio": 4.87689713322091, "compressed": 4, "preserved": 6 }, "Tool-heavy": { - "ratio": 1.2991563919532771, - "tokenRatio": 1.2946428571428572, + "ratio": 1.4128440366972477, + "tokenRatio": 1.4043583535108959, "compressed": 2, "preserved": 16 }, @@ -28,8 +28,8 @@ "preserved": 7 }, "Deep conversation": { - "ratio": 2.124913733609386, - "tokenRatio": 2.1241305510968433, + "ratio": 2.5041568769202964, + "tokenRatio": 2.4905897114178166, "compressed": 50, "preserved": 1 }, @@ -40,21 +40,21 @@ "preserved": 11 }, "Structured content": { - "ratio": 1.9338990620812864, - "tokenRatio": 1.9241486068111455, + "ratio": 1.8559794256322333, + "tokenRatio": 1.8469539375928679, "compressed": 2, "preserved": 10 }, "Agentic coding session": { - "ratio": 1.428351309707242, - "tokenRatio": 1.4258962011771001, + "ratio": 1.4768201370081249, + "tokenRatio": 1.4740044247787611, "compressed": 2, "preserved": 31 } }, "tokenBudget": { "Deep conversation|dedup=false": { - "tokenCount": 3738, + "tokenCount": 3188, "fits": false, "recencyWindow": 0, "compressed": 50, @@ -62,7 +62,7 @@ "deduped": 0 }, "Deep conversation|dedup=true": { - "tokenCount": 3738, + "tokenCount": 3188, "fits": false, "recencyWindow": 0, "compressed": 50, @@ -70,7 +70,7 @@ "deduped": 0 }, "Agentic coding session|dedup=false": { - "tokenCount": 2345, + "tokenCount": 2223, "fits": false, "recencyWindow": 0, "compressed": 4, @@ -78,7 +78,7 @@ "deduped": 0 }, "Agentic coding session|dedup=true": { - "tokenCount": 1957, + "tokenCount": 1900, "fits": true, "recencyWindow": 9, "compressed": 1, @@ -88,24 +88,24 @@ }, "dedup": { "Coding assistant": { - "rw0Base": 1.6812907904278462, - "rw0Dup": 1.6812907904278462, - "rw4Base": 1.5104234527687297, - "rw4Dup": 1.5104234527687297, + "rw0Base": 1.9385451505016722, + "rw0Dup": 1.9385451505016722, + "rw4Base": 1.6061655697956356, + "rw4Dup": 1.6061655697956356, "deduped": 0 }, "Long Q&A": { - "rw0Base": 5.139949109414759, - "rw0Dup": 6.158536585365853, - "rw4Base": 1.9024298361273309, - "rw4Dup": 2.0264847512038524, + "rw0Base": 4, + "rw0Dup": 4.902912621359223, + "rw4Base": 1.76296037702915, + "rw4Dup": 1.918693009118541, "deduped": 1 }, "Tool-heavy": { - "rw0Base": 1.2991563919532771, - "rw0Dup": 1.2991563919532771, - "rw4Base": 1.2991563919532771, - "rw4Dup": 1.2991563919532771, + "rw0Base": 1.4128440366972477, + "rw0Dup": 1.4128440366972477, + "rw4Base": 1.4128440366972477, + "rw4Dup": 1.4128440366972477, "deduped": 0 }, "Short conversation": { @@ -116,10 +116,10 @@ "deduped": 0 }, "Deep conversation": { - "rw0Base": 2.124913733609386, - "rw0Dup": 2.124913733609386, - "rw4Base": 1.9527165104643789, - "rw4Dup": 1.9527165104643789, + "rw0Base": 2.5041568769202964, + "rw0Dup": 2.5041568769202964, + "rw4Base": 2.2394536932277354, + "rw4Dup": 2.2394536932277354, "deduped": 0 }, "Technical explanation": { @@ -130,17 +130,17 @@ "deduped": 0 }, "Structured content": { - "rw0Base": 1.9338990620812864, - "rw0Dup": 1.9338990620812864, - "rw4Base": 1.373730964467005, - "rw4Dup": 1.373730964467005, + "rw0Base": 1.8559794256322333, + "rw0Dup": 1.8559794256322333, + "rw4Base": 1.3339494762784967, + "rw4Dup": 1.3339494762784967, "deduped": 0 }, "Agentic coding session": { - "rw0Base": 1.1374233128834357, - "rw0Dup": 1.428351309707242, - "rw4Base": 1.1374233128834357, - "rw4Dup": 1.428351309707242, + "rw0Base": 1.2001553599171413, + "rw0Dup": 1.4768201370081249, + "rw4Base": 1.2001553599171413, + "rw4Dup": 1.4768201370081249, "deduped": 4 } }, @@ -148,17 +148,17 @@ "Coding assistant": { "exact": 0, "fuzzy": 0, - "ratio": 1.6812907904278462 + "ratio": 1.9385451505016722 }, "Long Q&A": { "exact": 1, "fuzzy": 0, - "ratio": 6.158536585365853 + "ratio": 4.902912621359223 }, "Tool-heavy": { "exact": 0, "fuzzy": 0, - "ratio": 1.2991563919532771 + "ratio": 1.4128440366972477 }, "Short conversation": { "exact": 0, @@ -168,7 +168,7 @@ "Deep conversation": { "exact": 0, "fuzzy": 0, - "ratio": 2.124913733609386 + "ratio": 2.5041568769202964 }, "Technical explanation": { "exact": 0, @@ -178,22 +178,22 @@ "Structured content": { "exact": 0, "fuzzy": 0, - "ratio": 1.9338990620812864 + "ratio": 1.8559794256322333 }, "Agentic coding session": { "exact": 4, "fuzzy": 2, - "ratio": 2.229973538609574 + "ratio": 2.3504056795131847 } }, "bundleSize": { "classify.js": { - "bytes": 7724, - "gzipBytes": 3250 + "bytes": 8074, + "gzipBytes": 3443 }, "compress.js": { - "bytes": 33941, - "gzipBytes": 8721 + "bytes": 34466, + "gzipBytes": 8914 }, "dedup.js": { "bytes": 10260, @@ -216,8 +216,8 @@ "gzipBytes": 31 }, "total": { - "bytes": 57498, - "gzipBytes": 16952 + "bytes": 58373, + "gzipBytes": 17338 } } } diff --git a/bench/baselines/history/v1.0.0.json b/bench/baselines/history/v1.0.0.json index 77bfa0d..2211d13 100644 --- a/bench/baselines/history/v1.0.0.json +++ b/bench/baselines/history/v1.0.0.json @@ -1,23 +1,23 @@ { "version": "1.0.0", - "generated": "2026-02-26T05:31:42.406Z", + "generated": "2026-03-10T00:15:20.299Z", "results": { "basic": { "Coding assistant": { - "ratio": 1.6812907904278462, - "tokenRatio": 1.6729559748427674, + "ratio": 1.9385451505016722, + "tokenRatio": 1.9275362318840579, "compressed": 5, "preserved": 8 }, "Long Q&A": { - "ratio": 6.158536585365853, - "tokenRatio": 6.114164904862579, + "ratio": 4.902912621359223, + "tokenRatio": 4.87689713322091, "compressed": 4, "preserved": 6 }, "Tool-heavy": { - "ratio": 1.2991563919532771, - "tokenRatio": 1.2946428571428572, + "ratio": 1.4128440366972477, + "tokenRatio": 1.4043583535108959, "compressed": 2, "preserved": 16 }, @@ -28,8 +28,8 @@ "preserved": 7 }, "Deep conversation": { - "ratio": 2.124913733609386, - "tokenRatio": 2.1241305510968433, + "ratio": 2.5041568769202964, + "tokenRatio": 2.4905897114178166, "compressed": 50, "preserved": 1 }, @@ -40,21 +40,21 @@ "preserved": 11 }, "Structured content": { - "ratio": 1.9338990620812864, - "tokenRatio": 1.9241486068111455, + "ratio": 1.8559794256322333, + "tokenRatio": 1.8469539375928679, "compressed": 2, "preserved": 10 }, "Agentic coding session": { - "ratio": 1.428351309707242, - "tokenRatio": 1.4258962011771001, + "ratio": 1.4768201370081249, + "tokenRatio": 1.4740044247787611, "compressed": 2, "preserved": 31 } }, "tokenBudget": { "Deep conversation|dedup=false": { - "tokenCount": 3738, + "tokenCount": 3188, "fits": false, "recencyWindow": 0, "compressed": 50, @@ -62,7 +62,7 @@ "deduped": 0 }, "Deep conversation|dedup=true": { - "tokenCount": 3738, + "tokenCount": 3188, "fits": false, "recencyWindow": 0, "compressed": 50, @@ -70,7 +70,7 @@ "deduped": 0 }, "Agentic coding session|dedup=false": { - "tokenCount": 2345, + "tokenCount": 2223, "fits": false, "recencyWindow": 0, "compressed": 4, @@ -78,7 +78,7 @@ "deduped": 0 }, "Agentic coding session|dedup=true": { - "tokenCount": 1957, + "tokenCount": 1900, "fits": true, "recencyWindow": 9, "compressed": 1, @@ -88,24 +88,24 @@ }, "dedup": { "Coding assistant": { - "rw0Base": 1.6812907904278462, - "rw0Dup": 1.6812907904278462, - "rw4Base": 1.5104234527687297, - "rw4Dup": 1.5104234527687297, + "rw0Base": 1.9385451505016722, + "rw0Dup": 1.9385451505016722, + "rw4Base": 1.6061655697956356, + "rw4Dup": 1.6061655697956356, "deduped": 0 }, "Long Q&A": { - "rw0Base": 5.139949109414759, - "rw0Dup": 6.158536585365853, - "rw4Base": 1.9024298361273309, - "rw4Dup": 2.0264847512038524, + "rw0Base": 4, + "rw0Dup": 4.902912621359223, + "rw4Base": 1.76296037702915, + "rw4Dup": 1.918693009118541, "deduped": 1 }, "Tool-heavy": { - "rw0Base": 1.2991563919532771, - "rw0Dup": 1.2991563919532771, - "rw4Base": 1.2991563919532771, - "rw4Dup": 1.2991563919532771, + "rw0Base": 1.4128440366972477, + "rw0Dup": 1.4128440366972477, + "rw4Base": 1.4128440366972477, + "rw4Dup": 1.4128440366972477, "deduped": 0 }, "Short conversation": { @@ -116,10 +116,10 @@ "deduped": 0 }, "Deep conversation": { - "rw0Base": 2.124913733609386, - "rw0Dup": 2.124913733609386, - "rw4Base": 1.9527165104643789, - "rw4Dup": 1.9527165104643789, + "rw0Base": 2.5041568769202964, + "rw0Dup": 2.5041568769202964, + "rw4Base": 2.2394536932277354, + "rw4Dup": 2.2394536932277354, "deduped": 0 }, "Technical explanation": { @@ -130,17 +130,17 @@ "deduped": 0 }, "Structured content": { - "rw0Base": 1.9338990620812864, - "rw0Dup": 1.9338990620812864, - "rw4Base": 1.373730964467005, - "rw4Dup": 1.373730964467005, + "rw0Base": 1.8559794256322333, + "rw0Dup": 1.8559794256322333, + "rw4Base": 1.3339494762784967, + "rw4Dup": 1.3339494762784967, "deduped": 0 }, "Agentic coding session": { - "rw0Base": 1.1374233128834357, - "rw0Dup": 1.428351309707242, - "rw4Base": 1.1374233128834357, - "rw4Dup": 1.428351309707242, + "rw0Base": 1.2001553599171413, + "rw0Dup": 1.4768201370081249, + "rw4Base": 1.2001553599171413, + "rw4Dup": 1.4768201370081249, "deduped": 4 } }, @@ -148,17 +148,17 @@ "Coding assistant": { "exact": 0, "fuzzy": 0, - "ratio": 1.6812907904278462 + "ratio": 1.9385451505016722 }, "Long Q&A": { "exact": 1, "fuzzy": 0, - "ratio": 6.158536585365853 + "ratio": 4.902912621359223 }, "Tool-heavy": { "exact": 0, "fuzzy": 0, - "ratio": 1.2991563919532771 + "ratio": 1.4128440366972477 }, "Short conversation": { "exact": 0, @@ -168,7 +168,7 @@ "Deep conversation": { "exact": 0, "fuzzy": 0, - "ratio": 2.124913733609386 + "ratio": 2.5041568769202964 }, "Technical explanation": { "exact": 0, @@ -178,22 +178,22 @@ "Structured content": { "exact": 0, "fuzzy": 0, - "ratio": 1.9338990620812864 + "ratio": 1.8559794256322333 }, "Agentic coding session": { "exact": 4, "fuzzy": 2, - "ratio": 2.229973538609574 + "ratio": 2.3504056795131847 } }, "bundleSize": { "classify.js": { - "bytes": 7724, - "gzipBytes": 3250 + "bytes": 8074, + "gzipBytes": 3443 }, "compress.js": { - "bytes": 33941, - "gzipBytes": 8721 + "bytes": 34466, + "gzipBytes": 8914 }, "dedup.js": { "bytes": 10260, @@ -216,8 +216,8 @@ "gzipBytes": 31 }, "total": { - "bytes": 57498, - "gzipBytes": 16952 + "bytes": 58373, + "gzipBytes": 17338 } } } diff --git a/docs/benchmark-results.md b/docs/benchmark-results.md index 458513a..277effc 100644 --- a/docs/benchmark-results.md +++ b/docs/benchmark-results.md @@ -2,19 +2,19 @@ [Back to README](../README.md) | [All docs](README.md) | [Handbook](benchmarks.md) -_Auto-generated by `npm run bench:save`. Do not edit manually._ +*Auto-generated by `npm run bench:save`. Do not edit manually.* -**v1.0.0** · Generated: 2026-02-26 +**v1.0.0** · Generated: 2026-03-10 -![avg ratio](https://img.shields.io/badge/avg%20ratio-2.08x-blue) ![best](https://img.shields.io/badge/best-6.16x-blue) ![scenarios](https://img.shields.io/badge/scenarios-8-blue) ![round-trip](https://img.shields.io/badge/round--trip-all_PASS-brightgreen) ![gzip](https://img.shields.io/badge/gzip-16.6%20KB-blue) +![avg ratio](https://img.shields.io/badge/avg%20ratio-2.01x-blue) ![best](https://img.shields.io/badge/best-4.90x-blue) ![scenarios](https://img.shields.io/badge/scenarios-8-blue) ![round-trip](https://img.shields.io/badge/round--trip-all_PASS-brightgreen) ![gzip](https://img.shields.io/badge/gzip-16.9%20KB-blue) ## Summary -| Metric | Value | -| -------------------- | -------- | -| Scenarios | 8 | -| Average compression | 2.08x | -| Best compression | 6.16x | +| Metric | Value | +| --- | --- | +| Scenarios | 8 | +| Average compression | 2.01x | +| Best compression | 4.90x | | Round-trip integrity | all PASS | ```mermaid @@ -25,26 +25,26 @@ pie title "Message Outcomes" ## Compression by Scenario -> **8 scenarios** · **2.08x** avg ratio · **1.00x** – **6.16x** range · all round-trips PASS +> **8 scenarios** · **2.01x** avg ratio · **1.00x** – **4.90x** range · all round-trips PASS ```mermaid xychart-beta title "Compression Ratio by Scenario" x-axis ["Coding", "Long Q&A", "Tool-heavy", "Short", "Deep", "Technical", "Structured", "Agentic"] y-axis "Char Ratio" - bar [1.68, 6.16, 1.30, 1.00, 2.12, 1.00, 1.93, 1.43] + bar [1.94, 4.90, 1.41, 1.00, 2.50, 1.00, 1.86, 1.48] ``` -| Scenario | Ratio | Reduction | Token Ratio | Messages | Compressed | Preserved | -| ---------------------- | ----: | --------: | ----------: | -------: | ---------: | --------: | -| Coding assistant | 1.68 | 41% | 1.67 | 13 | 5 | 8 | -| Long Q&A | 6.16 | 84% | 6.11 | 10 | 4 | 6 | -| Tool-heavy | 1.30 | 23% | 1.29 | 18 | 2 | 16 | -| Short conversation | 1.00 | 0% | 1.00 | 7 | 0 | 7 | -| Deep conversation | 2.12 | 53% | 2.12 | 51 | 50 | 1 | -| Technical explanation | 1.00 | 0% | 1.00 | 11 | 0 | 11 | -| Structured content | 1.93 | 48% | 1.92 | 12 | 2 | 10 | -| Agentic coding session | 1.43 | 30% | 1.43 | 33 | 2 | 31 | +| Scenario | Ratio | Reduction | Token Ratio | Messages | Compressed | Preserved | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | +| Coding assistant | 1.94 | 48% | 1.93 | 13 | 5 | 8 | +| Long Q&A | 4.90 | 80% | 4.88 | 10 | 4 | 6 | +| Tool-heavy | 1.41 | 29% | 1.40 | 18 | 2 | 16 | +| Short conversation | 1.00 | 0% | 1.00 | 7 | 0 | 7 | +| Deep conversation | 2.50 | 60% | 2.49 | 51 | 50 | 1 | +| Technical explanation | 1.00 | 0% | 1.00 | 11 | 0 | 11 | +| Structured content | 1.86 | 46% | 1.85 | 12 | 2 | 10 | +| Agentic coding session | 1.48 | 32% | 1.47 | 33 | 2 | 31 | ## Deduplication Impact @@ -53,61 +53,61 @@ xychart-beta title "Deduplication Impact (recencyWindow=0)" x-axis ["Long Q&A", "Agentic"] y-axis "Char Ratio" - bar [5.14, 1.14] - bar [6.16, 1.43] + bar [4.00, 1.20] + bar [4.90, 1.48] ``` -_First bar: no dedup · Second bar: with dedup_ +*First bar: no dedup · Second bar: with dedup* -| Scenario | No Dedup (rw=0) | Dedup (rw=0) | No Dedup (rw=4) | Dedup (rw=4) | Deduped | -| ---------------------- | --------------: | -----------: | --------------: | -----------: | ------: | -| Coding assistant | 1.68 | 1.68 | 1.51 | 1.51 | 0 | -| Long Q&A | 5.14 | 6.16 | 1.90 | 2.03 | 1 | -| Tool-heavy | 1.30 | 1.30 | 1.30 | 1.30 | 0 | -| Short conversation | 1.00 | 1.00 | 1.00 | 1.00 | 0 | -| Deep conversation | 2.12 | 2.12 | 1.95 | 1.95 | 0 | -| Technical explanation | 1.00 | 1.00 | 1.00 | 1.00 | 0 | -| Structured content | 1.93 | 1.93 | 1.37 | 1.37 | 0 | -| Agentic coding session | 1.14 | 1.43 | 1.14 | 1.43 | 4 | +| Scenario | No Dedup (rw=0) | Dedup (rw=0) | No Dedup (rw=4) | Dedup (rw=4) | Deduped | +| --- | ---: | ---: | ---: | ---: | ---: | +| Coding assistant | 1.94 | 1.94 | 1.61 | 1.61 | 0 | +| Long Q&A | 4.00 | 4.90 | 1.76 | 1.92 | 1 | +| Tool-heavy | 1.41 | 1.41 | 1.41 | 1.41 | 0 | +| Short conversation | 1.00 | 1.00 | 1.00 | 1.00 | 0 | +| Deep conversation | 2.50 | 2.50 | 2.24 | 2.24 | 0 | +| Technical explanation | 1.00 | 1.00 | 1.00 | 1.00 | 0 | +| Structured content | 1.86 | 1.86 | 1.33 | 1.33 | 0 | +| Agentic coding session | 1.20 | 1.48 | 1.20 | 1.48 | 4 | ### Fuzzy Dedup -| Scenario | Exact Deduped | Fuzzy Deduped | Ratio | vs Base | -| ---------------------- | ------------: | ------------: | ----: | ------: | -| Coding assistant | 0 | 0 | 1.68 | - | -| Long Q&A | 1 | 0 | 6.16 | - | -| Tool-heavy | 0 | 0 | 1.30 | - | -| Short conversation | 0 | 0 | 1.00 | - | -| Deep conversation | 0 | 0 | 2.12 | - | -| Technical explanation | 0 | 0 | 1.00 | - | -| Structured content | 0 | 0 | 1.93 | - | -| Agentic coding session | 4 | 2 | 2.23 | +56% | +| Scenario | Exact Deduped | Fuzzy Deduped | Ratio | vs Base | +| --- | ---: | ---: | ---: | ---: | +| Coding assistant | 0 | 0 | 1.94 | - | +| Long Q&A | 1 | 0 | 4.90 | - | +| Tool-heavy | 0 | 0 | 1.41 | - | +| Short conversation | 0 | 0 | 1.00 | - | +| Deep conversation | 0 | 0 | 2.50 | - | +| Technical explanation | 0 | 0 | 1.00 | - | +| Structured content | 0 | 0 | 1.86 | - | +| Agentic coding session | 4 | 2 | 2.35 | +59% | ## Token Budget Target: **2000 tokens** · 1/4 fit -| Scenario | Dedup | Tokens | Fits | recencyWindow | Compressed | Preserved | Deduped | -| ---------------------- | ----- | -----: | ---- | ------------: | ---------: | --------: | ------: | -| Deep conversation | no | 3738 | no | 0 | 50 | 1 | 0 | -| Deep conversation | yes | 3738 | no | 0 | 50 | 1 | 0 | -| Agentic coding session | no | 2345 | no | 0 | 4 | 33 | 0 | -| Agentic coding session | yes | 1957 | yes | 9 | 1 | 32 | 4 | +| Scenario | Dedup | Tokens | Fits | recencyWindow | Compressed | Preserved | Deduped | +| --- | --- | ---: | --- | ---: | ---: | ---: | ---: | +| Deep conversation | no | 3188 | no | 0 | 50 | 1 | 0 | +| Deep conversation | yes | 3188 | no | 0 | 50 | 1 | 0 | +| Agentic coding session | no | 2223 | no | 0 | 4 | 33 | 0 | +| Agentic coding session | yes | 1900 | yes | 9 | 1 | 32 | 4 | ## Bundle Size > Zero-dependency ESM library — tracked per-file to catch regressions. -| File | Size | Gzip | -| ------------- | ------: | ------: | -| classify.js | 7.5 KB | 3.2 KB | -| compress.js | 33.1 KB | 8.5 KB | -| dedup.js | 10.0 KB | 2.8 KB | -| expand.js | 2.7 KB | 934 B | -| index.js | 225 B | 159 B | -| summarizer.js | 2.5 KB | 993 B | -| types.js | 11 B | 31 B | -| **total** | 56.2 KB | 16.6 KB | +| File | Size | Gzip | +| --- | ---: | ---: | +| classify.js | 7.9 KB | 3.4 KB | +| compress.js | 33.7 KB | 8.7 KB | +| dedup.js | 10.0 KB | 2.8 KB | +| expand.js | 2.7 KB | 934 B | +| index.js | 225 B | 159 B | +| summarizer.js | 2.5 KB | 993 B | +| types.js | 11 B | 31 B | +| **total** | 57.0 KB | 16.9 KB | ## LLM vs Deterministic @@ -116,26 +116,26 @@ Target: **2000 tokens** · 1/4 fit ``` Deterministic vs ollama/llama3.2 -Coding assistant Det ████████░░░░░░░░░░░░░░░░░░░░░░ 1.68x - LLM ████████░░░░░░░░░░░░░░░░░░░░░░ 1.55x +Coding assistant Det ████████████░░░░░░░░░░░░░░░░░░ 1.94x + LLM █████████░░░░░░░░░░░░░░░░░░░░░ 1.55x -Long Q&A Det ██████████████████████████████ 6.16x - LLM ██████████████████████░░░░░░░░ 4.49x +Long Q&A Det ██████████████████████████████ 4.90x + LLM ███████████████████████████░░░ 4.49x -Tool-heavy Det ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.30x - LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.28x +Tool-heavy Det █████████░░░░░░░░░░░░░░░░░░░░░ 1.41x + LLM ████████░░░░░░░░░░░░░░░░░░░░░░ 1.28x -Deep conversation Det ██████████░░░░░░░░░░░░░░░░░░░░ 2.12x - LLM ████████████████░░░░░░░░░░░░░░ 3.28x ★ +Deep conversation Det ███████████████░░░░░░░░░░░░░░░ 2.50x + LLM ████████████████████░░░░░░░░░░ 3.28x ★ -Technical explanation Det █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x - LLM █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x +Technical explanation Det ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x + LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x -Structured content Det █████████░░░░░░░░░░░░░░░░░░░░░ 1.93x - LLM ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.46x +Structured content Det ███████████░░░░░░░░░░░░░░░░░░░ 1.86x + LLM █████████░░░░░░░░░░░░░░░░░░░░░ 1.46x -Agentic coding session Det ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.43x - LLM ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.40x +Agentic coding session Det █████████░░░░░░░░░░░░░░░░░░░░░ 1.48x + LLM █████████░░░░░░░░░░░░░░░░░░░░░ 1.40x ★ = LLM wins ``` @@ -143,36 +143,36 @@ Agentic coding session Det ███████░░░░░░░░░░ ``` Deterministic vs openai/gpt-4.1-mini -Coding assistant Det ████████░░░░░░░░░░░░░░░░░░░░░░ 1.68x - LLM ████████░░░░░░░░░░░░░░░░░░░░░░ 1.64x +Coding assistant Det ███████████░░░░░░░░░░░░░░░░░░░ 1.94x + LLM █████████░░░░░░░░░░░░░░░░░░░░░ 1.64x -Long Q&A Det ██████████████████████████████ 6.16x - LLM ██████████████████████████░░░░ 5.37x +Long Q&A Det ███████████████████████████░░░ 4.90x + LLM ██████████████████████████████ 5.37x ★ -Tool-heavy Det ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.30x - LLM █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.12x +Tool-heavy Det ████████░░░░░░░░░░░░░░░░░░░░░░ 1.41x + LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.12x -Deep conversation Det ██████████░░░░░░░░░░░░░░░░░░░░ 2.12x - LLM ████████████░░░░░░░░░░░░░░░░░░ 2.37x ★ +Deep conversation Det ██████████████░░░░░░░░░░░░░░░░ 2.50x + LLM █████████████░░░░░░░░░░░░░░░░░ 2.37x -Technical explanation Det █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x - LLM █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x +Technical explanation Det ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x + LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x -Structured content Det █████████░░░░░░░░░░░░░░░░░░░░░ 1.93x - LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.29x +Structured content Det ██████████░░░░░░░░░░░░░░░░░░░░ 1.86x + LLM ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.29x -Agentic coding session Det ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.43x - LLM ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.43x +Agentic coding session Det ████████░░░░░░░░░░░░░░░░░░░░░░ 1.48x + LLM ████████░░░░░░░░░░░░░░░░░░░░░░ 1.43x ★ = LLM wins ``` ### Provider Summary -| Provider | Model | Avg Ratio | Avg vsDet | Round-trip | Budget Fits | Avg Time | -| -------- | ------------ | --------: | --------: | ---------- | ----------- | -------: | -| ollama | llama3.2 | 2.09x | 0.96 | all PASS | 1/4 | 4.2s | -| openai | gpt-4.1-mini | 2.09x | 0.92 | all PASS | 2/4 | 8.1s | +| Provider | Model | Avg Ratio | Avg vsDet | Round-trip | Budget Fits | Avg Time | +| --- | --- | ---: | ---: | --- | --- | ---: | +| ollama | llama3.2 | 2.09x | 0.96 | all PASS | 1/4 | 4.2s | +| openai | gpt-4.1-mini | 2.09x | 0.92 | all PASS | 2/4 | 8.1s | > **Key findings:** > LLM wins on prose-heavy scenarios: Deep conversation, Technical explanation @@ -180,85 +180,85 @@ Agentic coding session Det ███████░░░░░░░░░░ ### ollama (llama3.2) -_Generated: 2026-02-25_ +*Generated: 2026-02-25*
Scenario details -| Scenario | Method | Char Ratio | Token Ratio | vsDet | Compressed | Preserved | Round-trip | Time | -| ---------------------- | ------------- | ---------: | ----------: | ----: | ---------: | --------: | ---------- | ----: | -| Coding assistant | deterministic | 1.68 | 1.67 | - | 5 | 8 | PASS | 0ms | -| | llm-basic | 1.48 | 1.48 | 0.88 | 5 | 8 | PASS | 5.9s | -| | llm-escalate | 1.55 | 1.55 | 0.92 | 5 | 8 | PASS | 3.0s | -| Long Q&A | deterministic | 6.16 | 6.11 | - | 4 | 6 | PASS | 1ms | -| | llm-basic | 4.31 | 4.28 | 0.70 | 4 | 6 | PASS | 4.1s | -| | llm-escalate | 4.49 | 4.46 | 0.73 | 4 | 6 | PASS | 3.7s | -| Tool-heavy | deterministic | 1.30 | 1.29 | - | 2 | 16 | PASS | 2ms | -| | llm-basic | 1.12 | 1.11 | 0.86 | 2 | 16 | PASS | 2.3s | -| | llm-escalate | 1.28 | 1.28 | 0.99 | 2 | 16 | PASS | 2.8s | -| Deep conversation | deterministic | 2.12 | 2.12 | - | 50 | 1 | PASS | 3ms | -| | llm-basic | 3.12 | 3.11 | 1.47 | 50 | 1 | PASS | 22.7s | -| | llm-escalate | 3.28 | 3.26 | 1.54 | 50 | 1 | PASS | 23.3s | -| Technical explanation | deterministic | 1.00 | 1.00 | - | 0 | 11 | PASS | 1ms | -| | llm-basic | 1.00 | 1.00 | 1.00 | 0 | 11 | PASS | 3.2s | -| | llm-escalate | 1.00 | 1.00 | 1.00 | 2 | 9 | PASS | 785ms | -| Structured content | deterministic | 1.93 | 1.92 | - | 2 | 10 | PASS | 0ms | -| | llm-basic | 1.46 | 1.45 | 0.75 | 2 | 10 | PASS | 3.5s | -| | llm-escalate | 1.38 | 1.38 | 0.71 | 2 | 10 | PASS | 3.7s | -| Agentic coding session | deterministic | 1.43 | 1.43 | - | 2 | 31 | PASS | 1ms | -| | llm-basic | 1.35 | 1.34 | 0.94 | 2 | 31 | PASS | 3.3s | -| | llm-escalate | 1.40 | 1.40 | 0.98 | 2 | 31 | PASS | 5.4s | +| Scenario | Method | Char Ratio | Token Ratio | vsDet | Compressed | Preserved | Round-trip | Time | +| --- | --- | ---: | ---: | ---: | ---: | ---: | --- | ---: | +| Coding assistant | deterministic | 1.68 | 1.67 | - | 5 | 8 | PASS | 0ms | +| | llm-basic | 1.48 | 1.48 | 0.88 | 5 | 8 | PASS | 5.9s | +| | llm-escalate | 1.55 | 1.55 | 0.92 | 5 | 8 | PASS | 3.0s | +| Long Q&A | deterministic | 6.16 | 6.11 | - | 4 | 6 | PASS | 1ms | +| | llm-basic | 4.31 | 4.28 | 0.70 | 4 | 6 | PASS | 4.1s | +| | llm-escalate | 4.49 | 4.46 | 0.73 | 4 | 6 | PASS | 3.7s | +| Tool-heavy | deterministic | 1.30 | 1.29 | - | 2 | 16 | PASS | 2ms | +| | llm-basic | 1.12 | 1.11 | 0.86 | 2 | 16 | PASS | 2.3s | +| | llm-escalate | 1.28 | 1.28 | 0.99 | 2 | 16 | PASS | 2.8s | +| Deep conversation | deterministic | 2.12 | 2.12 | - | 50 | 1 | PASS | 3ms | +| | llm-basic | 3.12 | 3.11 | 1.47 | 50 | 1 | PASS | 22.7s | +| | llm-escalate | 3.28 | 3.26 | 1.54 | 50 | 1 | PASS | 23.3s | +| Technical explanation | deterministic | 1.00 | 1.00 | - | 0 | 11 | PASS | 1ms | +| | llm-basic | 1.00 | 1.00 | 1.00 | 0 | 11 | PASS | 3.2s | +| | llm-escalate | 1.00 | 1.00 | 1.00 | 2 | 9 | PASS | 785ms | +| Structured content | deterministic | 1.93 | 1.92 | - | 2 | 10 | PASS | 0ms | +| | llm-basic | 1.46 | 1.45 | 0.75 | 2 | 10 | PASS | 3.5s | +| | llm-escalate | 1.38 | 1.38 | 0.71 | 2 | 10 | PASS | 3.7s | +| Agentic coding session | deterministic | 1.43 | 1.43 | - | 2 | 31 | PASS | 1ms | +| | llm-basic | 1.35 | 1.34 | 0.94 | 2 | 31 | PASS | 3.3s | +| | llm-escalate | 1.40 | 1.40 | 0.98 | 2 | 31 | PASS | 5.4s | #### Token Budget (target: 2000 tokens) -| Scenario | Method | Tokens | Fits | recencyWindow | Ratio | Round-trip | Time | -| ---------------------- | ------------- | -----: | ----- | ------------: | ----: | ---------- | -----: | -| Deep conversation | deterministic | 3738 | false | 0 | 2.12 | PASS | 12ms | -| | llm-escalate | 2593 | false | 0 | 3.08 | PASS | 132.0s | -| Agentic coding session | deterministic | 1957 | true | 9 | 1.36 | PASS | 2ms | -| | llm-escalate | 2003 | false | 9 | 1.33 | PASS | 4.1s | +| Scenario | Method | Tokens | Fits | recencyWindow | Ratio | Round-trip | Time | +| --- | --- | ---: | --- | ---: | ---: | --- | ---: | +| Deep conversation | deterministic | 3738 | false | 0 | 2.12 | PASS | 12ms | +| | llm-escalate | 2593 | false | 0 | 3.08 | PASS | 132.0s | +| Agentic coding session | deterministic | 1957 | true | 9 | 1.36 | PASS | 2ms | +| | llm-escalate | 2003 | false | 9 | 1.33 | PASS | 4.1s |
### openai (gpt-4.1-mini) -_Generated: 2026-02-25_ +*Generated: 2026-02-25*
Scenario details -| Scenario | Method | Char Ratio | Token Ratio | vsDet | Compressed | Preserved | Round-trip | Time | -| ---------------------- | ------------- | ---------: | ----------: | ----: | ---------: | --------: | ---------- | ----: | -| Coding assistant | deterministic | 1.68 | 1.67 | - | 5 | 8 | PASS | 0ms | -| | llm-basic | 1.64 | 1.63 | 0.98 | 5 | 8 | PASS | 5.6s | -| | llm-escalate | 1.63 | 1.63 | 0.97 | 5 | 8 | PASS | 6.0s | -| Long Q&A | deterministic | 6.16 | 6.11 | - | 4 | 6 | PASS | 1ms | -| | llm-basic | 5.37 | 5.33 | 0.87 | 4 | 6 | PASS | 5.9s | -| | llm-escalate | 5.35 | 5.31 | 0.87 | 4 | 6 | PASS | 7.0s | -| Tool-heavy | deterministic | 1.30 | 1.29 | - | 2 | 16 | PASS | 0ms | -| | llm-basic | 1.11 | 1.10 | 0.85 | 2 | 16 | PASS | 3.5s | -| | llm-escalate | 1.12 | 1.12 | 0.86 | 2 | 16 | PASS | 5.3s | -| Deep conversation | deterministic | 2.12 | 2.12 | - | 50 | 1 | PASS | 3ms | -| | llm-basic | 2.34 | 2.33 | 1.10 | 50 | 1 | PASS | 50.4s | -| | llm-escalate | 2.37 | 2.36 | 1.11 | 50 | 1 | PASS | 50.8s | -| Technical explanation | deterministic | 1.00 | 1.00 | - | 0 | 11 | PASS | 1ms | -| | llm-basic | 1.00 | 1.00 | 1.00 | 1 | 10 | PASS | 2.6s | -| | llm-escalate | 1.00 | 1.00 | 1.00 | 1 | 10 | PASS | 3.3s | -| Structured content | deterministic | 1.93 | 1.92 | - | 2 | 10 | PASS | 0ms | -| | llm-basic | 1.23 | 1.23 | 0.64 | 2 | 10 | PASS | 10.2s | -| | llm-escalate | 1.29 | 1.29 | 0.67 | 2 | 10 | PASS | 4.8s | -| Agentic coding session | deterministic | 1.43 | 1.43 | - | 2 | 31 | PASS | 1ms | -| | llm-basic | 1.43 | 1.43 | 1.00 | 2 | 31 | PASS | 5.8s | -| | llm-escalate | 1.32 | 1.32 | 0.93 | 1 | 32 | PASS | 9.5s | +| Scenario | Method | Char Ratio | Token Ratio | vsDet | Compressed | Preserved | Round-trip | Time | +| --- | --- | ---: | ---: | ---: | ---: | ---: | --- | ---: | +| Coding assistant | deterministic | 1.68 | 1.67 | - | 5 | 8 | PASS | 0ms | +| | llm-basic | 1.64 | 1.63 | 0.98 | 5 | 8 | PASS | 5.6s | +| | llm-escalate | 1.63 | 1.63 | 0.97 | 5 | 8 | PASS | 6.0s | +| Long Q&A | deterministic | 6.16 | 6.11 | - | 4 | 6 | PASS | 1ms | +| | llm-basic | 5.37 | 5.33 | 0.87 | 4 | 6 | PASS | 5.9s | +| | llm-escalate | 5.35 | 5.31 | 0.87 | 4 | 6 | PASS | 7.0s | +| Tool-heavy | deterministic | 1.30 | 1.29 | - | 2 | 16 | PASS | 0ms | +| | llm-basic | 1.11 | 1.10 | 0.85 | 2 | 16 | PASS | 3.5s | +| | llm-escalate | 1.12 | 1.12 | 0.86 | 2 | 16 | PASS | 5.3s | +| Deep conversation | deterministic | 2.12 | 2.12 | - | 50 | 1 | PASS | 3ms | +| | llm-basic | 2.34 | 2.33 | 1.10 | 50 | 1 | PASS | 50.4s | +| | llm-escalate | 2.37 | 2.36 | 1.11 | 50 | 1 | PASS | 50.8s | +| Technical explanation | deterministic | 1.00 | 1.00 | - | 0 | 11 | PASS | 1ms | +| | llm-basic | 1.00 | 1.00 | 1.00 | 1 | 10 | PASS | 2.6s | +| | llm-escalate | 1.00 | 1.00 | 1.00 | 1 | 10 | PASS | 3.3s | +| Structured content | deterministic | 1.93 | 1.92 | - | 2 | 10 | PASS | 0ms | +| | llm-basic | 1.23 | 1.23 | 0.64 | 2 | 10 | PASS | 10.2s | +| | llm-escalate | 1.29 | 1.29 | 0.67 | 2 | 10 | PASS | 4.8s | +| Agentic coding session | deterministic | 1.43 | 1.43 | - | 2 | 31 | PASS | 1ms | +| | llm-basic | 1.43 | 1.43 | 1.00 | 2 | 31 | PASS | 5.8s | +| | llm-escalate | 1.32 | 1.32 | 0.93 | 1 | 32 | PASS | 9.5s | #### Token Budget (target: 2000 tokens) -| Scenario | Method | Tokens | Fits | recencyWindow | Ratio | Round-trip | Time | -| ---------------------- | ------------- | -----: | ----- | ------------: | ----: | ---------- | -----: | -| Deep conversation | deterministic | 3738 | false | 0 | 2.12 | PASS | 10ms | -| | llm-escalate | 3391 | false | 0 | 2.35 | PASS | 280.5s | -| Agentic coding session | deterministic | 1957 | true | 9 | 1.36 | PASS | 2ms | -| | llm-escalate | 1915 | true | 3 | 1.39 | PASS | 28.1s | +| Scenario | Method | Tokens | Fits | recencyWindow | Ratio | Round-trip | Time | +| --- | --- | ---: | --- | ---: | ---: | --- | ---: | +| Deep conversation | deterministic | 3738 | false | 0 | 2.12 | PASS | 10ms | +| | llm-escalate | 3391 | false | 0 | 2.35 | PASS | 280.5s | +| Agentic coding session | deterministic | 1957 | true | 9 | 1.36 | PASS | 2ms | +| | llm-escalate | 1915 | true | 3 | 1.39 | PASS | 28.1s |
diff --git a/docs/compression-pipeline.md b/docs/compression-pipeline.md index f894cd4..da9c5ec 100644 --- a/docs/compression-pipeline.md +++ b/docs/compression-pipeline.md @@ -100,7 +100,7 @@ The `summarize` function uses sentence scoring: 5. Re-sort selected sentences by original position to preserve reading order 6. Join with `...` separator -Budget: 200 chars if input < 600 chars, 400 chars otherwise. +Budget scales adaptively: max(200, min(round(length × 0.3), 600)). Short content gets 200 chars, long content up to 600. ### Entity extraction @@ -111,14 +111,14 @@ After summarizing, `extractEntities` pulls out key identifiers from the original - Vowelless abbreviations - Numbers with units/context -Up to 10 entities are appended as `| entities: foo, bar, baz`. +Entities scale with content length (3–15) and are appended as `| entities: foo, bar, baz`. ### Code-split processing Messages containing code fences with significant prose (>= 80 chars) get split: 1. `splitCodeAndProse` extracts code fences and surrounding prose separately -2. Prose is summarized (budget: 200 if < 600 chars, else 400) +2. Prose is summarized (budget scales adaptively with prose length) 3. Code fences are preserved verbatim 4. Result: `[summary: ...]\n\n```code here```` diff --git a/docs/preservation-rules.md b/docs/preservation-rules.md index 1060e07..bba9bdf 100644 --- a/docs/preservation-rules.md +++ b/docs/preservation-rules.md @@ -68,11 +68,11 @@ Soft T0 content is still compressible because the entity extraction step capture ### T2 — Short prose -Prose under 20 words. Currently treated the same as T3 in the compression pipeline. +Prose under 20 words. Treated identically to T3 in the current deterministic pipeline — the distinction is preserved for future LLM classifier integration, which can apply lighter compression to short prose. ### T3 — Long prose -Prose of 20+ words. The primary target for summarization. +Prose of 20+ words. The primary target for summarization. Treated identically to T2 in the current pipeline; the LLM classifier will use the T2/T3 distinction for tier-specific strategies. ## API key detection @@ -103,7 +103,7 @@ SQL detection uses a tiered anchor system to avoid false positives on English pr Messages with code fences and significant prose (>= 80 chars) are split: 1. Code fences are extracted verbatim -2. Surrounding prose is summarized (budget: 200 chars if < 600 chars, 400 otherwise) +2. Surrounding prose is summarized (budget scales adaptively: 200–600 chars based on prose length) 3. Result: summary + preserved code fences If the total prose is < 80 chars, the entire message is preserved (not enough prose to justify splitting). diff --git a/src/classify.ts b/src/classify.ts index 6e5f5cd..68794fa 100644 --- a/src/classify.ts +++ b/src/classify.ts @@ -1,5 +1,18 @@ export type ClassifyResult = { decision: 'T0' | 'T2' | 'T3'; + /** + * Classification confidence (0–1). Higher values indicate stronger signal. + * + * For T0: starts at 0.70, increases by 0.05 per additional structural reason + * (capped at 0.95). Multiple overlapping signals → higher confidence. + * For T2/T3: fixed at 0.65 (pure prose heuristic, no structural anchors). + * + * The deterministic pipeline does not route on confidence — it uses the + * hard/soft T0 distinction instead. Consumers can use confidence for custom + * routing (e.g. only compress below a threshold), monitoring dashboards, + * or LLM classifier fallback decisions (cf. Amazon Science "Label with + * Confidence" for confidence-weighted routing patterns). + */ confidence: number; reasons: string[]; }; @@ -189,6 +202,14 @@ function detectContentTypes(text: string): { // -- Tier heuristic for clean prose -- +/** + * Assign T2 (short prose, < 20 words) or T3 (long prose, >= 20 words). + * + * Both tiers are compressed identically in the current deterministic pipeline. + * The distinction exists so a future LLM classifier can apply different + * strategies per tier — e.g. lighter summarization for T2 or aggressive + * compression for verbose T3 content. + */ function inferProseTier(text: string): 'T2' | 'T3' { const words = text.split(/\s+/).length; if (words < 20) return 'T2'; diff --git a/src/compress.ts b/src/compress.ts index b77b72c..6c09c03 100644 --- a/src/compress.ts +++ b/src/compress.ts @@ -330,6 +330,10 @@ const COMMON_STARTERS = new Set([ 'Into', ]); +function computeBudget(contentLength: number): number { + return Math.max(200, Math.min(Math.round(contentLength * 0.3), 600)); +} + function extractEntities(text: string): string[] { const entities = new Set(); @@ -376,8 +380,8 @@ function extractEntities(text: string): string[] { for (const n of numbersCtx) entities.add(n.trim()); } - // Cap at 10 - return Array.from(entities).slice(0, 10); + const maxEntities = Math.max(3, Math.min(Math.round(text.length / 200), 15)); + return Array.from(entities).slice(0, maxEntities); } function splitCodeAndProse(text: string): Array<{ type: 'prose' | 'code'; content: string }> { @@ -572,6 +576,10 @@ function classifyAll( } return { msg, preserved: true }; } + // T2 (short prose) and T3 (long prose) are intentionally treated identically + // in the current pipeline — both go through the same summarization path. + // The distinction is preserved for future LLM classifier integration, which + // can apply different strategies per tier (e.g. lighter compression for T2). if (content) { const cls = classifyMessage(content); if (cls.decision === 'T0') { @@ -735,7 +743,7 @@ function* compressGen( .map((s) => s.content) .join(' '); const codeFences = segments.filter((s) => s.type === 'code').map((s) => s.content); - const proseBudget = proseText.length < 600 ? 200 : 400; + const proseBudget = computeBudget(proseText.length); const summaryText: string = yield { text: proseText, budget: proseBudget }; const embeddedId = options.embedSummaryId ? makeSummaryId([msg.id]) : undefined; const compressed = `${formatSummary(summaryText, proseText, undefined, true, embeddedId)}\n\n${codeFences.join('\n\n')}`; @@ -762,7 +770,7 @@ function* compressGen( const allContent = group .map((g) => (typeof g.msg.content === 'string' ? g.msg.content : '')) .join(' '); - const contentBudget = allContent.length < 600 ? 200 : 400; + const contentBudget = computeBudget(allContent.length); const summaryText = isStructuredOutput(allContent) ? summarizeStructured(allContent, contentBudget) : yield { text: allContent, budget: contentBudget }; diff --git a/tests/compress.test.ts b/tests/compress.test.ts index 822cccc..a664acb 100644 --- a/tests/compress.test.ts +++ b/tests/compress.test.ts @@ -760,7 +760,7 @@ describe('compress', () => { expect(content).toContain('authentication module'); }); - it('budget ceiling at 400 chars', () => { + it('adaptive budget ceiling scales with content length', () => { const sentences = Array.from( { length: 20 }, (_, i) => `Sentence number ${i + 1} provides additional context about the deployment.`, @@ -771,7 +771,8 @@ describe('compress', () => { const result = compress(messages, { recencyWindow: 0 }); const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/); expect(match).toBeTruthy(); - expect(match![1].length).toBeLessThanOrEqual(400); + // ~3900 chars content → computeBudget = 600 + expect(match![1].length).toBeLessThanOrEqual(600); }); it('weights PASS/FAIL/ERROR status words higher', () => { @@ -878,7 +879,7 @@ describe('compress', () => { expect(content).toContain('grpc'); }); - it('caps entities at 10', () => { + it('caps entities proportionally to content length', () => { const text = 'Alice Bob Charlie Dave Eve Frank Grace Heidi Ivan Judy Karl Liam Mallory spoke about getUserData fetchItems parseConfig with user_id auth_token db_name cache_key log_level queue_size worker_count and 5 retries and 10 seconds. '.repeat( 3, @@ -889,7 +890,57 @@ describe('compress', () => { const entitiesMatch = content.match(/entities: ([^\]]+)/); expect(entitiesMatch).toBeTruthy(); const entityList = entitiesMatch![1].split(', '); - expect(entityList.length).toBeLessThanOrEqual(10); + // ~684 chars → cap = max(3, min(round(684/200), 15)) = 3 + expect(entityList.length).toBeLessThanOrEqual(3); + }); + + it('allows more entities for longer content', () => { + const text = + 'Alice Bob Charlie Dave Eve Frank Grace Heidi Ivan Judy Karl Liam Mallory spoke about getUserData fetchItems parseConfig with user_id auth_token db_name cache_key log_level queue_size worker_count and 5 retries and 10 seconds. '.repeat( + 12, + ); + const messages: Message[] = [msg({ id: '1', index: 0, role: 'user', content: text })]; + const result = compress(messages, { recencyWindow: 0 }); + const content = result.messages[0].content!; + const entitiesMatch = content.match(/entities: ([^\]]+)/); + expect(entitiesMatch).toBeTruthy(); + const entityList = entitiesMatch![1].split(', '); + // ~2736 chars → cap = max(3, min(round(2736/200), 15)) = 14 + expect(entityList.length).toBeGreaterThan(3); + expect(entityList.length).toBeLessThanOrEqual(15); + }); + }); + + describe('adaptive budget scaling', () => { + it('short content gets a small budget (≤ 200 chars)', () => { + // ~500 chars of prose → computeBudget(500) = 200 + const text = + 'The deployment process starts by pulling the latest Docker image from the registry and running pre-flight checks. '.repeat( + 4, + ); + expect(text.length).toBeLessThan(667); + const messages: Message[] = [msg({ id: '1', index: 0, role: 'user', content: text })]; + const result = compress(messages, { recencyWindow: 0 }); + const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/); + expect(match).toBeTruthy(); + expect(match![1].length).toBeLessThanOrEqual(200); + }); + + it('long content gets a larger budget (≤ 600 and > 200 chars)', () => { + // ~2400 chars of diverse prose → computeBudget(2400) = 600 + const sentences = Array.from( + { length: 30 }, + (_, i) => + `Step ${i + 1} in the deployment pipeline involves running integration tests against the staging environment.`, + ).join(' '); + expect(sentences.length).toBeGreaterThan(2000); + const messages: Message[] = [msg({ id: '1', index: 0, role: 'user', content: sentences })]; + const result = compress(messages, { recencyWindow: 0 }); + const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/); + expect(match).toBeTruthy(); + expect(match![1].length).toBeLessThanOrEqual(600); + // Budget is 600 so the summarizer has room for > 200 chars + expect(match![1].length).toBeGreaterThan(200); }); }); From c5f55aea7529a7a3248399bc7288f9cd8cf4de2c Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 10 Mar 2026 01:47:10 +0100 Subject: [PATCH 2/3] fix(test): update stale 400-char budget assertions to adaptive values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two test names and assertions still referenced the old fixed 400-char budget ceiling. Updated to match computeBudget() output for their respective content lengths (1000 chars → 300, 1675 chars → 503). --- tests/compress.test.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/compress.test.ts b/tests/compress.test.ts index a664acb..6b5512a 100644 --- a/tests/compress.test.ts +++ b/tests/compress.test.ts @@ -684,14 +684,13 @@ describe('compress', () => { expect(content).toContain('Express'); }); - it('caps at 400 chars when no punctuation', () => { - const noPunct = 'word '.repeat(200); // 1000 chars, no sentence-ending punctuation + it('caps at adaptive budget when no punctuation', () => { + const noPunct = 'word '.repeat(200); // 1000 chars → computeBudget = 300 const messages: Message[] = [msg({ id: '1', index: 0, role: 'user', content: noPunct })]; const result = compress(messages, { recencyWindow: 0 }); - // The summary text (between [summary: and the suffix) should not exceed 400 chars const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/); expect(match).toBeTruthy(); - expect(match![1].length).toBeLessThanOrEqual(400); + expect(match![1].length).toBeLessThanOrEqual(300); }); it('includes first substantive + last sentence', () => { @@ -719,19 +718,20 @@ describe('compress', () => { expect(content).toContain('Sure thing'); }); - it('hard caps overall summary at 400 chars', () => { + it('hard caps overall summary at adaptive budget', () => { // Use non-hex chars to avoid triggering hash_or_sha T0 detection const longSentence = 'Wor '.repeat(50) + 'is the architecture we chose for this particular deployment. '; const text = longSentence + 'The last sentence describes the final outcome of this deployment strategy.'; + // ~1675 chars → computeBudget = 503 const messages: Message[] = [ msg({ id: '1', index: 0, role: 'user', content: text.repeat(5) }), ]; const result = compress(messages, { recencyWindow: 0 }); const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/); expect(match).toBeTruthy(); - expect(match![1].length).toBeLessThanOrEqual(400); + expect(match![1].length).toBeLessThanOrEqual(503); }); it('extracts content from multiple paragraphs', () => { From 292321cb9fb485c14c79028dfd7398f2b6cb091e Mon Sep 17 00:00:00 2001 From: Lisa Date: Tue, 10 Mar 2026 01:50:21 +0100 Subject: [PATCH 3/3] style: apply Prettier formatting to benchmark-results.md --- docs/benchmark-results.md | 234 +++++++++++++++++++------------------- 1 file changed, 117 insertions(+), 117 deletions(-) diff --git a/docs/benchmark-results.md b/docs/benchmark-results.md index 277effc..f9d1d66 100644 --- a/docs/benchmark-results.md +++ b/docs/benchmark-results.md @@ -2,7 +2,7 @@ [Back to README](../README.md) | [All docs](README.md) | [Handbook](benchmarks.md) -*Auto-generated by `npm run bench:save`. Do not edit manually.* +_Auto-generated by `npm run bench:save`. Do not edit manually._ **v1.0.0** · Generated: 2026-03-10 @@ -10,11 +10,11 @@ ## Summary -| Metric | Value | -| --- | --- | -| Scenarios | 8 | -| Average compression | 2.01x | -| Best compression | 4.90x | +| Metric | Value | +| -------------------- | -------- | +| Scenarios | 8 | +| Average compression | 2.01x | +| Best compression | 4.90x | | Round-trip integrity | all PASS | ```mermaid @@ -35,16 +35,16 @@ xychart-beta bar [1.94, 4.90, 1.41, 1.00, 2.50, 1.00, 1.86, 1.48] ``` -| Scenario | Ratio | Reduction | Token Ratio | Messages | Compressed | Preserved | -| --- | ---: | ---: | ---: | ---: | ---: | ---: | -| Coding assistant | 1.94 | 48% | 1.93 | 13 | 5 | 8 | -| Long Q&A | 4.90 | 80% | 4.88 | 10 | 4 | 6 | -| Tool-heavy | 1.41 | 29% | 1.40 | 18 | 2 | 16 | -| Short conversation | 1.00 | 0% | 1.00 | 7 | 0 | 7 | -| Deep conversation | 2.50 | 60% | 2.49 | 51 | 50 | 1 | -| Technical explanation | 1.00 | 0% | 1.00 | 11 | 0 | 11 | -| Structured content | 1.86 | 46% | 1.85 | 12 | 2 | 10 | -| Agentic coding session | 1.48 | 32% | 1.47 | 33 | 2 | 31 | +| Scenario | Ratio | Reduction | Token Ratio | Messages | Compressed | Preserved | +| ---------------------- | ----: | --------: | ----------: | -------: | ---------: | --------: | +| Coding assistant | 1.94 | 48% | 1.93 | 13 | 5 | 8 | +| Long Q&A | 4.90 | 80% | 4.88 | 10 | 4 | 6 | +| Tool-heavy | 1.41 | 29% | 1.40 | 18 | 2 | 16 | +| Short conversation | 1.00 | 0% | 1.00 | 7 | 0 | 7 | +| Deep conversation | 2.50 | 60% | 2.49 | 51 | 50 | 1 | +| Technical explanation | 1.00 | 0% | 1.00 | 11 | 0 | 11 | +| Structured content | 1.86 | 46% | 1.85 | 12 | 2 | 10 | +| Agentic coding session | 1.48 | 32% | 1.47 | 33 | 2 | 31 | ## Deduplication Impact @@ -57,57 +57,57 @@ xychart-beta bar [4.90, 1.48] ``` -*First bar: no dedup · Second bar: with dedup* +_First bar: no dedup · Second bar: with dedup_ -| Scenario | No Dedup (rw=0) | Dedup (rw=0) | No Dedup (rw=4) | Dedup (rw=4) | Deduped | -| --- | ---: | ---: | ---: | ---: | ---: | -| Coding assistant | 1.94 | 1.94 | 1.61 | 1.61 | 0 | -| Long Q&A | 4.00 | 4.90 | 1.76 | 1.92 | 1 | -| Tool-heavy | 1.41 | 1.41 | 1.41 | 1.41 | 0 | -| Short conversation | 1.00 | 1.00 | 1.00 | 1.00 | 0 | -| Deep conversation | 2.50 | 2.50 | 2.24 | 2.24 | 0 | -| Technical explanation | 1.00 | 1.00 | 1.00 | 1.00 | 0 | -| Structured content | 1.86 | 1.86 | 1.33 | 1.33 | 0 | -| Agentic coding session | 1.20 | 1.48 | 1.20 | 1.48 | 4 | +| Scenario | No Dedup (rw=0) | Dedup (rw=0) | No Dedup (rw=4) | Dedup (rw=4) | Deduped | +| ---------------------- | --------------: | -----------: | --------------: | -----------: | ------: | +| Coding assistant | 1.94 | 1.94 | 1.61 | 1.61 | 0 | +| Long Q&A | 4.00 | 4.90 | 1.76 | 1.92 | 1 | +| Tool-heavy | 1.41 | 1.41 | 1.41 | 1.41 | 0 | +| Short conversation | 1.00 | 1.00 | 1.00 | 1.00 | 0 | +| Deep conversation | 2.50 | 2.50 | 2.24 | 2.24 | 0 | +| Technical explanation | 1.00 | 1.00 | 1.00 | 1.00 | 0 | +| Structured content | 1.86 | 1.86 | 1.33 | 1.33 | 0 | +| Agentic coding session | 1.20 | 1.48 | 1.20 | 1.48 | 4 | ### Fuzzy Dedup -| Scenario | Exact Deduped | Fuzzy Deduped | Ratio | vs Base | -| --- | ---: | ---: | ---: | ---: | -| Coding assistant | 0 | 0 | 1.94 | - | -| Long Q&A | 1 | 0 | 4.90 | - | -| Tool-heavy | 0 | 0 | 1.41 | - | -| Short conversation | 0 | 0 | 1.00 | - | -| Deep conversation | 0 | 0 | 2.50 | - | -| Technical explanation | 0 | 0 | 1.00 | - | -| Structured content | 0 | 0 | 1.86 | - | -| Agentic coding session | 4 | 2 | 2.35 | +59% | +| Scenario | Exact Deduped | Fuzzy Deduped | Ratio | vs Base | +| ---------------------- | ------------: | ------------: | ----: | ------: | +| Coding assistant | 0 | 0 | 1.94 | - | +| Long Q&A | 1 | 0 | 4.90 | - | +| Tool-heavy | 0 | 0 | 1.41 | - | +| Short conversation | 0 | 0 | 1.00 | - | +| Deep conversation | 0 | 0 | 2.50 | - | +| Technical explanation | 0 | 0 | 1.00 | - | +| Structured content | 0 | 0 | 1.86 | - | +| Agentic coding session | 4 | 2 | 2.35 | +59% | ## Token Budget Target: **2000 tokens** · 1/4 fit -| Scenario | Dedup | Tokens | Fits | recencyWindow | Compressed | Preserved | Deduped | -| --- | --- | ---: | --- | ---: | ---: | ---: | ---: | -| Deep conversation | no | 3188 | no | 0 | 50 | 1 | 0 | -| Deep conversation | yes | 3188 | no | 0 | 50 | 1 | 0 | -| Agentic coding session | no | 2223 | no | 0 | 4 | 33 | 0 | -| Agentic coding session | yes | 1900 | yes | 9 | 1 | 32 | 4 | +| Scenario | Dedup | Tokens | Fits | recencyWindow | Compressed | Preserved | Deduped | +| ---------------------- | ----- | -----: | ---- | ------------: | ---------: | --------: | ------: | +| Deep conversation | no | 3188 | no | 0 | 50 | 1 | 0 | +| Deep conversation | yes | 3188 | no | 0 | 50 | 1 | 0 | +| Agentic coding session | no | 2223 | no | 0 | 4 | 33 | 0 | +| Agentic coding session | yes | 1900 | yes | 9 | 1 | 32 | 4 | ## Bundle Size > Zero-dependency ESM library — tracked per-file to catch regressions. -| File | Size | Gzip | -| --- | ---: | ---: | -| classify.js | 7.9 KB | 3.4 KB | -| compress.js | 33.7 KB | 8.7 KB | -| dedup.js | 10.0 KB | 2.8 KB | -| expand.js | 2.7 KB | 934 B | -| index.js | 225 B | 159 B | -| summarizer.js | 2.5 KB | 993 B | -| types.js | 11 B | 31 B | -| **total** | 57.0 KB | 16.9 KB | +| File | Size | Gzip | +| ------------- | ------: | ------: | +| classify.js | 7.9 KB | 3.4 KB | +| compress.js | 33.7 KB | 8.7 KB | +| dedup.js | 10.0 KB | 2.8 KB | +| expand.js | 2.7 KB | 934 B | +| index.js | 225 B | 159 B | +| summarizer.js | 2.5 KB | 993 B | +| types.js | 11 B | 31 B | +| **total** | 57.0 KB | 16.9 KB | ## LLM vs Deterministic @@ -169,10 +169,10 @@ Agentic coding session Det ████████░░░░░░░░░ ### Provider Summary -| Provider | Model | Avg Ratio | Avg vsDet | Round-trip | Budget Fits | Avg Time | -| --- | --- | ---: | ---: | --- | --- | ---: | -| ollama | llama3.2 | 2.09x | 0.96 | all PASS | 1/4 | 4.2s | -| openai | gpt-4.1-mini | 2.09x | 0.92 | all PASS | 2/4 | 8.1s | +| Provider | Model | Avg Ratio | Avg vsDet | Round-trip | Budget Fits | Avg Time | +| -------- | ------------ | --------: | --------: | ---------- | ----------- | -------: | +| ollama | llama3.2 | 2.09x | 0.96 | all PASS | 1/4 | 4.2s | +| openai | gpt-4.1-mini | 2.09x | 0.92 | all PASS | 2/4 | 8.1s | > **Key findings:** > LLM wins on prose-heavy scenarios: Deep conversation, Technical explanation @@ -180,85 +180,85 @@ Agentic coding session Det ████████░░░░░░░░░ ### ollama (llama3.2) -*Generated: 2026-02-25* +_Generated: 2026-02-25_
Scenario details -| Scenario | Method | Char Ratio | Token Ratio | vsDet | Compressed | Preserved | Round-trip | Time | -| --- | --- | ---: | ---: | ---: | ---: | ---: | --- | ---: | -| Coding assistant | deterministic | 1.68 | 1.67 | - | 5 | 8 | PASS | 0ms | -| | llm-basic | 1.48 | 1.48 | 0.88 | 5 | 8 | PASS | 5.9s | -| | llm-escalate | 1.55 | 1.55 | 0.92 | 5 | 8 | PASS | 3.0s | -| Long Q&A | deterministic | 6.16 | 6.11 | - | 4 | 6 | PASS | 1ms | -| | llm-basic | 4.31 | 4.28 | 0.70 | 4 | 6 | PASS | 4.1s | -| | llm-escalate | 4.49 | 4.46 | 0.73 | 4 | 6 | PASS | 3.7s | -| Tool-heavy | deterministic | 1.30 | 1.29 | - | 2 | 16 | PASS | 2ms | -| | llm-basic | 1.12 | 1.11 | 0.86 | 2 | 16 | PASS | 2.3s | -| | llm-escalate | 1.28 | 1.28 | 0.99 | 2 | 16 | PASS | 2.8s | -| Deep conversation | deterministic | 2.12 | 2.12 | - | 50 | 1 | PASS | 3ms | -| | llm-basic | 3.12 | 3.11 | 1.47 | 50 | 1 | PASS | 22.7s | -| | llm-escalate | 3.28 | 3.26 | 1.54 | 50 | 1 | PASS | 23.3s | -| Technical explanation | deterministic | 1.00 | 1.00 | - | 0 | 11 | PASS | 1ms | -| | llm-basic | 1.00 | 1.00 | 1.00 | 0 | 11 | PASS | 3.2s | -| | llm-escalate | 1.00 | 1.00 | 1.00 | 2 | 9 | PASS | 785ms | -| Structured content | deterministic | 1.93 | 1.92 | - | 2 | 10 | PASS | 0ms | -| | llm-basic | 1.46 | 1.45 | 0.75 | 2 | 10 | PASS | 3.5s | -| | llm-escalate | 1.38 | 1.38 | 0.71 | 2 | 10 | PASS | 3.7s | -| Agentic coding session | deterministic | 1.43 | 1.43 | - | 2 | 31 | PASS | 1ms | -| | llm-basic | 1.35 | 1.34 | 0.94 | 2 | 31 | PASS | 3.3s | -| | llm-escalate | 1.40 | 1.40 | 0.98 | 2 | 31 | PASS | 5.4s | +| Scenario | Method | Char Ratio | Token Ratio | vsDet | Compressed | Preserved | Round-trip | Time | +| ---------------------- | ------------- | ---------: | ----------: | ----: | ---------: | --------: | ---------- | ----: | +| Coding assistant | deterministic | 1.68 | 1.67 | - | 5 | 8 | PASS | 0ms | +| | llm-basic | 1.48 | 1.48 | 0.88 | 5 | 8 | PASS | 5.9s | +| | llm-escalate | 1.55 | 1.55 | 0.92 | 5 | 8 | PASS | 3.0s | +| Long Q&A | deterministic | 6.16 | 6.11 | - | 4 | 6 | PASS | 1ms | +| | llm-basic | 4.31 | 4.28 | 0.70 | 4 | 6 | PASS | 4.1s | +| | llm-escalate | 4.49 | 4.46 | 0.73 | 4 | 6 | PASS | 3.7s | +| Tool-heavy | deterministic | 1.30 | 1.29 | - | 2 | 16 | PASS | 2ms | +| | llm-basic | 1.12 | 1.11 | 0.86 | 2 | 16 | PASS | 2.3s | +| | llm-escalate | 1.28 | 1.28 | 0.99 | 2 | 16 | PASS | 2.8s | +| Deep conversation | deterministic | 2.12 | 2.12 | - | 50 | 1 | PASS | 3ms | +| | llm-basic | 3.12 | 3.11 | 1.47 | 50 | 1 | PASS | 22.7s | +| | llm-escalate | 3.28 | 3.26 | 1.54 | 50 | 1 | PASS | 23.3s | +| Technical explanation | deterministic | 1.00 | 1.00 | - | 0 | 11 | PASS | 1ms | +| | llm-basic | 1.00 | 1.00 | 1.00 | 0 | 11 | PASS | 3.2s | +| | llm-escalate | 1.00 | 1.00 | 1.00 | 2 | 9 | PASS | 785ms | +| Structured content | deterministic | 1.93 | 1.92 | - | 2 | 10 | PASS | 0ms | +| | llm-basic | 1.46 | 1.45 | 0.75 | 2 | 10 | PASS | 3.5s | +| | llm-escalate | 1.38 | 1.38 | 0.71 | 2 | 10 | PASS | 3.7s | +| Agentic coding session | deterministic | 1.43 | 1.43 | - | 2 | 31 | PASS | 1ms | +| | llm-basic | 1.35 | 1.34 | 0.94 | 2 | 31 | PASS | 3.3s | +| | llm-escalate | 1.40 | 1.40 | 0.98 | 2 | 31 | PASS | 5.4s | #### Token Budget (target: 2000 tokens) -| Scenario | Method | Tokens | Fits | recencyWindow | Ratio | Round-trip | Time | -| --- | --- | ---: | --- | ---: | ---: | --- | ---: | -| Deep conversation | deterministic | 3738 | false | 0 | 2.12 | PASS | 12ms | -| | llm-escalate | 2593 | false | 0 | 3.08 | PASS | 132.0s | -| Agentic coding session | deterministic | 1957 | true | 9 | 1.36 | PASS | 2ms | -| | llm-escalate | 2003 | false | 9 | 1.33 | PASS | 4.1s | +| Scenario | Method | Tokens | Fits | recencyWindow | Ratio | Round-trip | Time | +| ---------------------- | ------------- | -----: | ----- | ------------: | ----: | ---------- | -----: | +| Deep conversation | deterministic | 3738 | false | 0 | 2.12 | PASS | 12ms | +| | llm-escalate | 2593 | false | 0 | 3.08 | PASS | 132.0s | +| Agentic coding session | deterministic | 1957 | true | 9 | 1.36 | PASS | 2ms | +| | llm-escalate | 2003 | false | 9 | 1.33 | PASS | 4.1s |
### openai (gpt-4.1-mini) -*Generated: 2026-02-25* +_Generated: 2026-02-25_
Scenario details -| Scenario | Method | Char Ratio | Token Ratio | vsDet | Compressed | Preserved | Round-trip | Time | -| --- | --- | ---: | ---: | ---: | ---: | ---: | --- | ---: | -| Coding assistant | deterministic | 1.68 | 1.67 | - | 5 | 8 | PASS | 0ms | -| | llm-basic | 1.64 | 1.63 | 0.98 | 5 | 8 | PASS | 5.6s | -| | llm-escalate | 1.63 | 1.63 | 0.97 | 5 | 8 | PASS | 6.0s | -| Long Q&A | deterministic | 6.16 | 6.11 | - | 4 | 6 | PASS | 1ms | -| | llm-basic | 5.37 | 5.33 | 0.87 | 4 | 6 | PASS | 5.9s | -| | llm-escalate | 5.35 | 5.31 | 0.87 | 4 | 6 | PASS | 7.0s | -| Tool-heavy | deterministic | 1.30 | 1.29 | - | 2 | 16 | PASS | 0ms | -| | llm-basic | 1.11 | 1.10 | 0.85 | 2 | 16 | PASS | 3.5s | -| | llm-escalate | 1.12 | 1.12 | 0.86 | 2 | 16 | PASS | 5.3s | -| Deep conversation | deterministic | 2.12 | 2.12 | - | 50 | 1 | PASS | 3ms | -| | llm-basic | 2.34 | 2.33 | 1.10 | 50 | 1 | PASS | 50.4s | -| | llm-escalate | 2.37 | 2.36 | 1.11 | 50 | 1 | PASS | 50.8s | -| Technical explanation | deterministic | 1.00 | 1.00 | - | 0 | 11 | PASS | 1ms | -| | llm-basic | 1.00 | 1.00 | 1.00 | 1 | 10 | PASS | 2.6s | -| | llm-escalate | 1.00 | 1.00 | 1.00 | 1 | 10 | PASS | 3.3s | -| Structured content | deterministic | 1.93 | 1.92 | - | 2 | 10 | PASS | 0ms | -| | llm-basic | 1.23 | 1.23 | 0.64 | 2 | 10 | PASS | 10.2s | -| | llm-escalate | 1.29 | 1.29 | 0.67 | 2 | 10 | PASS | 4.8s | -| Agentic coding session | deterministic | 1.43 | 1.43 | - | 2 | 31 | PASS | 1ms | -| | llm-basic | 1.43 | 1.43 | 1.00 | 2 | 31 | PASS | 5.8s | -| | llm-escalate | 1.32 | 1.32 | 0.93 | 1 | 32 | PASS | 9.5s | +| Scenario | Method | Char Ratio | Token Ratio | vsDet | Compressed | Preserved | Round-trip | Time | +| ---------------------- | ------------- | ---------: | ----------: | ----: | ---------: | --------: | ---------- | ----: | +| Coding assistant | deterministic | 1.68 | 1.67 | - | 5 | 8 | PASS | 0ms | +| | llm-basic | 1.64 | 1.63 | 0.98 | 5 | 8 | PASS | 5.6s | +| | llm-escalate | 1.63 | 1.63 | 0.97 | 5 | 8 | PASS | 6.0s | +| Long Q&A | deterministic | 6.16 | 6.11 | - | 4 | 6 | PASS | 1ms | +| | llm-basic | 5.37 | 5.33 | 0.87 | 4 | 6 | PASS | 5.9s | +| | llm-escalate | 5.35 | 5.31 | 0.87 | 4 | 6 | PASS | 7.0s | +| Tool-heavy | deterministic | 1.30 | 1.29 | - | 2 | 16 | PASS | 0ms | +| | llm-basic | 1.11 | 1.10 | 0.85 | 2 | 16 | PASS | 3.5s | +| | llm-escalate | 1.12 | 1.12 | 0.86 | 2 | 16 | PASS | 5.3s | +| Deep conversation | deterministic | 2.12 | 2.12 | - | 50 | 1 | PASS | 3ms | +| | llm-basic | 2.34 | 2.33 | 1.10 | 50 | 1 | PASS | 50.4s | +| | llm-escalate | 2.37 | 2.36 | 1.11 | 50 | 1 | PASS | 50.8s | +| Technical explanation | deterministic | 1.00 | 1.00 | - | 0 | 11 | PASS | 1ms | +| | llm-basic | 1.00 | 1.00 | 1.00 | 1 | 10 | PASS | 2.6s | +| | llm-escalate | 1.00 | 1.00 | 1.00 | 1 | 10 | PASS | 3.3s | +| Structured content | deterministic | 1.93 | 1.92 | - | 2 | 10 | PASS | 0ms | +| | llm-basic | 1.23 | 1.23 | 0.64 | 2 | 10 | PASS | 10.2s | +| | llm-escalate | 1.29 | 1.29 | 0.67 | 2 | 10 | PASS | 4.8s | +| Agentic coding session | deterministic | 1.43 | 1.43 | - | 2 | 31 | PASS | 1ms | +| | llm-basic | 1.43 | 1.43 | 1.00 | 2 | 31 | PASS | 5.8s | +| | llm-escalate | 1.32 | 1.32 | 0.93 | 1 | 32 | PASS | 9.5s | #### Token Budget (target: 2000 tokens) -| Scenario | Method | Tokens | Fits | recencyWindow | Ratio | Round-trip | Time | -| --- | --- | ---: | --- | ---: | ---: | --- | ---: | -| Deep conversation | deterministic | 3738 | false | 0 | 2.12 | PASS | 10ms | -| | llm-escalate | 3391 | false | 0 | 2.35 | PASS | 280.5s | -| Agentic coding session | deterministic | 1957 | true | 9 | 1.36 | PASS | 2ms | -| | llm-escalate | 1915 | true | 3 | 1.39 | PASS | 28.1s | +| Scenario | Method | Tokens | Fits | recencyWindow | Ratio | Round-trip | Time | +| ---------------------- | ------------- | -----: | ----- | ------------: | ----: | ---------- | -----: | +| Deep conversation | deterministic | 3738 | false | 0 | 2.12 | PASS | 10ms | +| | llm-escalate | 3391 | false | 0 | 2.35 | PASS | 280.5s | +| Agentic coding session | deterministic | 1957 | true | 9 | 1.36 | PASS | 2ms | +| | llm-escalate | 1915 | true | 3 | 1.39 | PASS | 28.1s |