diff --git a/bench/baselines/current.json b/bench/baselines/current.json
index 77bfa0d..2211d13 100644
--- a/bench/baselines/current.json
+++ b/bench/baselines/current.json
@@ -1,23 +1,23 @@
 {
   "version": "1.0.0",
-  "generated": "2026-02-26T05:31:42.406Z",
+  "generated": "2026-03-10T00:15:20.299Z",
   "results": {
     "basic": {
       "Coding assistant": {
-        "ratio": 1.6812907904278462,
-        "tokenRatio": 1.6729559748427674,
+        "ratio": 1.9385451505016722,
+        "tokenRatio": 1.9275362318840579,
         "compressed": 5,
         "preserved": 8
       },
       "Long Q&A": {
-        "ratio": 6.158536585365853,
-        "tokenRatio": 6.114164904862579,
+        "ratio": 4.902912621359223,
+        "tokenRatio": 4.87689713322091,
         "compressed": 4,
         "preserved": 6
       },
       "Tool-heavy": {
-        "ratio": 1.2991563919532771,
-        "tokenRatio": 1.2946428571428572,
+        "ratio": 1.4128440366972477,
+        "tokenRatio": 1.4043583535108959,
         "compressed": 2,
         "preserved": 16
       },
@@ -28,8 +28,8 @@
         "preserved": 7
       },
       "Deep conversation": {
-        "ratio": 2.124913733609386,
-        "tokenRatio": 2.1241305510968433,
+        "ratio": 2.5041568769202964,
+        "tokenRatio": 2.4905897114178166,
         "compressed": 50,
         "preserved": 1
       },
@@ -40,21 +40,21 @@
         "preserved": 11
       },
       "Structured content": {
-        "ratio": 1.9338990620812864,
-        "tokenRatio": 1.9241486068111455,
+        "ratio": 1.8559794256322333,
+        "tokenRatio": 1.8469539375928679,
         "compressed": 2,
         "preserved": 10
       },
       "Agentic coding session": {
-        "ratio": 1.428351309707242,
-        "tokenRatio": 1.4258962011771001,
+        "ratio": 1.4768201370081249,
+        "tokenRatio": 1.4740044247787611,
         "compressed": 2,
         "preserved": 31
       }
     },
     "tokenBudget": {
       "Deep conversation|dedup=false": {
-        "tokenCount": 3738,
+        "tokenCount": 3188,
         "fits": false,
         "recencyWindow": 0,
         "compressed": 50,
@@ -62,7 +62,7 @@
         "deduped": 0
       },
       "Deep conversation|dedup=true": {
-        "tokenCount": 3738,
+        "tokenCount": 3188,
         "fits": false,
         "recencyWindow": 0,
         "compressed": 50,
@@ -70,7 +70,7 @@
         "deduped": 0
       },
       "Agentic coding session|dedup=false": {
-        "tokenCount": 2345,
+        "tokenCount": 2223,
         "fits": false,
         "recencyWindow": 0,
         "compressed": 4,
@@ -78,7 +78,7 @@
         "deduped": 0
       },
       "Agentic coding session|dedup=true": {
-        "tokenCount": 1957,
+        "tokenCount": 1900,
         "fits": true,
         "recencyWindow": 9,
         "compressed": 1,
@@ -88,24 +88,24 @@
     },
     "dedup": {
       "Coding assistant": {
-        "rw0Base": 1.6812907904278462,
-        "rw0Dup": 1.6812907904278462,
-        "rw4Base": 1.5104234527687297,
-        "rw4Dup": 1.5104234527687297,
+        "rw0Base": 1.9385451505016722,
+        "rw0Dup": 1.9385451505016722,
+        "rw4Base": 1.6061655697956356,
+        "rw4Dup": 1.6061655697956356,
         "deduped": 0
       },
       "Long Q&A": {
-        "rw0Base": 5.139949109414759,
-        "rw0Dup": 6.158536585365853,
-        "rw4Base": 1.9024298361273309,
-        "rw4Dup": 2.0264847512038524,
+        "rw0Base": 4,
+        "rw0Dup": 4.902912621359223,
+        "rw4Base": 1.76296037702915,
+        "rw4Dup": 1.918693009118541,
         "deduped": 1
       },
       "Tool-heavy": {
-        "rw0Base": 1.2991563919532771,
-        "rw0Dup": 1.2991563919532771,
-        "rw4Base": 1.2991563919532771,
-        "rw4Dup": 1.2991563919532771,
+        "rw0Base": 1.4128440366972477,
+        "rw0Dup": 1.4128440366972477,
+        "rw4Base": 1.4128440366972477,
+        "rw4Dup": 1.4128440366972477,
         "deduped": 0
       },
       "Short conversation": {
@@ -116,10 +116,10 @@
         "deduped": 0
       },
       "Deep conversation": {
-        "rw0Base": 2.124913733609386,
-        "rw0Dup": 2.124913733609386,
-        "rw4Base": 1.9527165104643789,
-        "rw4Dup": 1.9527165104643789,
+        "rw0Base": 2.5041568769202964,
+        "rw0Dup": 2.5041568769202964,
+        "rw4Base": 2.2394536932277354,
+        "rw4Dup": 2.2394536932277354,
         "deduped": 0
       },
       "Technical explanation": {
@@ -130,17 +130,17 @@
         "deduped": 0
       },
       "Structured content": {
-        "rw0Base": 1.9338990620812864,
-        "rw0Dup": 1.9338990620812864,
-        "rw4Base": 1.373730964467005,
-        "rw4Dup": 1.373730964467005,
+        "rw0Base": 1.8559794256322333,
+        "rw0Dup": 1.8559794256322333,
+        "rw4Base": 1.3339494762784967,
+        "rw4Dup": 1.3339494762784967,
         "deduped": 0
       },
       "Agentic coding session": {
-        "rw0Base": 1.1374233128834357,
-        "rw0Dup": 1.428351309707242,
-        "rw4Base": 1.1374233128834357,
-        "rw4Dup": 1.428351309707242,
+        "rw0Base": 1.2001553599171413,
+        "rw0Dup": 1.4768201370081249,
+        "rw4Base": 1.2001553599171413,
+        "rw4Dup": 1.4768201370081249,
         "deduped": 4
       }
     },
@@ -148,17 +148,17 @@
       "Coding assistant": {
         "exact": 0,
         "fuzzy": 0,
-        "ratio": 1.6812907904278462
+        "ratio": 1.9385451505016722
       },
       "Long Q&A": {
         "exact": 1,
         "fuzzy": 0,
-        "ratio": 6.158536585365853
+        "ratio": 4.902912621359223
       },
       "Tool-heavy": {
         "exact": 0,
         "fuzzy": 0,
-        "ratio": 1.2991563919532771
+        "ratio": 1.4128440366972477
       },
       "Short conversation": {
         "exact": 0,
@@ -168,7 +168,7 @@
       "Deep conversation": {
         "exact": 0,
         "fuzzy": 0,
-        "ratio": 2.124913733609386
+        "ratio": 2.5041568769202964
       },
       "Technical explanation": {
         "exact": 0,
@@ -178,22 +178,22 @@
       "Structured content": {
         "exact": 0,
         "fuzzy": 0,
-        "ratio": 1.9338990620812864
+        "ratio": 1.8559794256322333
       },
       "Agentic coding session": {
         "exact": 4,
         "fuzzy": 2,
-        "ratio": 2.229973538609574
+        "ratio": 2.3504056795131847
       }
     },
     "bundleSize": {
       "classify.js": {
-        "bytes": 7724,
-        "gzipBytes": 3250
+        "bytes": 8074,
+        "gzipBytes": 3443
       },
       "compress.js": {
-        "bytes": 33941,
-        "gzipBytes": 8721
+        "bytes": 34466,
+        "gzipBytes": 8914
       },
       "dedup.js": {
         "bytes": 10260,
@@ -216,8 +216,8 @@
         "gzipBytes": 31
       },
       "total": {
-        "bytes": 57498,
-        "gzipBytes": 16952
+        "bytes": 58373,
+        "gzipBytes": 17338
       }
     }
   }
diff --git a/bench/baselines/history/v1.0.0.json b/bench/baselines/history/v1.0.0.json
index 77bfa0d..2211d13 100644
--- a/bench/baselines/history/v1.0.0.json
+++ b/bench/baselines/history/v1.0.0.json
@@ -1,23 +1,23 @@
 {
   "version": "1.0.0",
-  "generated": "2026-02-26T05:31:42.406Z",
+  "generated": "2026-03-10T00:15:20.299Z",
   "results": {
     "basic": {
       "Coding assistant": {
-        "ratio": 1.6812907904278462,
-        "tokenRatio": 1.6729559748427674,
+        "ratio": 1.9385451505016722,
+        "tokenRatio": 1.9275362318840579,
         "compressed": 5,
         "preserved": 8
       },
       "Long Q&A": {
-        "ratio": 6.158536585365853,
-        "tokenRatio": 6.114164904862579,
+        "ratio": 4.902912621359223,
+        "tokenRatio": 4.87689713322091,
         "compressed": 4,
         "preserved": 6
       },
       "Tool-heavy": {
-        "ratio": 1.2991563919532771,
-        "tokenRatio": 1.2946428571428572,
+        "ratio": 1.4128440366972477,
+        "tokenRatio": 1.4043583535108959,
         "compressed": 2,
         "preserved": 16
       },
@@ -28,8 +28,8 @@
         "preserved": 7
       },
       "Deep conversation": {
-        "ratio": 2.124913733609386,
-        "tokenRatio": 2.1241305510968433,
+        "ratio": 2.5041568769202964,
+        "tokenRatio": 2.4905897114178166,
         "compressed": 50,
         "preserved": 1
       },
@@ -40,21 +40,21 @@
         "preserved": 11
       },
       "Structured content": {
-        "ratio": 1.9338990620812864,
-        "tokenRatio": 1.9241486068111455,
+        "ratio": 1.8559794256322333,
+        "tokenRatio": 1.8469539375928679,
         "compressed": 2,
         "preserved": 10
       },
       "Agentic coding session": {
-        "ratio": 1.428351309707242,
-        "tokenRatio": 1.4258962011771001,
+        "ratio": 1.4768201370081249,
+        "tokenRatio": 1.4740044247787611,
         "compressed": 2,
         "preserved": 31
       }
     },
     "tokenBudget": {
       "Deep conversation|dedup=false": {
-        "tokenCount": 3738,
+        "tokenCount": 3188,
         "fits": false,
         "recencyWindow": 0,
         "compressed": 50,
@@ -62,7 +62,7 @@
         "deduped": 0
       },
       "Deep conversation|dedup=true": {
-        "tokenCount": 3738,
+        "tokenCount": 3188,
         "fits": false,
         "recencyWindow": 0,
         "compressed": 50,
@@ -70,7 +70,7 @@
         "deduped": 0
       },
       "Agentic coding session|dedup=false": {
-        "tokenCount": 2345,
+        "tokenCount": 2223,
         "fits": false,
         "recencyWindow": 0,
         "compressed": 4,
@@ -78,7 +78,7 @@
         "deduped": 0
       },
       "Agentic coding session|dedup=true": {
-        "tokenCount": 1957,
+        "tokenCount": 1900,
         "fits": true,
         "recencyWindow": 9,
         "compressed": 1,
@@ -88,24 +88,24 @@
     },
     "dedup": {
       "Coding assistant": {
-        "rw0Base": 1.6812907904278462,
-        "rw0Dup": 1.6812907904278462,
-        "rw4Base": 1.5104234527687297,
-        "rw4Dup": 1.5104234527687297,
+        "rw0Base": 1.9385451505016722,
+        "rw0Dup": 1.9385451505016722,
+        "rw4Base": 1.6061655697956356,
+        "rw4Dup": 1.6061655697956356,
         "deduped": 0
       },
       "Long Q&A": {
-        "rw0Base": 5.139949109414759,
-        "rw0Dup": 6.158536585365853,
-        "rw4Base": 1.9024298361273309,
-        "rw4Dup": 2.0264847512038524,
+        "rw0Base": 4,
+        "rw0Dup": 4.902912621359223,
+        "rw4Base": 1.76296037702915,
+        "rw4Dup": 1.918693009118541,
         "deduped": 1
       },
       "Tool-heavy": {
-        "rw0Base": 1.2991563919532771,
-        "rw0Dup": 1.2991563919532771,
-        "rw4Base": 1.2991563919532771,
-        "rw4Dup": 1.2991563919532771,
+        "rw0Base": 1.4128440366972477,
+        "rw0Dup": 1.4128440366972477,
+        "rw4Base": 1.4128440366972477,
+        "rw4Dup": 1.4128440366972477,
         "deduped": 0
       },
       "Short conversation": {
@@ -116,10 +116,10 @@
         "deduped": 0
       },
       "Deep conversation": {
-        "rw0Base": 2.124913733609386,
-        "rw0Dup": 2.124913733609386,
-        "rw4Base": 1.9527165104643789,
-        "rw4Dup": 1.9527165104643789,
+        "rw0Base": 2.5041568769202964,
+        "rw0Dup": 2.5041568769202964,
+        "rw4Base": 2.2394536932277354,
+        "rw4Dup": 2.2394536932277354,
         "deduped": 0
       },
       "Technical explanation": {
@@ -130,17 +130,17 @@
         "deduped": 0
       },
       "Structured content": {
-        "rw0Base": 1.9338990620812864,
-        "rw0Dup": 1.9338990620812864,
-        "rw4Base": 1.373730964467005,
-        "rw4Dup": 1.373730964467005,
+        "rw0Base": 1.8559794256322333,
+        "rw0Dup": 1.8559794256322333,
+        "rw4Base": 1.3339494762784967,
+        "rw4Dup": 1.3339494762784967,
         "deduped": 0
       },
       "Agentic coding session": {
-        "rw0Base": 1.1374233128834357,
-        "rw0Dup": 1.428351309707242,
-        "rw4Base": 1.1374233128834357,
-        "rw4Dup": 1.428351309707242,
+        "rw0Base": 1.2001553599171413,
+        "rw0Dup": 1.4768201370081249,
+        "rw4Base": 1.2001553599171413,
+        "rw4Dup": 1.4768201370081249,
         "deduped": 4
       }
     },
@@ -148,17 +148,17 @@
       "Coding assistant": {
         "exact": 0,
         "fuzzy": 0,
-        "ratio": 1.6812907904278462
+        "ratio": 1.9385451505016722
       },
       "Long Q&A": {
         "exact": 1,
         "fuzzy": 0,
-        "ratio": 6.158536585365853
+        "ratio": 4.902912621359223
       },
       "Tool-heavy": {
         "exact": 0,
         "fuzzy": 0,
-        "ratio": 1.2991563919532771
+        "ratio": 1.4128440366972477
       },
       "Short conversation": {
         "exact": 0,
@@ -168,7 +168,7 @@
       "Deep conversation": {
         "exact": 0,
         "fuzzy": 0,
-        "ratio": 2.124913733609386
+        "ratio": 2.5041568769202964
       },
       "Technical explanation": {
         "exact": 0,
@@ -178,22 +178,22 @@
       "Structured content": {
         "exact": 0,
         "fuzzy": 0,
-        "ratio": 1.9338990620812864
+        "ratio": 1.8559794256322333
       },
       "Agentic coding session": {
         "exact": 4,
         "fuzzy": 2,
-        "ratio": 2.229973538609574
+        "ratio": 2.3504056795131847
       }
     },
     "bundleSize": {
       "classify.js": {
-        "bytes": 7724,
-        "gzipBytes": 3250
+        "bytes": 8074,
+        "gzipBytes": 3443
       },
       "compress.js": {
-        "bytes": 33941,
-        "gzipBytes": 8721
+        "bytes": 34466,
+        "gzipBytes": 8914
       },
       "dedup.js": {
         "bytes": 10260,
@@ -216,8 +216,8 @@
         "gzipBytes": 31
       },
       "total": {
-        "bytes": 57498,
-        "gzipBytes": 16952
+        "bytes": 58373,
+        "gzipBytes": 17338
       }
     }
   }
diff --git a/docs/benchmark-results.md b/docs/benchmark-results.md
index 458513a..f9d1d66 100644
--- a/docs/benchmark-results.md
+++ b/docs/benchmark-results.md
@@ -4,17 +4,17 @@
 
 _Auto-generated by `npm run bench:save`. Do not edit manually._
 
-**v1.0.0** · Generated: 2026-02-26
+**v1.0.0** · Generated: 2026-03-10
 
-![avg ratio](https://img.shields.io/badge/avg%20ratio-2.08x-blue) ![best](https://img.shields.io/badge/best-6.16x-blue) ![scenarios](https://img.shields.io/badge/scenarios-8-blue) ![round-trip](https://img.shields.io/badge/round--trip-all_PASS-brightgreen) ![gzip](https://img.shields.io/badge/gzip-16.6%20KB-blue)
+![avg ratio](https://img.shields.io/badge/avg%20ratio-2.01x-blue) ![best](https://img.shields.io/badge/best-4.90x-blue) ![scenarios](https://img.shields.io/badge/scenarios-8-blue) ![round-trip](https://img.shields.io/badge/round--trip-all_PASS-brightgreen) ![gzip](https://img.shields.io/badge/gzip-16.9%20KB-blue)
 
 ## Summary
 
 | Metric               | Value    |
 | -------------------- | -------- |
 | Scenarios            | 8        |
-| Average compression  | 2.08x    |
-| Best compression     | 6.16x    |
+| Average compression  | 2.01x    |
+| Best compression     | 4.90x    |
 | Round-trip integrity | all PASS |
 
 ```mermaid
@@ -25,26 +25,26 @@ pie title "Message Outcomes"
 
 ## Compression by Scenario
 
-> **8 scenarios** · **2.08x** avg ratio · **1.00x** – **6.16x** range · all round-trips PASS
+> **8 scenarios** · **2.01x** avg ratio · **1.00x** – **4.90x** range · all round-trips PASS
 
 ```mermaid
 xychart-beta
     title "Compression Ratio by Scenario"
     x-axis ["Coding", "Long Q&A", "Tool-heavy", "Short", "Deep", "Technical", "Structured", "Agentic"]
     y-axis "Char Ratio"
-    bar [1.68, 6.16, 1.30, 1.00, 2.12, 1.00, 1.93, 1.43]
+    bar [1.94, 4.90, 1.41, 1.00, 2.50, 1.00, 1.86, 1.48]
 ```
 
 | Scenario               | Ratio | Reduction | Token Ratio | Messages | Compressed | Preserved |
 | ---------------------- | ----: | --------: | ----------: | -------: | ---------: | --------: |
-| Coding assistant       |  1.68 |       41% |        1.67 |       13 |          5 |         8 |
-| Long Q&A               |  6.16 |       84% |        6.11 |       10 |          4 |         6 |
-| Tool-heavy             |  1.30 |       23% |        1.29 |       18 |          2 |        16 |
+| Coding assistant       |  1.94 |       48% |        1.93 |       13 |          5 |         8 |
+| Long Q&A               |  4.90 |       80% |        4.88 |       10 |          4 |         6 |
+| Tool-heavy             |  1.41 |       29% |        1.40 |       18 |          2 |        16 |
 | Short conversation     |  1.00 |        0% |        1.00 |        7 |          0 |         7 |
-| Deep conversation      |  2.12 |       53% |        2.12 |       51 |         50 |         1 |
+| Deep conversation      |  2.50 |       60% |        2.49 |       51 |         50 |         1 |
 | Technical explanation  |  1.00 |        0% |        1.00 |       11 |          0 |        11 |
-| Structured content     |  1.93 |       48% |        1.92 |       12 |          2 |        10 |
-| Agentic coding session |  1.43 |       30% |        1.43 |       33 |          2 |        31 |
+| Structured content     |  1.86 |       46% |        1.85 |       12 |          2 |        10 |
+| Agentic coding session |  1.48 |       32% |        1.47 |       33 |          2 |        31 |
 
 ## Deduplication Impact
 
@@ -53,35 +53,35 @@ xychart-beta
     title "Deduplication Impact (recencyWindow=0)"
     x-axis ["Long Q&A", "Agentic"]
     y-axis "Char Ratio"
-    bar [5.14, 1.14]
-    bar [6.16, 1.43]
+    bar [4.00, 1.20]
+    bar [4.90, 1.48]
 ```
 
 _First bar: no dedup · Second bar: with dedup_
 
 | Scenario               | No Dedup (rw=0) | Dedup (rw=0) | No Dedup (rw=4) | Dedup (rw=4) | Deduped |
 | ---------------------- | --------------: | -----------: | --------------: | -----------: | ------: |
-| Coding assistant       |            1.68 |         1.68 |            1.51 |         1.51 |       0 |
-| Long Q&A               |            5.14 |         6.16 |            1.90 |         2.03 |       1 |
-| Tool-heavy             |            1.30 |         1.30 |            1.30 |         1.30 |       0 |
+| Coding assistant       |            1.94 |         1.94 |            1.61 |         1.61 |       0 |
+| Long Q&A               |            4.00 |         4.90 |            1.76 |         1.92 |       1 |
+| Tool-heavy             |            1.41 |         1.41 |            1.41 |         1.41 |       0 |
 | Short conversation     |            1.00 |         1.00 |            1.00 |         1.00 |       0 |
-| Deep conversation      |            2.12 |         2.12 |            1.95 |         1.95 |       0 |
+| Deep conversation      |            2.50 |         2.50 |            2.24 |         2.24 |       0 |
 | Technical explanation  |            1.00 |         1.00 |            1.00 |         1.00 |       0 |
-| Structured content     |            1.93 |         1.93 |            1.37 |         1.37 |       0 |
-| Agentic coding session |            1.14 |         1.43 |            1.14 |         1.43 |       4 |
+| Structured content     |            1.86 |         1.86 |            1.33 |         1.33 |       0 |
+| Agentic coding session |            1.20 |         1.48 |            1.20 |         1.48 |       4 |
 
 ### Fuzzy Dedup
 
 | Scenario               | Exact Deduped | Fuzzy Deduped | Ratio | vs Base |
 | ---------------------- | ------------: | ------------: | ----: | ------: |
-| Coding assistant       |             0 |             0 |  1.68 |       - |
-| Long Q&A               |             1 |             0 |  6.16 |       - |
-| Tool-heavy             |             0 |             0 |  1.30 |       - |
+| Coding assistant       |             0 |             0 |  1.94 |       - |
+| Long Q&A               |             1 |             0 |  4.90 |       - |
+| Tool-heavy             |             0 |             0 |  1.41 |       - |
 | Short conversation     |             0 |             0 |  1.00 |       - |
-| Deep conversation      |             0 |             0 |  2.12 |       - |
+| Deep conversation      |             0 |             0 |  2.50 |       - |
 | Technical explanation  |             0 |             0 |  1.00 |       - |
-| Structured content     |             0 |             0 |  1.93 |       - |
-| Agentic coding session |             4 |             2 |  2.23 |    +56% |
+| Structured content     |             0 |             0 |  1.86 |       - |
+| Agentic coding session |             4 |             2 |  2.35 |    +59% |
 
 ## Token Budget
 
@@ -89,10 +89,10 @@ Target: **2000 tokens** · 1/4 fit
 
 | Scenario               | Dedup | Tokens | Fits | recencyWindow | Compressed | Preserved | Deduped |
 | ---------------------- | ----- | -----: | ---- | ------------: | ---------: | --------: | ------: |
-| Deep conversation      | no    |   3738 | no   |             0 |         50 |         1 |       0 |
-| Deep conversation      | yes   |   3738 | no   |             0 |         50 |         1 |       0 |
-| Agentic coding session | no    |   2345 | no   |             0 |          4 |        33 |       0 |
-| Agentic coding session | yes   |   1957 | yes  |             9 |          1 |        32 |       4 |
+| Deep conversation      | no    |   3188 | no   |             0 |         50 |         1 |       0 |
+| Deep conversation      | yes   |   3188 | no   |             0 |         50 |         1 |       0 |
+| Agentic coding session | no    |   2223 | no   |             0 |          4 |        33 |       0 |
+| Agentic coding session | yes   |   1900 | yes  |             9 |          1 |        32 |       4 |
 
 ## Bundle Size
 
@@ -100,14 +100,14 @@ Target: **2000 tokens** · 1/4 fit
 
 | File          |    Size |    Gzip |
 | ------------- | ------: | ------: |
-| classify.js   |  7.5 KB |  3.2 KB |
-| compress.js   | 33.1 KB |  8.5 KB |
+| classify.js   |  7.9 KB |  3.4 KB |
+| compress.js   | 33.7 KB |  8.7 KB |
 | dedup.js      | 10.0 KB |  2.8 KB |
 | expand.js     |  2.7 KB |   934 B |
 | index.js      |   225 B |   159 B |
 | summarizer.js |  2.5 KB |   993 B |
 | types.js      |    11 B |    31 B |
-| **total**     | 56.2 KB | 16.6 KB |
+| **total**     | 57.0 KB | 16.9 KB |
 
 ## LLM vs Deterministic
 
@@ -116,26 +116,26 @@ Target: **2000 tokens** · 1/4 fit
 ```
 Deterministic vs ollama/llama3.2
 
-Coding assistant        Det ████████░░░░░░░░░░░░░░░░░░░░░░ 1.68x
-                        LLM ████████░░░░░░░░░░░░░░░░░░░░░░ 1.55x
+Coding assistant        Det ████████████░░░░░░░░░░░░░░░░░░ 1.94x
+                        LLM █████████░░░░░░░░░░░░░░░░░░░░░ 1.55x
 
-Long Q&A                Det ██████████████████████████████ 6.16x
-                        LLM ██████████████████████░░░░░░░░ 4.49x
+Long Q&A                Det ██████████████████████████████ 4.90x
+                        LLM ███████████████████████████░░░ 4.49x
 
-Tool-heavy              Det ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.30x
-                        LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.28x
+Tool-heavy              Det █████████░░░░░░░░░░░░░░░░░░░░░ 1.41x
+                        LLM ████████░░░░░░░░░░░░░░░░░░░░░░ 1.28x
 
-Deep conversation       Det ██████████░░░░░░░░░░░░░░░░░░░░ 2.12x
-                        LLM ████████████████░░░░░░░░░░░░░░ 3.28x  ★
+Deep conversation       Det ███████████████░░░░░░░░░░░░░░░ 2.50x
+                        LLM ████████████████████░░░░░░░░░░ 3.28x  ★
 
-Technical explanation   Det █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x
-                        LLM █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x
+Technical explanation   Det ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x
+                        LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x
 
-Structured content      Det █████████░░░░░░░░░░░░░░░░░░░░░ 1.93x
-                        LLM ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.46x
+Structured content      Det ███████████░░░░░░░░░░░░░░░░░░░ 1.86x
+                        LLM █████████░░░░░░░░░░░░░░░░░░░░░ 1.46x
 
-Agentic coding session  Det ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.43x
-                        LLM ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.40x
+Agentic coding session  Det █████████░░░░░░░░░░░░░░░░░░░░░ 1.48x
+                        LLM █████████░░░░░░░░░░░░░░░░░░░░░ 1.40x
 
 ★ = LLM wins
 ```
@@ -143,26 +143,26 @@ Agentic coding session  Det ███████░░░░░░░░░░
 ```
 Deterministic vs openai/gpt-4.1-mini
 
-Coding assistant        Det ████████░░░░░░░░░░░░░░░░░░░░░░ 1.68x
-                        LLM ████████░░░░░░░░░░░░░░░░░░░░░░ 1.64x
+Coding assistant        Det ███████████░░░░░░░░░░░░░░░░░░░ 1.94x
+                        LLM █████████░░░░░░░░░░░░░░░░░░░░░ 1.64x
 
-Long Q&A                Det ██████████████████████████████ 6.16x
-                        LLM ██████████████████████████░░░░ 5.37x
+Long Q&A                Det ███████████████████████████░░░ 4.90x
+                        LLM ██████████████████████████████ 5.37x  ★
 
-Tool-heavy              Det ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.30x
-                        LLM █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.12x
+Tool-heavy              Det ████████░░░░░░░░░░░░░░░░░░░░░░ 1.41x
+                        LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.12x
 
-Deep conversation       Det ██████████░░░░░░░░░░░░░░░░░░░░ 2.12x
-                        LLM ████████████░░░░░░░░░░░░░░░░░░ 2.37x  ★
+Deep conversation       Det ██████████████░░░░░░░░░░░░░░░░ 2.50x
+                        LLM █████████████░░░░░░░░░░░░░░░░░ 2.37x
 
-Technical explanation   Det █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x
-                        LLM █████░░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x
+Technical explanation   Det ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x
+                        LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.00x
 
-Structured content      Det █████████░░░░░░░░░░░░░░░░░░░░░ 1.93x
-                        LLM ██████░░░░░░░░░░░░░░░░░░░░░░░░ 1.29x
+Structured content      Det ██████████░░░░░░░░░░░░░░░░░░░░ 1.86x
+                        LLM ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.29x
 
-Agentic coding session  Det ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.43x
-                        LLM ███████░░░░░░░░░░░░░░░░░░░░░░░ 1.43x
+Agentic coding session  Det ████████░░░░░░░░░░░░░░░░░░░░░░ 1.48x
+                        LLM ████████░░░░░░░░░░░░░░░░░░░░░░ 1.43x
 
 ★ = LLM wins
 ```
diff --git a/docs/compression-pipeline.md b/docs/compression-pipeline.md
index f894cd4..da9c5ec 100644
--- a/docs/compression-pipeline.md
+++ b/docs/compression-pipeline.md
@@ -100,7 +100,7 @@ The `summarize` function uses sentence scoring:
 5. Re-sort selected sentences by original position to preserve reading order
 6. Join with `...` separator
 
-Budget: 200 chars if input < 600 chars, 400 chars otherwise.
+Budget scales adaptively: `max(200, min(round(length × 0.3), 600))`. Content up to roughly 670 chars gets the 200-char floor; content of roughly 2,000 chars or more gets the 600-char cap.
 
 ### Entity extraction
 
@@ -111,14 +111,14 @@ After summarizing, `extractEntities` pulls out key identifiers from the original
 - Vowelless abbreviations
 - Numbers with units/context
 
-Up to 10 entities are appended as `| entities: foo, bar, baz`.
+The entity cap scales with content length: `max(3, min(round(length / 200), 15))`, i.e. between 3 and 15. The selected entities are appended as `| entities: foo, bar, baz`.
 
 ### Code-split processing
 
 Messages containing code fences with significant prose (>= 80 chars) get split:
 
 1. `splitCodeAndProse` extracts code fences and surrounding prose separately
-2. Prose is summarized (budget: 200 if < 600 chars, else 400)
+2. Prose is summarized (budget scales adaptively with prose length: 200–600 chars)
 3. Code fences are preserved verbatim
 4. Result: `[summary: ...]\n\n```code here````
 
diff --git a/docs/preservation-rules.md b/docs/preservation-rules.md
index 1060e07..bba9bdf 100644
--- a/docs/preservation-rules.md
+++ b/docs/preservation-rules.md
@@ -68,11 +68,11 @@ Soft T0 content is still compressible because the entity extraction step capture
 
 ### T2 — Short prose
 
-Prose under 20 words. Currently treated the same as T3 in the compression pipeline.
+Prose under 20 words. Treated identically to T3 in the current deterministic pipeline — the distinction is preserved for future LLM classifier integration, which can apply lighter compression to short prose.
 
 ### T3 — Long prose
 
-Prose of 20+ words. The primary target for summarization.
+Prose of 20+ words. The primary target for summarization. Treated identically to T2 in the current pipeline; a future LLM classifier can use the T2/T3 distinction for tier-specific strategies.
 
 ## API key detection
 
@@ -103,7 +103,7 @@ SQL detection uses a tiered anchor system to avoid false positives on English pr
 Messages with code fences and significant prose (>= 80 chars) are split:
 
 1. Code fences are extracted verbatim
-2. Surrounding prose is summarized (budget: 200 chars if < 600 chars, 400 otherwise)
+2. Surrounding prose is summarized (budget scales adaptively: 200–600 chars based on prose length)
 3. Result: summary + preserved code fences
 
 If the total prose is < 80 chars, the entire message is preserved (not enough prose to justify splitting).
diff --git a/src/classify.ts b/src/classify.ts
index 6e5f5cd..68794fa 100644
--- a/src/classify.ts
+++ b/src/classify.ts
@@ -1,5 +1,18 @@
 export type ClassifyResult = {
   decision: 'T0' | 'T2' | 'T3';
+  /**
+   * Classification confidence (0–1). Higher values indicate stronger signal.
+   *
+   * For T0: starts at 0.70, increases by 0.05 per additional structural reason
+   * (capped at 0.95). Multiple overlapping signals → higher confidence.
+   * For T2/T3: fixed at 0.65 (pure prose heuristic, no structural anchors).
+   *
+   * The deterministic pipeline does not route on confidence — it uses the
+   * hard/soft T0 distinction instead. Consumers can use confidence for custom
+   * routing (e.g. only compress below a threshold), monitoring dashboards,
+   * or LLM classifier fallback decisions (cf. Amazon Science "Label with
+   * Confidence" for confidence-weighted routing patterns).
+   */
   confidence: number;
   reasons: string[];
 };
@@ -189,6 +202,14 @@ function detectContentTypes(text: string): {
 
 // -- Tier heuristic for clean prose --
 
+/**
+ * Assign T2 (short prose, < 20 words) or T3 (long prose, >= 20 words).
+ *
+ * Both tiers are compressed identically in the current deterministic pipeline.
+ * The distinction exists so a future LLM classifier can apply different
+ * strategies per tier — e.g. lighter summarization for T2 or aggressive
+ * compression for verbose T3 content.
+ */
 function inferProseTier(text: string): 'T2' | 'T3' {
   const words = text.split(/\s+/).length;
   if (words < 20) return 'T2';
diff --git a/src/compress.ts b/src/compress.ts
index b77b72c..6c09c03 100644
--- a/src/compress.ts
+++ b/src/compress.ts
@@ -330,6 +330,10 @@ const COMMON_STARTERS = new Set([
   'Into',
 ]);
 
+function computeBudget(contentLength: number): number {
+  return Math.max(200, Math.min(Math.round(contentLength * 0.3), 600));
+}
+
 function extractEntities(text: string): string[] {
   const entities = new Set<string>();
 
@@ -376,8 +380,8 @@ function extractEntities(text: string): string[] {
     for (const n of numbersCtx) entities.add(n.trim());
   }
 
-  // Cap at 10
-  return Array.from(entities).slice(0, 10);
+  const maxEntities = Math.max(3, Math.min(Math.round(text.length / 200), 15));
+  return Array.from(entities).slice(0, maxEntities);
 }
 
 function splitCodeAndProse(text: string): Array<{ type: 'prose' | 'code'; content: string }> {
@@ -572,6 +576,10 @@ function classifyAll(
       }
       return { msg, preserved: true };
     }
+    // T2 (short prose) and T3 (long prose) are intentionally treated identically
+    // in the current pipeline — both go through the same summarization path.
+    // The distinction is preserved for future LLM classifier integration, which
+    // can apply different strategies per tier (e.g. lighter compression for T2).
     if (content) {
       const cls = classifyMessage(content);
       if (cls.decision === 'T0') {
@@ -735,7 +743,7 @@ function* compressGen(
         .map((s) => s.content)
         .join(' ');
       const codeFences = segments.filter((s) => s.type === 'code').map((s) => s.content);
-      const proseBudget = proseText.length < 600 ? 200 : 400;
+      const proseBudget = computeBudget(proseText.length);
       const summaryText: string = yield { text: proseText, budget: proseBudget };
       const embeddedId = options.embedSummaryId ? makeSummaryId([msg.id]) : undefined;
       const compressed = `${formatSummary(summaryText, proseText, undefined, true, embeddedId)}\n\n${codeFences.join('\n\n')}`;
@@ -762,7 +770,7 @@ function* compressGen(
     const allContent = group
       .map((g) => (typeof g.msg.content === 'string' ? g.msg.content : ''))
       .join(' ');
-    const contentBudget = allContent.length < 600 ? 200 : 400;
+    const contentBudget = computeBudget(allContent.length);
     const summaryText = isStructuredOutput(allContent)
       ? summarizeStructured(allContent, contentBudget)
       : yield { text: allContent, budget: contentBudget };
diff --git a/tests/compress.test.ts b/tests/compress.test.ts
index 822cccc..6b5512a 100644
--- a/tests/compress.test.ts
+++ b/tests/compress.test.ts
@@ -684,14 +684,13 @@ describe('compress', () => {
       expect(content).toContain('Express');
     });
 
-    it('caps at 400 chars when no punctuation', () => {
-      const noPunct = 'word '.repeat(200); // 1000 chars, no sentence-ending punctuation
+    it('caps at adaptive budget when no punctuation', () => {
+      const noPunct = 'word '.repeat(200); // 1000 chars → computeBudget = 300
       const messages: Message[] = [msg({ id: '1', index: 0, role: 'user', content: noPunct })];
       const result = compress(messages, { recencyWindow: 0 });
-      // The summary text (between [summary: and the suffix) should not exceed 400 chars
       const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/);
       expect(match).toBeTruthy();
-      expect(match![1].length).toBeLessThanOrEqual(400);
+      expect(match![1].length).toBeLessThanOrEqual(300);
     });
 
     it('includes first substantive + last sentence', () => {
@@ -719,19 +718,20 @@ describe('compress', () => {
       expect(content).toContain('Sure thing');
     });
 
-    it('hard caps overall summary at 400 chars', () => {
+    it('hard caps overall summary at adaptive budget', () => {
       // Use non-hex chars to avoid triggering hash_or_sha T0 detection
       const longSentence =
         'Wor '.repeat(50) + 'is the architecture we chose for this particular deployment. ';
       const text =
         longSentence + 'The last sentence describes the final outcome of this deployment strategy.';
+      // ~1675 chars → computeBudget = 503
       const messages: Message[] = [
         msg({ id: '1', index: 0, role: 'user', content: text.repeat(5) }),
       ];
       const result = compress(messages, { recencyWindow: 0 });
       const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/);
       expect(match).toBeTruthy();
-      expect(match![1].length).toBeLessThanOrEqual(400);
+      expect(match![1].length).toBeLessThanOrEqual(503);
     });
 
     it('extracts content from multiple paragraphs', () => {
@@ -760,7 +760,7 @@ describe('compress', () => {
       expect(content).toContain('authentication module');
     });
 
-    it('budget ceiling at 400 chars', () => {
+    it('adaptive budget ceiling scales with content length', () => {
       const sentences = Array.from(
         { length: 20 },
         (_, i) => `Sentence number ${i + 1} provides additional context about the deployment.`,
@@ -771,7 +771,8 @@ describe('compress', () => {
       const result = compress(messages, { recencyWindow: 0 });
       const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/);
       expect(match).toBeTruthy();
-      expect(match![1].length).toBeLessThanOrEqual(400);
+      // ~3900 chars content → computeBudget = 600
+      expect(match![1].length).toBeLessThanOrEqual(600);
     });
 
     it('weights PASS/FAIL/ERROR status words higher', () => {
@@ -878,7 +879,7 @@ describe('compress', () => {
       expect(content).toContain('grpc');
     });
 
-    it('caps entities at 10', () => {
+    it('caps entities proportionally to content length', () => {
       const text =
         'Alice Bob Charlie Dave Eve Frank Grace Heidi Ivan Judy Karl Liam Mallory spoke about getUserData fetchItems parseConfig with user_id auth_token db_name cache_key log_level queue_size worker_count and 5 retries and 10 seconds. '.repeat(
           3,
@@ -889,7 +890,57 @@ describe('compress', () => {
       const entitiesMatch = content.match(/entities: ([^\]]+)/);
       expect(entitiesMatch).toBeTruthy();
       const entityList = entitiesMatch![1].split(', ');
-      expect(entityList.length).toBeLessThanOrEqual(10);
+      // ~684 chars → cap = max(3, min(round(684/200), 15)) = 3
+      expect(entityList.length).toBeLessThanOrEqual(3);
+    });
+
+    it('allows more entities for longer content', () => {
+      const text =
+        'Alice Bob Charlie Dave Eve Frank Grace Heidi Ivan Judy Karl Liam Mallory spoke about getUserData fetchItems parseConfig with user_id auth_token db_name cache_key log_level queue_size worker_count and 5 retries and 10 seconds. '.repeat(
+          12,
+        );
+      const messages: Message[] = [msg({ id: '1', index: 0, role: 'user', content: text })];
+      const result = compress(messages, { recencyWindow: 0 });
+      const content = result.messages[0].content!;
+      const entitiesMatch = content.match(/entities: ([^\]]+)/);
+      expect(entitiesMatch).toBeTruthy();
+      const entityList = entitiesMatch![1].split(', ');
+      // ~2736 chars → cap = max(3, min(round(2736/200), 15)) = 14
+      expect(entityList.length).toBeGreaterThan(3);
+      expect(entityList.length).toBeLessThanOrEqual(15);
+    });
+  });
+
+  describe('adaptive budget scaling', () => {
+    it('short content gets a small budget (≤ 200 chars)', () => {
+      // 456 chars of prose → round(456 * 0.3) = 137, raised to the 200 floor by computeBudget
+      const text =
+        'The deployment process starts by pulling the latest Docker image from the registry and running pre-flight checks. '.repeat(
+          4,
+        );
+      expect(text.length).toBeLessThan(667);
+      const messages: Message[] = [msg({ id: '1', index: 0, role: 'user', content: text })];
+      const result = compress(messages, { recencyWindow: 0 });
+      const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/);
+      expect(match).toBeTruthy();
+      expect(match![1].length).toBeLessThanOrEqual(200);
+    });
+
+    it('long content gets a larger budget (≤ 600 and > 200 chars)', () => {
+      // ~3080 chars of near-identical sentences → round(3080 * 0.3) = 924, capped at 600 by computeBudget
+      const sentences = Array.from(
+        { length: 30 },
+        (_, i) =>
+          `Step ${i + 1} in the deployment pipeline involves running integration tests against the staging environment.`,
+      ).join(' ');
+      expect(sentences.length).toBeGreaterThan(2000);
+      const messages: Message[] = [msg({ id: '1', index: 0, role: 'user', content: sentences })];
+      const result = compress(messages, { recencyWindow: 0 });
+      const match = result.messages[0].content!.match(/\[summary: (.*?)(?:\s*\(|\s*\||\])/);
+      expect(match).toBeTruthy();
+      expect(match![1].length).toBeLessThanOrEqual(600);
+      // Budget is 600 so the summarizer has room for > 200 chars
+      expect(match![1].length).toBeGreaterThan(200);
     });
   });