diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
index 85c9d2854c7..1da42f77762 100644
--- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
+++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala
@@ -53,7 +53,6 @@ import java.io.ByteArrayOutputStream
 import java.lang.{Double => JDouble, Float => JFloat}
 import java.math.{BigDecimal => JBigDecimal, BigInteger}
 import java.nio.{ByteBuffer, ByteOrder}
-import java.nio.charset.StandardCharsets.UTF_8
 import java.util.Arrays
 
 import scala.util.control.NonFatal
@@ -124,7 +123,7 @@ class CachedColumnarBatchKryoSerializer extends KryoSerializer[CachedColumnarBat
       output.writeBoolean(false)
     } else {
       output.writeBoolean(true)
-      val schemaBytes = batch.schema.json.getBytes(UTF_8)
+      val schemaBytes = CachedColumnarBatchKryoSerializer.SchemaIntern.encodeBytes(batch.schema)
       output.writeInt(schemaBytes.length)
       output.writeBytes(schemaBytes)
     }
@@ -220,12 +219,16 @@ class CachedColumnarBatchKryoSerializer extends KryoSerializer[CachedColumnarBat
       )
       val schemaBytes = new Array[Byte](schemaLen)
       input.readBytes(schemaBytes)
-      DataType.fromJson(new String(schemaBytes, UTF_8)).asInstanceOf[StructType]
+      CachedColumnarBatchKryoSerializer.SchemaIntern.decodeStructType(schemaBytes)
     }
   }
 }
 
 object CachedColumnarBatchKryoSerializer {
+  // Process-wide schema-codec memoizer. Singleton so the cache survives across Kryo's per-stream
+  // serializer instances within the same JVM.
+  private[execution] val SchemaIntern: SchemaJsonInternCache = new SchemaJsonInternCache
+
   // Defensive upper bound on any single length-prefixed field in the Kryo wire (payload bytes,
   // statsBlob, schema JSON). Tied to spark.kryoserializer.buffer.max because Kryo write itself
   // refuses to emit any single object larger than that ceiling, so any stream claiming a larger
diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/SchemaJsonInternCache.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/SchemaJsonInternCache.scala
new file mode 100644
index 00000000000..fecf218c6a7
--- /dev/null
+++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/SchemaJsonInternCache.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution
+
+import org.apache.spark.sql.types.{DataType, StructType}
+
+import com.github.benmanes.caffeine.cache.{Cache, Caffeine}
+
+import java.nio.charset.StandardCharsets
+
+/**
+ * Process-local memoizer for `StructType <-> JSON` codec on the cached-batch hot path. Best-effort
+ * size-bounded Caffeine cache (W-TinyLFU); eviction recomputes via the same pure codec, so misses
+ * are indistinguishable from the no-cache baseline. Thread-safety via Caffeine
+ * `get(key, mappingFunction)`.
+ */
+final private[execution] class SchemaJsonInternCache {
+  import SchemaJsonInternCache._
+
+  private val encodeCache: Cache[StructType, Array[Byte]] =
+    Caffeine.newBuilder.maximumSize(CAP).build[StructType, Array[Byte]]()
+
+  private val decodeCache: Cache[String, StructType] =
+    Caffeine.newBuilder.maximumSize(CAP).build[String, StructType]()
+
+  /**
+   * Returns the canonical UTF-8 JSON byte form of `schema`. The returned array is shared with the
+   * cache; callers must treat it as immutable.
+   */
+  def encodeBytes(schema: StructType): Array[Byte] =
+    encodeCache.get(schema, k => k.json.getBytes(StandardCharsets.UTF_8))
+
+  /** Returns the canonical [[StructType]] parsed from `bytes` (UTF-8 JSON). */
+  def decodeStructType(bytes: Array[Byte]): StructType = {
+    val key = new String(bytes, StandardCharsets.UTF_8)
+    decodeCache.get(key, k => DataType.fromJson(k).asInstanceOf[StructType])
+  }
+}
+
+private[execution] object SchemaJsonInternCache {
+  // 256 entries per side. Empirically large enough to cover the unique-schema fanout of a
+  // typical multi-cached-table query; small enough that retained heap stays bounded even for
+  // wide schemas. Tune via re-running the schema-codec working-set sweep section of
+  // ColumnarTableCachePartitionStatsBenchmark.
+  private[execution] val CAP: Long = 256L
+}
diff --git a/backends-velox/src/test/scala/org/apache/spark/sql/execution/SchemaJsonInternCacheSuite.scala b/backends-velox/src/test/scala/org/apache/spark/sql/execution/SchemaJsonInternCacheSuite.scala
new file mode 100644
index 00000000000..6ef205cafde
--- /dev/null
+++ b/backends-velox/src/test/scala/org/apache/spark/sql/execution/SchemaJsonInternCacheSuite.scala
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType}
+
+import java.nio.charset.StandardCharsets
+import java.util.concurrent.{CountDownLatch, Executors, TimeUnit}
+import java.util.concurrent.atomic.AtomicInteger
+
+import scala.util.Random
+
+/**
+ * Invariants for [[SchemaJsonInternCache]]: (1) determinism -- equal inputs yield byte-identical /
+ * canonical-instance outputs; (2) capacity -- size-bounded cap = 256, eviction never corrupts later
+ * results; (3) concurrency -- contended get-or-compute yields correct results without exception.
+ */
+class SchemaJsonInternCacheSuite extends SparkFunSuite {
+
+  private def schemaOfWidth(n: Int): StructType =
+    StructType((0 until n).map(i => StructField(s"c$i", LongType, nullable = true)))
+
+  // === Invariant 1: determinism ===
+
+  test("encode is deterministic: same StructType => byte-identical output") {
+    val intern = new SchemaJsonInternCache
+    val s = schemaOfWidth(10)
+    val a = intern.encodeBytes(s)
+    val b = intern.encodeBytes(s)
+    assert(a.sameElements(b), "encodeBytes must be deterministic for equal inputs")
+    // intern is a memoizer, not a transformer
+    val raw = s.json.getBytes(StandardCharsets.UTF_8)
+    assert(a.sameElements(raw), "encodeBytes(s) must equal s.json.getBytes(UTF_8)")
+  }
+
+  test("decode is deterministic: same bytes => structurally-equal StructType") {
+    val intern = new SchemaJsonInternCache
+    val s = StructType(Seq(
+      StructField("a", IntegerType),
+      StructField("b", StringType),
+      StructField("c", LongType, nullable = false)))
+    val bytes = s.json.getBytes(StandardCharsets.UTF_8)
+    val d1 = intern.decodeStructType(bytes)
+    val d2 = intern.decodeStructType(bytes)
+    assert(d1 == s)
+    assert(d2 == s)
+    // canonical-instance contract: equal bytes => same instance (saves repeated parse cost)
+    assert(d1.eq(d2), "decodeStructType must return the same canonical instance for equal bytes")
+  }
+
+  test("encode canonicality: same StructType returns the same byte array instance") {
+    val intern = new SchemaJsonInternCache
+    val s = schemaOfWidth(5)
+    val a = intern.encodeBytes(s)
+    val b = intern.encodeBytes(s)
+    assert(a.eq(b), "encodeBytes must return the same canonical byte array for equal inputs")
+  }
+
+  // === Invariant 2: capacity ===
+
+  test("cap = 256 entries: eviction past cap does not corrupt later results") {
+    val intern = new SchemaJsonInternCache
+    val cap = 256
+    val total = cap * 4 // 1024 distinct schemas, forces ~75% miss rate
+    val schemas = (0 until total).map(i => schemaOfWidth(8 + (i % 16)))
+    schemas.foreach(intern.encodeBytes)
+    schemas.zipWithIndex.foreach {
+      case (s, i) =>
+        val cached = intern.encodeBytes(s)
+        val raw = s.json.getBytes(StandardCharsets.UTF_8)
+        assert(
+          cached.sameElements(raw),
+          s"entry $i (width=${s.length}) was corrupted across eviction cycles")
+    }
+  }
+
+  test("decode under cap pressure: >= cap distinct bytes still all decode correctly") {
+    val intern = new SchemaJsonInternCache
+    val cap = 256
+    val distinct = cap * 4
+    val pairs = (0 until distinct).map {
+      i =>
+        val s = schemaOfWidth(8 + (i % 16))
+        (s, s.json.getBytes(StandardCharsets.UTF_8))
+    }
+    // walk twice -- second walk hits a mix of evicted and live entries
+    pairs.foreach { case (_, bytes) => intern.decodeStructType(bytes) }
+    pairs.foreach {
+      case (s, bytes) =>
+        val decoded = intern.decodeStructType(bytes)
+        assert(decoded == s, s"decoded != expected for width=${s.length}")
+    }
+  }
+
+  // === Invariant 3: concurrency ===
+
+  test("concurrent get-or-compute: N threads on overlapping keys yields correct results") {
+    val intern = new SchemaJsonInternCache
+    val threads = 8
+    val keysPerThread = 200
+    val sharedKeySpace = 64 // overlap forces contention on same cache slots
+    val schemas = (0 until sharedKeySpace).map(i => schemaOfWidth(8 + (i % 12)))
+
+    val pool = Executors.newFixedThreadPool(threads)
+    val start = new CountDownLatch(1)
+    val errors = new AtomicInteger(0)
+    val random = new Random(42)
+
+    val futures = (0 until threads).map {
+      tid =>
+        val rnd = new Random(random.nextLong())
+        pool.submit(new Runnable {
+          override def run(): Unit = {
+            start.await()
+            var i = 0
+            while (i < keysPerThread) {
+              val s = schemas(rnd.nextInt(sharedKeySpace))
+              try {
+                val enc = intern.encodeBytes(s)
+                val raw = s.json.getBytes(StandardCharsets.UTF_8)
+                if (!enc.sameElements(raw)) errors.incrementAndGet()
+
+                val dec = intern.decodeStructType(raw)
+                if (dec != s) errors.incrementAndGet()
+              } catch {
+                case _: Throwable => errors.incrementAndGet()
+              }
+              i += 1
+            }
+          }
+        })
+    }
+    start.countDown()
+    try {
+      futures.foreach(_.get(60, TimeUnit.SECONDS))
+    } finally {
+      pool.shutdown()
+      assert(pool.awaitTermination(10, TimeUnit.SECONDS), "thread pool did not terminate")
+    }
+    assert(
+      errors.get() == 0,
+      s"${errors.get()} concurrent get-or-compute errors out of ${threads * keysPerThread} ops")
+  }
+}
diff --git a/backends-velox/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnarTableCachePartitionStatsBenchmark.scala b/backends-velox/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnarTableCachePartitionStatsBenchmark.scala
index 5d66da42594..3d62689285d 100644
--- a/backends-velox/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnarTableCachePartitionStatsBenchmark.scala
+++ b/backends-velox/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnarTableCachePartitionStatsBenchmark.scala
@@ -20,11 +20,16 @@ import org.apache.gluten.config.GlutenConfig
 
 import org.apache.spark.benchmark.Benchmark
 import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.execution.SchemaJsonInternCache
+import org.apache.spark.sql.types._
 import org.apache.spark.storage.StorageLevel
 
+import java.nio.charset.StandardCharsets
+
 /**
  * Benchmark to measure write/read overhead and pruning benefit of partition stats in columnar table
- * cache. To run this benchmark:
+ * cache, plus microbench coverage for the schema-codec intern cache used by
+ * `ColumnarCachedBatchSerializer`. To run this benchmark:
  * {{{
  *   1. without sbt:
  *      bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
@@ -59,6 +64,163 @@ object ColumnarTableCachePartitionStatsBenchmark extends SqlBasedBenchmark {
     }
   }
 
+  // ============================================================================
+  // Schema-codec intern microbench (SchemaJsonInternCache).
+  //
+  // ColumnarCachedBatchSerializer hot paths call StructType.json on every batch
+  // write and DataType.fromJson on every batch read. The intern cache memoizes
+  // the round-trip without changing the wire format. Sections below compare two
+  // distinct method calls in the same JVM as cache off (raw codec) vs cache on
+  // (intern memoized round-trip), with no toggle on the cache class itself.
+  // ============================================================================
+
+  private val INTERN_CAP = SchemaJsonInternCache.CAP.toInt
+
+  private def schemaFixture(numCols: Int, nameLen: Int): StructType = {
+    val name = "c" + ("x" * math.max(0, nameLen - 1))
+    StructType(
+      (0 until numCols).map(i => StructField(s"$name$i", LongType, nullable = true)))
+  }
+
+  // TPC-DS store_sales-derived 23-col mixed-type fixture; realistic name shape.
+  private def realisticSchema: StructType = StructType(
+    Seq(
+      StructField("ss_sold_date_sk", IntegerType),
+      StructField("ss_sold_time_sk", IntegerType),
+      StructField("ss_item_sk", IntegerType),
+      StructField("ss_customer_sk", IntegerType),
+      StructField("ss_cdemo_sk", IntegerType),
+      StructField("ss_hdemo_sk", IntegerType),
+      StructField("ss_addr_sk", IntegerType),
+      StructField("ss_store_sk", IntegerType),
+      StructField("ss_promo_sk", IntegerType),
+      StructField("ss_ticket_number", LongType),
+      StructField("ss_quantity", IntegerType),
+      StructField("ss_wholesale_cost", DecimalType(7, 2)),
+      StructField("ss_list_price", DecimalType(7, 2)),
+      StructField("ss_sales_price", DecimalType(7, 2)),
+      StructField("ss_ext_discount_amt", DecimalType(7, 2)),
+      StructField("ss_ext_sales_price", DecimalType(7, 2)),
+      StructField("ss_ext_wholesale_cost", DecimalType(7, 2)),
+      StructField("ss_ext_list_price", DecimalType(7, 2)),
+      StructField("ss_ext_tax", DecimalType(7, 2)),
+      StructField("ss_coupon_amt", DecimalType(7, 2)),
+      StructField("ss_net_paid", DecimalType(7, 2)),
+      StructField("ss_net_paid_inc_tax", DecimalType(7, 2)),
+      StructField("ss_net_profit", DecimalType(7, 2))
+    ))
+
+  private val internSchemas: Seq[(String, StructType)] =
+    (for {
+      width <- Seq(10, 100, 1000)
+      nameLen <- Seq(1, 32)
+    } yield (s"w=$width n=$nameLen", schemaFixture(width, nameLen))) :+
+      ("tpcds-store_sales-23col" -> realisticSchema)
+
+  private def runInternEncode(label: String, schema: StructType): Unit = {
+    val N = 1L * 1000 * 1000
+    val intern = new SchemaJsonInternCache
+    val bench = new Benchmark(label, N, output = output)
+    bench.addCase("off (raw schema.json.getBytes per call)", 5) {
+      _ =>
+        var i = 0L
+        var checksum = 0L
+        while (i < N) {
+          val bytes = schema.json.getBytes(StandardCharsets.UTF_8)
+          checksum ^= bytes.length.toLong
+          i += 1
+        }
+        assert(checksum != Long.MinValue, s"checksum=$checksum")
+    }
+    bench.addCase("on  (intern.encodeBytes: cached canonical bytes)", 5) {
+      _ =>
+        var i = 0L
+        var checksum = 0L
+        while (i < N) {
+          val bytes = intern.encodeBytes(schema)
+          checksum ^= bytes.length.toLong
+          i += 1
+        }
+        assert(checksum != Long.MinValue, s"checksum=$checksum")
+    }
+    bench.run()
+  }
+
+  private def runInternDecode(label: String, schema: StructType): Unit = {
+    val N = 1L * 100 * 1000
+    val intern = new SchemaJsonInternCache
+    val jsonBytes = schema.json.getBytes(StandardCharsets.UTF_8)
+    val bench = new Benchmark(label, N, output = output)
+    bench.addCase("off (raw DataType.fromJson per call)", 5) {
+      _ =>
+        var i = 0L
+        var checksum = 0L
+        while (i < N) {
+          val s = DataType
+            .fromJson(new String(jsonBytes, StandardCharsets.UTF_8))
+            .asInstanceOf[StructType]
+          checksum ^= s.length.toLong
+          i += 1
+        }
+        assert(checksum != Long.MinValue, s"checksum=$checksum")
+    }
+    bench.addCase("on  (intern.decodeStructType: cached canonical StructType)", 5) {
+      _ =>
+        var i = 0L
+        var checksum = 0L
+        while (i < N) {
+          val s = intern.decodeStructType(jsonBytes)
+          checksum ^= s.length.toLong
+          i += 1
+        }
+        assert(checksum != Long.MinValue, s"checksum=$checksum")
+    }
+    bench.run()
+  }
+
+  // Working-set sweep across three regimes, parameterized by distinct-schema count vs cache cap:
+  //   C1: 256  schemas == cap  -> all fit, steady state
+  //   C2: 512  schemas == 2x   -> cap pressure
+  //   C3: 1024 schemas == 4x   -> churn
+  // On/off ratios depend on W-TinyLFU's frequency-based admission and the workload's repeat
+  // pattern; see the committed `-results.txt` for the actual numbers on the bench-author's
+  // environment.
+  private def runInternWorkingSetSweep(): Unit = {
+    val passes = 100
+    Seq(
+      ("C1 hit (256 schemas == cap)", INTERN_CAP),
+      ("C2 partial (512 schemas == 2x cap)", INTERN_CAP * 2),
+      ("C3 churn (1024 schemas == 4x cap)", INTERN_CAP * 4)
+    ).foreach {
+      case (label, distinctCount) =>
+        val many = (0 until distinctCount).map(i => schemaFixture(10, 8 + (i % 16)))
+        val N = many.length.toLong * passes
+        val intern = new SchemaJsonInternCache
+        val bench = new Benchmark(label, N, output = output)
+        bench.addCase("off", 5) {
+          _ =>
+            var p = 0
+            var checksum = 0L
+            while (p < passes) {
+              many.foreach(s => checksum ^= s.json.getBytes(StandardCharsets.UTF_8).length.toLong)
+              p += 1
+            }
+            assert(checksum != Long.MinValue, s"checksum=$checksum")
+        }
+        bench.addCase("on", 5) {
+          _ =>
+            var p = 0
+            var checksum = 0L
+            while (p < passes) {
+              many.foreach(s => checksum ^= intern.encodeBytes(s).length.toLong)
+              p += 1
+            }
+            assert(checksum != Long.MinValue, s"checksum=$checksum")
+        }
+        bench.run()
+    }
+  }
+
   override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
     // === Benchmark 1: write-path overhead (cache build) ===
     val buildBench = new Benchmark("table cache build", numRows, output = output)
@@ -118,5 +280,20 @@ object ColumnarTableCachePartitionStatsBenchmark extends SqlBasedBenchmark {
     readPointBench.run()
 
     spark.catalog.clearCache()
+
+    // === Benchmark 5: schema-codec intern microbench - encode (Section A) ===
+    runBenchmark("StructType JSON codec - encode (Section A)") {
+      internSchemas.foreach { case (label, sch) => runInternEncode(s"encode $label", sch) }
+    }
+
+    // === Benchmark 6: schema-codec intern microbench - decode (Section B) ===
+    runBenchmark("StructType JSON codec - decode (Section B)") {
+      internSchemas.foreach { case (label, sch) => runInternDecode(s"decode $label", sch) }
+    }
+
+    // === Benchmark 7: schema-codec intern working-set sweep (Section C) ===
+    runBenchmark("StructType JSON codec - working-set sweep (Section C)") {
+      runInternWorkingSetSweep()
+    }
   }
 }
diff --git a/benchmarks/ColumnarTableCachePartitionStatsBenchmark-results.txt b/benchmarks/ColumnarTableCachePartitionStatsBenchmark-results.txt
index a3599570d27..07828d10a5e 100644
--- a/benchmarks/ColumnarTableCachePartitionStatsBenchmark-results.txt
+++ b/benchmarks/ColumnarTableCachePartitionStatsBenchmark-results.txt
@@ -1,28 +1,162 @@
-OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
 AMD EPYC 7763 64-Core Processor
 table cache build:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-partitionStats off                               126425         138565       10546          0.8        1264.3       1.0X
-partitionStats on                                131431         137094        7581          0.8        1314.3       1.0X
+partitionStats off                               127107         127856         668          0.8        1271.1       1.0X
+partitionStats on                                134398         146067       10193          0.7        1344.0       0.9X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
 AMD EPYC 7763 64-Core Processor
 table cache filter+agg (high selectivity, ~0.001%):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ----------------------------------------------------------------------------------------------------------------------------------
-partitionStats off                                           4431           4492          70         22.6          44.3       1.0X
-partitionStats on                                            1744           1777          31         57.3          17.4       2.5X
+partitionStats off                                           4756           5283         855         21.0          47.6       1.0X
+partitionStats on                                             570            584          14        175.6           5.7       8.4X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
 AMD EPYC 7763 64-Core Processor
 table cache filter+agg (low selectivity, ~50%):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------------
-partitionStats off                                       5332           5411          70         18.8          53.3       1.0X
-partitionStats on                                        3392           3446          90         29.5          33.9       1.6X
+partitionStats off                                       5229           5309         115         19.1          52.3       1.0X
+partitionStats on                                        3374           3385          13         29.6          33.7       1.5X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
 AMD EPYC 7763 64-Core Processor
 table cache filter+agg (point lookup, 1 row):  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ----------------------------------------------------------------------------------------------------------------------------
-partitionStats off                                     4343           4489         129         23.0          43.4       1.0X
-partitionStats on                                      1686           1709          21         59.3          16.9       2.6X
+partitionStats off                                     4409           4412           3         22.7          44.1       1.0X
+partitionStats on                                       368            417          83        271.4           3.7      12.0X
+
+================================================================================================
+StructType JSON codec - encode (Section A)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+encode w=10 n=1:                                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+off (raw schema.json.getBytes per call)                    3622           3636          27          0.3        3621.8       1.0X
+on  (intern.encodeBytes: cached canonical bytes)              6              6           0        176.5           5.7     639.4X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+encode w=10 n=32:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+off (raw schema.json.getBytes per call)                    4121           4238         200          0.2        4120.7       1.0X
+on  (intern.encodeBytes: cached canonical bytes)              6              6           0        165.0           6.1     679.8X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+encode w=100 n=1:                                 Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+off (raw schema.json.getBytes per call)                   32890          33149         154          0.0       32890.0       1.0X
+on  (intern.encodeBytes: cached canonical bytes)              6              6           0        163.8           6.1    5386.6X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+encode w=100 n=32:                                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+off (raw schema.json.getBytes per call)                   35756          36230         771          0.0       35755.8       1.0X
+on  (intern.encodeBytes: cached canonical bytes)              6              6           0        163.1           6.1    5832.6X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+encode w=1000 n=1:                                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+off (raw schema.json.getBytes per call)                  332577         334072        1843          0.0      332577.3       1.0X
+on  (intern.encodeBytes: cached canonical bytes)              6              6           0        164.6           6.1   54735.5X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+encode w=1000 n=32:                               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+off (raw schema.json.getBytes per call)                  444692         445510        1066          0.0      444691.5       1.0X
+on  (intern.encodeBytes: cached canonical bytes)              6              6           0        165.6           6.0   73661.2X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+encode tpcds-store_sales-23col:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+--------------------------------------------------------------------------------------------------------------------------------
+off (raw schema.json.getBytes per call)                    8299           8334          33          0.1        8299.2       1.0X
+on  (intern.encodeBytes: cached canonical bytes)              6              6           0        164.4           6.1    1364.2X
+
+
+================================================================================================
+StructType JSON codec - decode (Section B)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+decode w=10 n=1:                                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------------
+off (raw DataType.fromJson per call)                                 1031           1210         167          0.1       10305.2       1.0X
+on  (intern.decodeStructType: cached canonical StructType)             64             64           0          1.6         637.0      16.2X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+decode w=10 n=32:                                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------------
+off (raw DataType.fromJson per call)                                 1061           1064           2          0.1       10610.7       1.0X
+on  (intern.decodeStructType: cached canonical StructType)            160            173           8          0.6        1598.6       6.6X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+decode w=100 n=1:                                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------------
+off (raw DataType.fromJson per call)                                 9251           9282          37          0.0       92513.1       1.0X
+on  (intern.decodeStructType: cached canonical StructType)            630            636           6          0.2        6297.9      14.7X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+decode w=100 n=32:                                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------------
+off (raw DataType.fromJson per call)                                 9560           9655          58          0.0       95595.5       1.0X
+on  (intern.decodeStructType: cached canonical StructType)            962            999          82          0.1        9619.5       9.9X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+decode w=1000 n=1:                                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------------
+off (raw DataType.fromJson per call)                                92256          92822        1012          0.0      922556.9       1.0X
+on  (intern.decodeStructType: cached canonical StructType)           6544           6568          20          0.0       65437.9      14.1X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+decode w=1000 n=32:                                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------------
+off (raw DataType.fromJson per call)                                94787          95102         483          0.0      947865.7       1.0X
+on  (intern.decodeStructType: cached canonical StructType)           9940           9970          18          0.0       99401.4       9.5X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+decode tpcds-store_sales-23col:                             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------------------
+off (raw DataType.fromJson per call)                                 2207           2348         182          0.0       22073.8       1.0X
+on  (intern.decodeStructType: cached canonical StructType)            185            198          24          0.5        1854.1      11.9X
+
+
+================================================================================================
+StructType JSON codec - working-set sweep (Section C)
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+C1 hit (256 schemas == cap):              Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+off                                                 102            261         138          0.3        3972.9       1.0X
+on                                                    4              4           0          6.1         163.8      24.3X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+C2 partial (512 schemas == 2x cap):       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+off                                                 206            464         334          0.2        4016.1       1.0X
+on                                                    9              9           0          5.4         184.2      21.8X
+
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
+AMD EPYC 7763 64-Core Processor
+C3 churn (1024 schemas == 4x cap):        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+off                                                 407            471         137          0.3        3969.8       1.0X
+on                                                   17             17           0          6.1         165.3      24.0X
+