diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala index 85c9d2854c7..1da42f77762 100644 --- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala +++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala @@ -53,7 +53,6 @@ import java.io.ByteArrayOutputStream import java.lang.{Double => JDouble, Float => JFloat} import java.math.{BigDecimal => JBigDecimal, BigInteger} import java.nio.{ByteBuffer, ByteOrder} -import java.nio.charset.StandardCharsets.UTF_8 import java.util.Arrays import scala.util.control.NonFatal @@ -124,7 +123,7 @@ class CachedColumnarBatchKryoSerializer extends KryoSerializer[CachedColumnarBat output.writeBoolean(false) } else { output.writeBoolean(true) - val schemaBytes = batch.schema.json.getBytes(UTF_8) + val schemaBytes = CachedColumnarBatchKryoSerializer.SchemaIntern.encodeBytes(batch.schema) output.writeInt(schemaBytes.length) output.writeBytes(schemaBytes) } @@ -220,12 +219,16 @@ class CachedColumnarBatchKryoSerializer extends KryoSerializer[CachedColumnarBat ) val schemaBytes = new Array[Byte](schemaLen) input.readBytes(schemaBytes) - DataType.fromJson(new String(schemaBytes, UTF_8)).asInstanceOf[StructType] + CachedColumnarBatchKryoSerializer.SchemaIntern.decodeStructType(schemaBytes) } } } object CachedColumnarBatchKryoSerializer { + // Process-wide schema-codec memoizer. Singleton so the cache survives across Kryo's per-stream + // serializer instances within the same JVM. + private[execution] val SchemaIntern: SchemaJsonInternCache = new SchemaJsonInternCache + // Defensive upper bound on any single length-prefixed field in the Kryo wire (payload bytes, // statsBlob, schema JSON). 
Tied to spark.kryoserializer.buffer.max because Kryo write itself // refuses to emit any single object larger than that ceiling, so any stream claiming a larger diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/SchemaJsonInternCache.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/SchemaJsonInternCache.scala new file mode 100644 index 00000000000..fecf218c6a7 --- /dev/null +++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/SchemaJsonInternCache.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.spark.sql.types.{DataType, StructType} + +import com.github.benmanes.caffeine.cache.{Cache, Caffeine} + +import java.nio.charset.StandardCharsets + +/** + * Process-local memoizer for `StructType <-> JSON` codec on the cached-batch hot path. Best-effort + * size-bounded Caffeine cache (W-TinyLFU); eviction recomputes via the same pure codec, so misses + * are indistinguishable from the no-cache baseline. Thread-safety via Caffeine + * `get(key, mappingFunction)`. 
+ */
+final private[execution] class SchemaJsonInternCache {
+  import SchemaJsonInternCache.CAP
+
+  private val encodeCache: Cache[StructType, Array[Byte]] =
+    Caffeine.newBuilder().maximumSize(CAP).build[StructType, Array[Byte]]()
+
+  private val decodeCache: Cache[String, StructType] =
+    Caffeine.newBuilder().maximumSize(CAP).build[String, StructType]()
+
+  /**
+   * Memoized UTF-8 bytes of `schema.json`. A hit hands back the array instance held by the cache,
+   * so callers must never mutate the result.
+   */
+  def encodeBytes(schema: StructType): Array[Byte] =
+    encodeCache.get(schema, st => st.json.getBytes(StandardCharsets.UTF_8))
+
+  /** Memoized parse of `bytes` (UTF-8 JSON) into a [[StructType]], keyed by the decoded text. */
+  def decodeStructType(bytes: Array[Byte]): StructType =
+    decodeCache.get(
+      new String(bytes, StandardCharsets.UTF_8),
+      json => DataType.fromJson(json).asInstanceOf[StructType])
+}
+
+private[execution] object SchemaJsonInternCache {
+  // Entry bound applied to each direction (encode and decode) separately. 256 is meant to cover
+  // the unique-schema fanout of a typical multi-cached-table query while keeping retained heap
+  // bounded even for wide schemas. Retune by re-running the schema-codec working-set sweep
+  // section of ColumnarTableCachePartitionStatsBenchmark.
+  private[execution] val CAP: Long = 256L
+}
diff --git a/backends-velox/src/test/scala/org/apache/spark/sql/execution/SchemaJsonInternCacheSuite.scala b/backends-velox/src/test/scala/org/apache/spark/sql/execution/SchemaJsonInternCacheSuite.scala
new file mode 100644
index 00000000000..6ef205cafde
--- /dev/null
+++ b/backends-velox/src/test/scala/org/apache/spark/sql/execution/SchemaJsonInternCacheSuite.scala
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType} + +import java.nio.charset.StandardCharsets +import java.util.concurrent.{CountDownLatch, Executors, TimeUnit} +import java.util.concurrent.atomic.AtomicInteger + +import scala.util.Random + +/** + * Invariants for [[SchemaJsonInternCache]]: (1) determinism -- equal inputs yield byte-identical / + * canonical-instance outputs; (2) capacity -- size-bounded cap = 256, eviction never corrupts later + * results; (3) concurrency -- contended get-or-compute yields correct results without exception. 
+ */
+class SchemaJsonInternCacheSuite extends SparkFunSuite {
+
+  // `n` nullable LongType columns named `<prefix>0 .. <prefix>(n-1)`; passing a unique prefix
+  // per call yields schemas that are distinct cache keys even when their widths repeat.
+  private def schemaOfWidth(n: Int, prefix: String = "c"): StructType =
+    StructType((0 until n).map(i => StructField(s"$prefix$i", LongType, nullable = true)))
+
+  // === Invariant 1: determinism ===
+
+  test("encode is deterministic: same StructType => byte-identical output") {
+    val intern = new SchemaJsonInternCache
+    val s = schemaOfWidth(10)
+    val a = intern.encodeBytes(s)
+    val b = intern.encodeBytes(s)
+    assert(a.sameElements(b), "encodeBytes must be deterministic for equal inputs")
+    // intern is a memoizer, not a transformer
+    val raw = s.json.getBytes(StandardCharsets.UTF_8)
+    assert(a.sameElements(raw), "encodeBytes(s) must equal s.json.getBytes(UTF_8)")
+  }
+
+  test("decode is deterministic: same bytes => structurally-equal StructType") {
+    val intern = new SchemaJsonInternCache
+    val s = StructType(Seq(
+      StructField("a", IntegerType),
+      StructField("b", StringType),
+      StructField("c", LongType, nullable = false)))
+    val bytes = s.json.getBytes(StandardCharsets.UTF_8)
+    val d1 = intern.decodeStructType(bytes)
+    val d2 = intern.decodeStructType(bytes)
+    assert(d1 == s)
+    assert(d2 == s)
+    // canonical-instance contract: equal bytes => same instance (saves repeated parse cost)
+    assert(d1.eq(d2), "decodeStructType must return the same canonical instance for equal bytes")
+  }
+
+  test("encode canonicality: same StructType returns the same byte array instance") {
+    val intern = new SchemaJsonInternCache
+    val s = schemaOfWidth(5)
+    val a = intern.encodeBytes(s)
+    val b = intern.encodeBytes(s)
+    assert(a.eq(b), "encodeBytes must return the same canonical byte array for equal inputs")
+  }
+
+  // === Invariant 2: capacity ===
+
+  test("cap = 256 entries: eviction past cap does not corrupt later results") {
+    val intern = new SchemaJsonInternCache
+    val total = SchemaJsonInternCache.CAP.toInt * 4 // 4x cap distinct keys => sustained eviction
+    val schemas = (0 until total).map(i => schemaOfWidth(8 + (i % 16), s"k${i}_"))
+    schemas.foreach(intern.encodeBytes)
+    schemas.zipWithIndex.foreach {
+      case (s, i) =>
+        val cached = intern.encodeBytes(s)
+        val raw = s.json.getBytes(StandardCharsets.UTF_8)
+        assert(
+          cached.sameElements(raw),
+          s"entry $i (width=${s.length}) was corrupted across eviction cycles")
+    }
+  }
+
+  test("decode under cap pressure: >= cap distinct bytes still all decode correctly") {
+    val intern = new SchemaJsonInternCache
+    val distinct = SchemaJsonInternCache.CAP.toInt * 4 // 4x cap distinct JSON keys
+    val pairs = (0 until distinct).map {
+      i =>
+        val s = schemaOfWidth(8 + (i % 16), s"k${i}_")
+        (s, s.json.getBytes(StandardCharsets.UTF_8))
+    }
+    // walk twice -- second walk hits a mix of evicted and live entries
+    pairs.foreach { case (_, bytes) => intern.decodeStructType(bytes) }
+    pairs.foreach {
+      case (s, bytes) =>
+        val decoded = intern.decodeStructType(bytes)
+        assert(decoded == s, s"decoded != expected for width=${s.length}")
+    }
+  }
+
+  // === Invariant 3: concurrency ===
+
+  test("concurrent get-or-compute: N threads on overlapping keys yields correct results") {
+    val intern = new SchemaJsonInternCache
+    val threads = 8
+    val keysPerThread = 200
+    val sharedKeySpace = 64 // overlap forces contention on same cache slots
+    val schemas = (0 until sharedKeySpace).map(i => schemaOfWidth(8 + (i % 12)))
+
+    val pool = Executors.newFixedThreadPool(threads)
+    val start = new CountDownLatch(1)
+    val errors = new AtomicInteger(0)
+    val random = new Random(42)
+
+    val futures = (0 until threads).map {
+      tid =>
+        val rnd = new Random(random.nextLong())
+        pool.submit(new Runnable {
+          override def run(): Unit = {
+            start.await()
+            var i = 0
+            while (i < keysPerThread) {
+              val s = schemas(rnd.nextInt(sharedKeySpace))
+              try {
+                val enc = intern.encodeBytes(s)
+                val raw = s.json.getBytes(StandardCharsets.UTF_8)
+                if (!enc.sameElements(raw)) errors.incrementAndGet()
+
+                val dec = intern.decodeStructType(raw)
+                if (dec != s) errors.incrementAndGet()
+              } catch {
+                case scala.util.control.NonFatal(_) => errors.incrementAndGet()
+              }
+              i += 1
+            }
+          }
+        })
+    }
+    start.countDown()
+    try {
+      futures.foreach(_.get(60, TimeUnit.SECONDS))
+    } finally {
+      pool.shutdown()
+      assert(pool.awaitTermination(10, TimeUnit.SECONDS), "thread pool did not terminate")
+    }
+    assert(
+      errors.get() == 0,
+      s"${errors.get()} concurrent get-or-compute errors out of ${threads * keysPerThread} ops")
+  }
+}
diff --git a/backends-velox/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnarTableCachePartitionStatsBenchmark.scala b/backends-velox/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnarTableCachePartitionStatsBenchmark.scala
index 5d66da42594..3d62689285d 100644
--- a/backends-velox/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnarTableCachePartitionStatsBenchmark.scala
+++ b/backends-velox/src/test/scala/org/apache/spark/sql/execution/benchmark/ColumnarTableCachePartitionStatsBenchmark.scala
@@ -20,11 +20,16 @@ import org.apache.gluten.config.GlutenConfig
 
 import org.apache.spark.benchmark.Benchmark
 import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.execution.SchemaJsonInternCache
+import org.apache.spark.sql.types._
 import org.apache.spark.storage.StorageLevel
 
+import java.nio.charset.StandardCharsets
+
 /**
  * Benchmark to measure write/read overhead and pruning benefit of partition stats in columnar table
- * cache. To run this benchmark:
+ * cache, plus microbench coverage for the schema-codec intern cache used by
+ * `ColumnarCachedBatchSerializer`. To run this benchmark:
  * {{{
  *   1. without sbt:
  *      bin/spark-submit --class --jars
@@ -59,6 +64,163 @@ object ColumnarTableCachePartitionStatsBenchmark extends SqlBasedBenchmark {
     }
   }
 
+  // ============================================================================
+  // Schema-codec intern microbench (SchemaJsonInternCache).
+  //
+  // ColumnarCachedBatchSerializer hot paths call StructType.json on every batch
+  // write and DataType.fromJson on every batch read. 
The intern cache memoizes
+  // the round-trip without changing the wire format. Sections below compare two
+  // distinct method calls in the same JVM as cache off (raw codec) vs cache on
+  // (intern memoized round-trip), with no toggle on the cache class itself.
+  // ============================================================================
+
+  private val INTERN_CAP = SchemaJsonInternCache.CAP.toInt
+
+  // `numCols` nullable LongType columns named `<"c" + "x" padding to nameLen><suffix><index>`.
+  private def schemaFixture(numCols: Int, nameLen: Int, suffix: String = ""): StructType = {
+    val name = "c" + ("x" * math.max(0, nameLen - 1))
+    StructType(
+      (0 until numCols).map(i => StructField(s"$name$suffix$i", LongType, nullable = true)))
+  }
+
+  // TPC-DS store_sales-derived 23-col mixed-type fixture; realistic name shape.
+  private def realisticSchema: StructType = StructType(
+    Seq(
+      StructField("ss_sold_date_sk", IntegerType),
+      StructField("ss_sold_time_sk", IntegerType),
+      StructField("ss_item_sk", IntegerType),
+      StructField("ss_customer_sk", IntegerType),
+      StructField("ss_cdemo_sk", IntegerType),
+      StructField("ss_hdemo_sk", IntegerType),
+      StructField("ss_addr_sk", IntegerType),
+      StructField("ss_store_sk", IntegerType),
+      StructField("ss_promo_sk", IntegerType),
+      StructField("ss_ticket_number", LongType),
+      StructField("ss_quantity", IntegerType),
+      StructField("ss_wholesale_cost", DecimalType(7, 2)),
+      StructField("ss_list_price", DecimalType(7, 2)),
+      StructField("ss_sales_price", DecimalType(7, 2)),
+      StructField("ss_ext_discount_amt", DecimalType(7, 2)),
+      StructField("ss_ext_sales_price", DecimalType(7, 2)),
+      StructField("ss_ext_wholesale_cost", DecimalType(7, 2)),
+      StructField("ss_ext_list_price", DecimalType(7, 2)),
+      StructField("ss_ext_tax", DecimalType(7, 2)),
+      StructField("ss_coupon_amt", DecimalType(7, 2)),
+      StructField("ss_net_paid", DecimalType(7, 2)),
+      StructField("ss_net_paid_inc_tax", DecimalType(7, 2)),
+      StructField("ss_net_profit", DecimalType(7, 2))
+    ))
+
+  private val internSchemas: Seq[(String, StructType)] =
+    (for {
+      width <- Seq(10, 100, 1000)
+      nameLen <- Seq(1, 32)
+    } yield (s"w=$width n=$nameLen", schemaFixture(width, nameLen))) :+
+      ("tpcds-store_sales-23col" -> realisticSchema)
+
+  private def runInternEncode(label: String, schema: StructType): Unit = {
+    val N = 1L * 1000 * 1000
+    val intern = new SchemaJsonInternCache
+    val bench = new Benchmark(label, N, output = output)
+    bench.addCase("off (raw schema.json.getBytes per call)", 5) {
+      _ =>
+        var i = 0L
+        var checksum = 0L
+        while (i < N) {
+          val bytes = schema.json.getBytes(StandardCharsets.UTF_8)
+          checksum ^= bytes.length.toLong
+          i += 1
+        }
+        assert(checksum != Long.MinValue, s"checksum=$checksum")
+    }
+    bench.addCase("on (intern.encodeBytes: cached canonical bytes)", 5) {
+      _ =>
+        var i = 0L
+        var checksum = 0L
+        while (i < N) {
+          val bytes = intern.encodeBytes(schema)
+          checksum ^= bytes.length.toLong
+          i += 1
+        }
+        assert(checksum != Long.MinValue, s"checksum=$checksum")
+    }
+    bench.run()
+  }
+
+  private def runInternDecode(label: String, schema: StructType): Unit = {
+    val N = 1L * 100 * 1000
+    val intern = new SchemaJsonInternCache
+    val jsonBytes = schema.json.getBytes(StandardCharsets.UTF_8)
+    val bench = new Benchmark(label, N, output = output)
+    bench.addCase("off (raw DataType.fromJson per call)", 5) {
+      _ =>
+        var i = 0L
+        var checksum = 0L
+        while (i < N) {
+          val s = DataType
+            .fromJson(new String(jsonBytes, StandardCharsets.UTF_8))
+            .asInstanceOf[StructType]
+          checksum ^= s.length.toLong
+          i += 1
+        }
+        assert(checksum != Long.MinValue, s"checksum=$checksum")
+    }
+    bench.addCase("on (intern.decodeStructType: cached canonical StructType)", 5) {
+      _ =>
+        var i = 0L
+        var checksum = 0L
+        while (i < N) {
+          val s = intern.decodeStructType(jsonBytes)
+          checksum ^= s.length.toLong
+          i += 1
+        }
+        assert(checksum != Long.MinValue, s"checksum=$checksum")
+    }
+    bench.run()
+  }
+
+  // Working-set sweep across three regimes, parameterized by distinct-schema count vs cache cap:
+  //   C1: 256 schemas == cap  -> all fit, steady state
+  //   C2: 512 schemas == 2x   -> cap pressure
+  //   C3: 1024 schemas == 4x  -> churn
+  // A unique per-schema name suffix makes the distinct-key count equal `distinctCount`. On/off
+  // ratios depend on W-TinyLFU admission; regenerate `-results.txt` after changing this fixture.
+  private def runInternWorkingSetSweep(): Unit = {
+    val passes = 100
+    Seq(
+      ("C1 hit (256 schemas == cap)", INTERN_CAP),
+      ("C2 partial (512 schemas == 2x cap)", INTERN_CAP * 2),
+      ("C3 churn (1024 schemas == 4x cap)", INTERN_CAP * 4)
+    ).foreach {
+      case (label, distinctCount) =>
+        val many = (0 until distinctCount).map(i => schemaFixture(10, 8 + (i % 16), s"_${i}_"))
+        val N = many.length.toLong * passes
+        val intern = new SchemaJsonInternCache
+        val bench = new Benchmark(label, N, output = output)
+        bench.addCase("off", 5) {
+          _ =>
+            var p = 0
+            var checksum = 0L
+            while (p < passes) {
+              many.foreach(s => checksum ^= s.json.getBytes(StandardCharsets.UTF_8).length.toLong)
+              p += 1
+            }
+            assert(checksum != Long.MinValue, s"checksum=$checksum")
+        }
+        bench.addCase("on", 5) {
+          _ =>
+            var p = 0
+            var checksum = 0L
+            while (p < passes) {
+              many.foreach(s => checksum ^= intern.encodeBytes(s).length.toLong)
+              p += 1
+            }
+            assert(checksum != Long.MinValue, s"checksum=$checksum")
+        }
+        bench.run()
+    }
+  }
+
   override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
     // === Benchmark 1: write-path overhead (cache build) ===
     val buildBench = new Benchmark("table cache build", numRows, output = output)
@@ -118,5 +280,20 @@ object ColumnarTableCachePartitionStatsBenchmark extends SqlBasedBenchmark {
     readPointBench.run()
 
     spark.catalog.clearCache()
+
+    // === Benchmark 5: schema-codec intern microbench - encode (Section A) ===
+    runBenchmark("StructType JSON codec - encode (Section A)") {
+      internSchemas.foreach { case (label, sch) => runInternEncode(s"encode $label", sch) }
+    }
+
+    // === Benchmark 6: schema-codec intern microbench - decode (Section B) ===
+    runBenchmark("StructType JSON codec - decode (Section B)") {
+      internSchemas.foreach { case (label, sch) => runInternDecode(s"decode $label", sch) }
+    }
+
+    // === Benchmark 7: schema-codec intern working-set sweep (Section C) ===
+    runBenchmark("StructType JSON codec - working-set sweep (Section C)") {
+      runInternWorkingSetSweep()
+    }
   }
 }
diff --git a/benchmarks/ColumnarTableCachePartitionStatsBenchmark-results.txt b/benchmarks/ColumnarTableCachePartitionStatsBenchmark-results.txt
index a3599570d27..07828d10a5e 100644
--- a/benchmarks/ColumnarTableCachePartitionStatsBenchmark-results.txt
+++ b/benchmarks/ColumnarTableCachePartitionStatsBenchmark-results.txt
@@ -1,28 +1,162 @@
-OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
 AMD EPYC 7763 64-Core Processor
 table cache build: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
 ------------------------------------------------------------------------------------------------------------------------
-partitionStats off 126425 138565 10546 0.8 1264.3 1.0X
-partitionStats on 131431 137094 7581 0.8 1314.3 1.0X
+partitionStats off 127107 127856 668 0.8 1271.1 1.0X
+partitionStats on 134398 146067 10193 0.7 1344.0 0.9X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
+OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2
 AMD EPYC 7763 64-Core Processor
 table cache filter+agg (high selectivity, ~0.001%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
 ----------------------------------------------------------------------------------------------------------------------------------
-partitionStats off 4431 4492 70 22.6 44.3 1.0X
-partitionStats on 1744 1777 31 57.3 17.4 2.5X
+partitionStats off 4756 5283 855 21.0 47.6 1.0X
+partitionStats on 570 584 14 175.6 5.7 8.4X
 
-OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 
6.6.87.2-microsoft-standard-WSL2 +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 AMD EPYC 7763 64-Core Processor table cache filter+agg (low selectivity, ~50%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -partitionStats off 5332 5411 70 18.8 53.3 1.0X -partitionStats on 3392 3446 90 29.5 33.9 1.6X +partitionStats off 5229 5309 115 19.1 52.3 1.0X +partitionStats on 3374 3385 13 29.6 33.7 1.5X -OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2 +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 AMD EPYC 7763 64-Core Processor table cache filter+agg (point lookup, 1 row): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -partitionStats off 4343 4489 129 23.0 43.4 1.0X -partitionStats on 1686 1709 21 59.3 16.9 2.6X +partitionStats off 4409 4412 3 22.7 44.1 1.0X +partitionStats on 368 417 83 271.4 3.7 12.0X + +================================================================================================ +StructType JSON codec - encode (Section A) +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +encode w=10 n=1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +off (raw schema.json.getBytes per call) 3622 3636 27 0.3 3621.8 1.0X +on (intern.encodeBytes: cached canonical bytes) 6 6 0 176.5 5.7 639.4X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 
6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +encode w=10 n=32: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +off (raw schema.json.getBytes per call) 4121 4238 200 0.2 4120.7 1.0X +on (intern.encodeBytes: cached canonical bytes) 6 6 0 165.0 6.1 679.8X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +encode w=100 n=1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +off (raw schema.json.getBytes per call) 32890 33149 154 0.0 32890.0 1.0X +on (intern.encodeBytes: cached canonical bytes) 6 6 0 163.8 6.1 5386.6X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +encode w=100 n=32: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +off (raw schema.json.getBytes per call) 35756 36230 771 0.0 35755.8 1.0X +on (intern.encodeBytes: cached canonical bytes) 6 6 0 163.1 6.1 5832.6X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +encode w=1000 n=1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +off (raw schema.json.getBytes per call) 332577 334072 1843 0.0 332577.3 1.0X +on (intern.encodeBytes: cached canonical bytes) 6 6 0 164.6 6.1 54735.5X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +encode w=1000 n=32: 
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +off (raw schema.json.getBytes per call) 444692 445510 1066 0.0 444691.5 1.0X +on (intern.encodeBytes: cached canonical bytes) 6 6 0 165.6 6.0 73661.2X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +encode tpcds-store_sales-23col: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +off (raw schema.json.getBytes per call) 8299 8334 33 0.1 8299.2 1.0X +on (intern.encodeBytes: cached canonical bytes) 6 6 0 164.4 6.1 1364.2X + + +================================================================================================ +StructType JSON codec - decode (Section B) +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +decode w=10 n=1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------ +off (raw DataType.fromJson per call) 1031 1210 167 0.1 10305.2 1.0X +on (intern.decodeStructType: cached canonical StructType) 64 64 0 1.6 637.0 16.2X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +decode w=10 n=32: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------ +off (raw DataType.fromJson per call) 1061 1064 2 0.1 10610.7 1.0X +on 
(intern.decodeStructType: cached canonical StructType) 160 173 8 0.6 1598.6 6.6X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +decode w=100 n=1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------ +off (raw DataType.fromJson per call) 9251 9282 37 0.0 92513.1 1.0X +on (intern.decodeStructType: cached canonical StructType) 630 636 6 0.2 6297.9 14.7X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +decode w=100 n=32: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------ +off (raw DataType.fromJson per call) 9560 9655 58 0.0 95595.5 1.0X +on (intern.decodeStructType: cached canonical StructType) 962 999 82 0.1 9619.5 9.9X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +decode w=1000 n=1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------ +off (raw DataType.fromJson per call) 92256 92822 1012 0.0 922556.9 1.0X +on (intern.decodeStructType: cached canonical StructType) 6544 6568 20 0.0 65437.9 14.1X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +decode w=1000 n=32: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------ +off (raw DataType.fromJson per call) 94787 95102 483 0.0 947865.7 1.0X +on 
(intern.decodeStructType: cached canonical StructType) 9940 9970 18 0.0 99401.4 9.5X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +decode tpcds-store_sales-23col: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------ +off (raw DataType.fromJson per call) 2207 2348 182 0.0 22073.8 1.0X +on (intern.decodeStructType: cached canonical StructType) 185 198 24 0.5 1854.1 11.9X + + +================================================================================================ +StructType JSON codec - working-set sweep (Section C) +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +C1 hit (256 schemas == cap): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +off 102 261 138 0.3 3972.9 1.0X +on 4 4 0 6.1 163.8 24.3X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +C2 partial (512 schemas == 2x cap): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +off 206 464 334 0.2 4016.1 1.0X +on 9 9 0 5.4 184.2 21.8X + +OpenJDK 64-Bit Server VM 17.0.18+8 on Linux 6.6.87.2-microsoft-standard-WSL2 +AMD EPYC 7763 64-Core Processor +C3 churn (1024 schemas == 4x cap): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +off 407 471 137 
0.3 3969.8 1.0X +on 17 17 0 6.1 165.3 24.0X +