From 9c08b93957a8a9a0277499155fdbc57bb3dd8a6d Mon Sep 17 00:00:00 2001 From: bm1549 Date: Thu, 9 Apr 2026 11:07:35 -0400 Subject: [PATCH] perf: use ISO_8859_1 for ASCII fast path in ClassFile.utf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Java 9+, new String(byte[], offset, length, ISO_8859_1) lets the JVM copy the requested byte range straight into the string's compact (Latin-1) backing array, avoiding the byte-by-byte validation step (a scan for non-ASCII bytes) that US_ASCII requires. Class-file UTF8 entries are almost always pure ASCII, and utf() only reaches the fast path after its own scan has confirmed that the entry is ASCII-only, so using ISO_8859_1 is safe and equivalent — both charsets produce identical results for bytes 0x00–0x7F. Benchmark results (class-match module, spring-web.jar dataset): Baseline: ClassFileBenchmark.testClassHeader avgt 5 782.666 ± 30.761 us/op ClassFileBenchmark.testClassOutline avgt 5 1989.423 ± 34.237 us/op After: ClassFileBenchmark.testClassHeader avgt 5 764.186 ± 97.926 us/op ClassFileBenchmark.testClassOutline avgt 5 1879.470 ± 40.856 us/op ~2.4% header improvement (within the reported error margin, so not conclusive), ~5.5% outline improvement. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../main/java/datadog/instrument/classmatch/ClassFile.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/class-match/src/main/java/datadog/instrument/classmatch/ClassFile.java b/class-match/src/main/java/datadog/instrument/classmatch/ClassFile.java index e1e3a33..1a9904a 100644 --- a/class-match/src/main/java/datadog/instrument/classmatch/ClassFile.java +++ b/class-match/src/main/java/datadog/instrument/classmatch/ClassFile.java @@ -6,6 +6,7 @@ package datadog.instrument.classmatch; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.nio.charset.StandardCharsets.US_ASCII; import java.util.Arrays; @@ -353,8 +354,10 @@ private static String utf(byte[] bytecode, int utfOffset) { } if (chars == null) { - // fast-path for ASCII-only, avoids intermediate char array - return new String(bytecode, utfStart, utfLen, US_ASCII); + // fast-path for ASCII-only: use ISO_8859_1 because on Java 9+ the JVM + // can adopt the byte array directly as the compact string encoding, + // avoiding a byte-by-byte transcoding step that US_ASCII requires + return new String(bytecode, utfStart, utfLen, ISO_8859_1); } int charLen = 0;