From 49d9f792bbd5f8e92620ba09c0a56ace79fd1d67 Mon Sep 17 00:00:00 2001 From: "alejandro.gonzalez" Date: Wed, 4 Feb 2026 10:22:32 +0100 Subject: [PATCH 1/4] First Version --- .../benchmark/NumericConversionBenchmark.java | 221 ++++++++++++++++++ .../appsec/gateway/AppSecRequestContext.java | 65 +++++- .../AppSecRequestContextSpecification.groovy | 95 ++++++++ 3 files changed, 376 insertions(+), 5 deletions(-) create mode 100644 dd-java-agent/appsec/src/jmh/java/datadog/appsec/benchmark/NumericConversionBenchmark.java diff --git a/dd-java-agent/appsec/src/jmh/java/datadog/appsec/benchmark/NumericConversionBenchmark.java b/dd-java-agent/appsec/src/jmh/java/datadog/appsec/benchmark/NumericConversionBenchmark.java new file mode 100644 index 00000000000..9d4568ff74a --- /dev/null +++ b/dd-java-agent/appsec/src/jmh/java/datadog/appsec/benchmark/NumericConversionBenchmark.java @@ -0,0 +1,221 @@ +package datadog.appsec.benchmark; + +import static java.util.Collections.singletonMap; +import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; + +import com.datadog.appsec.gateway.AppSecRequestContext; +import datadog.trace.api.internal.TraceSegment; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Benchmark for numeric attribute conversion performance (issue #10494). + * + *

Tests the optimization that replaces exception-driven parsing with fast-path validation to + * avoid NumberFormatException overhead on invalid inputs. + * + *

Expected results: - Valid numeric inputs: no regression (~100-200ns/op) - Invalid inputs: + * 10-100x improvement (from ~1000ns+ with exceptions to <100ns without) - Empty/whitespace: fast + * rejection (<50ns/op) + */ +@State(Scope.Benchmark) +@Warmup(iterations = 4, time = 2, timeUnit = SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = SECONDS) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(NANOSECONDS) +@Fork(value = 3) +public class NumericConversionBenchmark { + + static { + BenchmarkUtil.disableLogging(); + } + + private AppSecRequestContext context; + private TraceSegment mockTraceSegment; + + @Setup(Level.Iteration) + public void setUp() { + context = new AppSecRequestContext(); + // Use NoOp TraceSegment to minimize overhead + mockTraceSegment = TraceSegment.NoOp.INSTANCE; + } + + @Benchmark + public void validInteger(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "42"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validIntegerNegative(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "-12345"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validIntegerWithSign(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "+999"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validDecimal(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "3.14"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validDecimalNegative(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "-99.5"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validLargeNumber(Blackhole blackhole) { + context.reportDerivatives( + singletonMap("test_attr", singletonMap("value", "9223372036854775807"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Invalid inputs - these should show the biggest improvement (no exceptions thrown) + @Benchmark + public void invalidAlphabetic(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "not_a_number"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidAlphanumeric(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "12x34"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidMultipleDecimals(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "3.14.15"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidHexFormat(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "0x10"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidScientific(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "1e10"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidSpecialChars(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "$100"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Whitespace handling - now works correctly after optimization (issue #10494) + @Benchmark + public void whitespaceLeading(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", " 42"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void whitespaceTrailing(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "42 "))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void whitespaceBoth(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", " 42 "))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void whitespaceTabNewline(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "\t100\n"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Empty/null - fast rejection + @Benchmark + public void emptyString(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", ""))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void whitespaceOnly(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", " "))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Overflow cases - should handle gracefully without exceptions + @Benchmark + public void overflowLongMax(Blackhole blackhole) { + context.reportDerivatives( + singletonMap("test_attr", singletonMap("value", "9223372036854775808"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void overflowVeryLarge(Blackhole blackhole) { + context.reportDerivatives( + singletonMap("test_attr", singletonMap("value", "99999999999999999999999"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Mixed workload - realistic scenario with 80% invalid, 20% valid + @Benchmark + public void mixedWorkload(Blackhole blackhole) { + // Invalid (80%) + context.reportDerivatives(singletonMap("attr1", singletonMap("value", "invalid"))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + + context.reportDerivatives(singletonMap("attr2", singletonMap("value", "abc123"))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + + context.reportDerivatives(singletonMap("attr3", singletonMap("value", "0x10"))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + + context.reportDerivatives(singletonMap("attr4", singletonMap("value", ""))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + + // Valid (20%) + context.reportDerivatives(singletonMap("attr5", singletonMap("value", "42"))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + } +} diff --git a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java index 2b197756697..dae865eaddd 100644 --- a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java +++ b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java @@ -729,20 +729,75 @@ List getStackTraces() { /** * Attempts to parse a string value as a number. Returns the parsed number if successful, null * otherwise. Tries to parse as integer first, then as double if it contains a decimal point. + * + *

This method uses fast-path validation to avoid throwing NumberFormatException for invalid + * inputs, significantly reducing overhead when processing non-numeric attribute values (issue + * #10494). The method trims whitespace before validation, allowing strings like " 42 " to parse + * successfully. + * + * @param value the string to parse + * @return a Long or Double if the string is a valid number, null otherwise */ private static Number convertToNumericAttribute(String value) { if (value == null || value.isEmpty()) { return null; } + + // Trim whitespace to handle common cases like " 42 " or "\t100\n" + String trimmed = value.trim(); + if (trimmed.isEmpty()) { + return null; + } + + // Fast-path validation: check if the string looks like a valid number + // before attempting to parse, to avoid expensive NumberFormatException throws + boolean hasDecimal = false; + int startIdx = 0; + int length = trimmed.length(); + + // Handle optional leading sign + char firstChar = trimmed.charAt(0); + if (firstChar == '+' || firstChar == '-') { + startIdx = 1; + // Sign-only strings like "+" or "-" are not valid numbers + if (startIdx >= length) { + return null; + } + } + + // Validate that remaining characters are digits or a single decimal point + boolean hasDigits = false; + for (int i = startIdx; i < length; i++) { + char c = trimmed.charAt(i); + if (c == '.') { + if (hasDecimal) { + // Multiple decimal points (e.g., "3.14.15") - invalid + return null; + } + hasDecimal = true; + } else if (c >= '0' && c <= '9') { + hasDigits = true; + } else { + // Non-digit, non-decimal character (e.g., "12x34", "abc") - invalid + return null; + } + } + + // Must have at least one digit (reject strings like "." or "+.") + if (!hasDigits) { + return null; + } + + // Now attempt parsing - the try-catch remains as a fallback for overflow cases + // (e.g., Long.MAX_VALUE+1) or other edge cases that passed pre-validation try { - // Check if it contains a decimal point to determine if it's a double - if (value.contains(".")) { - return Double.parseDouble(value); + if (hasDecimal) { + return Double.parseDouble(trimmed); } else { - // Try to parse as integer first - return Long.parseLong(value); + return Long.parseLong(trimmed); } } catch (NumberFormatException e) { + // Overflow or other parse failure despite pre-check return null; } } diff --git a/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy b/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy index ae0d3736bca..93f33a267c4 100644 --- a/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy +++ b/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy @@ -655,6 +655,101 @@ class AppSecRequestContextSpecification extends DDSpecification { } } + def "test numeric conversion with edge cases: #description"() { + given: + def context = new AppSecRequestContext() + def mockTraceSegment = Mock(datadog.trace.api.internal.TraceSegment) + def committedTags = [:] + + mockTraceSegment.setTagTop(_ as String, _ as Object) >> { String key, Object value -> + committedTags[key] = value + } + + when: + context.reportDerivatives(['test_attr': ['value': inputValue]]) + context.commitDerivatives(mockTraceSegment) + + then: + // When conversion fails (expectedValue is null), the original string value should be preserved + // When conversion succeeds, the converted numeric value should be used + committedTags['test_attr'] == (expectedValue != null ? expectedValue : inputValue) + + where: + description | inputValue | expectedValue + // Valid integers + 'zero' | '0' | 0L + 'positive integer' | '42' | 42L + 'negative integer' | '-100' | -100L + 'integer with plus sign' | '+999' | 999L + 'large valid long' | '9223372036854775807' | 9223372036854775807L + 'large negative long' | '-9223372036854775808' | -9223372036854775808L + + // Valid decimals + 'simple decimal' | '3.14' | 3.14d + 'negative decimal' | '-0.5' | -0.5d + 'decimal with plus sign' | '+99.99' | 99.99d + 'zero decimal' | '0.0' | 0.0d + 'decimal with many digits' | '123.456789' | 123.456789d + + // Whitespace handling (should now parse after trim - issue #10494 fix) + 'leading whitespace integer' | ' 42' | 42L + 'trailing whitespace integer' | '42 ' | 42L + 'both whitespace integer' | ' 42 ' | 42L + 'tab and newline whitespace' | '\t100\n' | 100L + 'multiple spaces decimal' | ' -3.14 ' | -3.14d + + // Empty and null + 'null value' | null | null + 'empty string' | '' | null + 'whitespace only' | ' ' | null + 'tab only' | '\t' | null + + // Invalid formats (should return null, original string preserved) + 'alphabetic string' | 'abc' | null + 'alphanumeric string' | '12x34' | null + 'multiple decimals' | '3.14.15' | null + 'multiple signs' | '+-5' | null + 'sign in middle' | '12-34' | null + + // Sign-only strings + 'plus sign only' | '+' | null + 'minus sign only' | '-' | null + 'plus with whitespace' | ' + ' | null + + // Overflow cases (should return null gracefully) + 'long overflow positive' | '9223372036854775808' | null + 'long overflow negative' | '-9223372036854775809' | null + 'very large number' | '99999999999999999999999' | null + + // Exotic number formats (should return null - not supported) + 'hexadecimal' | '0x10' | null + 'scientific notation' | '1e10' | null + 'binary' | '0b1010' | null + 'octal' | '0777' | 777L + + // Edge decimal cases (note: .5 and 5. are valid Java double literals) + 'decimal starts with dot' | '.5' | 0.5d + 'decimal ends with dot' | '5.' | 5.0d + 'dot only' | '.' | null + 'multiple dots' | '...' | null + + // Regression - ensure existing valid formats still work + 'regression valid integer' | '100' | 100L + 'regression valid decimal' | '99.5' | 99.5d + 'regression negative' | '-50' | -50L + + // Special characters + 'comma separator' | '1,000' | null + 'underscore separator' | '1_000' | null + 'currency symbol' | '$100' | null + 'percentage' | '50%' | null + + // Edge cases with zeros + 'zero with plus' | '+0' | 0L + 'zero with minus' | '-0' | 0L + 'decimal zero variations' | '0.00' | 0.0d + } + def "test getDerivativeKeys with empty derivatives"() { given: def context = new AppSecRequestContext() From 22a5723672b9db3d416563d68f8a2bc8a9d34fe9 Mon Sep 17 00:00:00 2001 From: "alejandro.gonzalez" Date: Wed, 4 Feb 2026 13:21:57 +0100 Subject: [PATCH 2/4] Support scientific notation --- .../appsec/gateway/AppSecRequestContext.java | 40 +++++++++++++++---- .../AppSecRequestContextSpecification.groovy | 11 ++++- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java index dae865eaddd..2382449994c 100644 --- a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java +++ b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java @@ -728,13 +728,15 @@ List getStackTraces() { /** * Attempts to parse a string value as a number. Returns the parsed number if successful, null - * otherwise. Tries to parse as integer first, then as double if it contains a decimal point. + * otherwise. Tries to parse as integer first, then as double if it contains a decimal point or + * scientific notation. * *

This method uses fast-path validation to avoid throwing NumberFormatException for invalid * inputs, significantly reducing overhead when processing non-numeric attribute values (issue * #10494). The method trims whitespace before validation, allowing strings like " 42 " to parse * successfully. * + * * @param value the string to parse * @return a Long or Double if the string is a valid number, null otherwise */ @@ -751,7 +753,9 @@ private static Number convertToNumericAttribute(String value) { // Fast-path validation: check if the string looks like a valid number // before attempting to parse, to avoid expensive NumberFormatException throws + // Supports: integers, decimals, and scientific notation (e.g., "1.5e10", "-3E-7") boolean hasDecimal = false; + boolean hasExponent = false; int startIdx = 0; int length = trimmed.length(); @@ -765,25 +769,45 @@ private static Number convertToNumericAttribute(String value) { } } - // Validate that remaining characters are digits or a single decimal point + // Validate characters: digits, optional decimal point, optional exponent boolean hasDigits = false; for (int i = startIdx; i < length; i++) { char c = trimmed.charAt(i); if (c == '.') { - if (hasDecimal) { - // Multiple decimal points (e.g., "3.14.15") - invalid + if (hasDecimal || hasExponent) { + // Multiple decimal points or decimal after exponent - invalid return null; } hasDecimal = true; + } else if (c == 'e' || c == 'E') { + if (hasExponent || !hasDigits) { + // Multiple exponents or exponent without preceding digits - invalid + return null; + } + hasExponent = true; + // Next character can be optional sign (+/-) followed by digits + if (i + 1 < length) { + char next = trimmed.charAt(i + 1); + if (next == '+' || next == '-') { + i++; // Skip the sign after exponent + // Must have at least one digit after exponent sign + if (i + 1 >= length) { + return null; + } + } + } else { + // Exponent must be followed by digits + return null; + } } else if (c >= '0' && c <= '9') { hasDigits = true; } else { - // Non-digit, non-decimal character (e.g., "12x34", "abc") - invalid + // Non-digit, non-decimal, non-exponent character - invalid return null; } } - // Must have at least one digit (reject strings like "." or "+.") + // Must have at least one digit (reject strings like ".", "+.", "e10") if (!hasDigits) { return null; } @@ -791,9 +815,11 @@ private static Number convertToNumericAttribute(String value) { // Now attempt parsing - the try-catch remains as a fallback for overflow cases // (e.g., Long.MAX_VALUE+1) or other edge cases that passed pre-validation try { - if (hasDecimal) { + if (hasDecimal || hasExponent) { + // Parse as Double for decimals and scientific notation return Double.parseDouble(trimmed); } else { + // Parse as Long for plain integers return Long.parseLong(trimmed); } } catch (NumberFormatException e) { diff --git a/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy b/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy index 93f33a267c4..2be7ffbd2f0 100644 --- a/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy +++ b/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy @@ -721,9 +721,16 @@ class AppSecRequestContextSpecification extends DDSpecification { 'long overflow negative' | '-9223372036854775809' | null 'very large number' | '99999999999999999999999' | null - // Exotic number formats (should return null - not supported) + // Scientific notation (now supported for backward compatibility) + 'scientific notation lowercase' | '1e10' | 1.0e10d + 'scientific notation uppercase' | '1E10' | 1.0E10d + 'scientific with decimal' | '1.5e10' | 1.5e10d + 'scientific negative exponent' | '3.5E-7' | 3.5E-7d + 'scientific with sign' | '-2.5e+3' | -2.5e+3d + 'scientific integer base' | '5e3' | 5000.0d + + // Exotic number formats (not supported) 'hexadecimal' | '0x10' | null - 'scientific notation' | '1e10' | null 'binary' | '0b1010' | null 'octal' | '0777' | 777L From fb3d48bcf5c0c0bded7b3fccb668d990ede64025 Mon Sep 17 00:00:00 2001 From: "alejandro.gonzalez" Date: Wed, 4 Feb 2026 13:32:37 +0100 Subject: [PATCH 3/4] spotless --- .../java/com/datadog/appsec/gateway/AppSecRequestContext.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java index 2382449994c..a1540ddac49 100644 --- a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java +++ b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java @@ -736,7 +736,6 @@ List getStackTraces() { * #10494). The method trims whitespace before validation, allowing strings like " 42 " to parse * successfully. * - * * @param value the string to parse * @return a Long or Double if the string is a valid number, null otherwise */ From 537a668f642c8c4cf259a13d5c2e07127276dbed Mon Sep 17 00:00:00 2001 From: "alejandro.gonzalez" Date: Fri, 6 Feb 2026 09:48:51 +0100 Subject: [PATCH 4/4] Move to static utility --- .../appsec/gateway/AppSecRequestContext.java | 104 +----------- .../main/java/datadog/trace/util/Numbers.java | 132 +++++++++++++++ .../datadog/trace/util/NumbersTest.groovy | 150 ++++++++++++++++++ 3 files changed, 284 insertions(+), 102 deletions(-) create mode 100644 internal-api/src/main/java/datadog/trace/util/Numbers.java create mode 100644 internal-api/src/test/groovy/datadog/trace/util/NumbersTest.groovy diff --git a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java index a1540ddac49..11162674339 100644 --- a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java +++ b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java @@ -13,6 +13,7 @@ import datadog.trace.api.Config; import datadog.trace.api.http.StoredBodySupplier; import datadog.trace.api.internal.TraceSegment; +import datadog.trace.util.Numbers; import datadog.trace.util.stacktrace.StackTraceEvent; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.Closeable; @@ -726,107 +727,6 @@ List getStackTraces() { return stackTraces; } - /** - * Attempts to parse a string value as a number. Returns the parsed number if successful, null - * otherwise. Tries to parse as integer first, then as double if it contains a decimal point or - * scientific notation. - * - *

This method uses fast-path validation to avoid throwing NumberFormatException for invalid - * inputs, significantly reducing overhead when processing non-numeric attribute values (issue - * #10494). The method trims whitespace before validation, allowing strings like " 42 " to parse - * successfully. - * - * @param value the string to parse - * @return a Long or Double if the string is a valid number, null otherwise - */ - private static Number convertToNumericAttribute(String value) { - if (value == null || value.isEmpty()) { - return null; - } - - // Trim whitespace to handle common cases like " 42 " or "\t100\n" - String trimmed = value.trim(); - if (trimmed.isEmpty()) { - return null; - } - - // Fast-path validation: check if the string looks like a valid number - // before attempting to parse, to avoid expensive NumberFormatException throws - // Supports: integers, decimals, and scientific notation (e.g., "1.5e10", "-3E-7") - boolean hasDecimal = false; - boolean hasExponent = false; - int startIdx = 0; - int length = trimmed.length(); - - // Handle optional leading sign - char firstChar = trimmed.charAt(0); - if (firstChar == '+' || firstChar == '-') { - startIdx = 1; - // Sign-only strings like "+" or "-" are not valid numbers - if (startIdx >= length) { - return null; - } - } - - // Validate characters: digits, optional decimal point, optional exponent - boolean hasDigits = false; - for (int i = startIdx; i < length; i++) { - char c = trimmed.charAt(i); - if (c == '.') { - if (hasDecimal || hasExponent) { - // Multiple decimal points or decimal after exponent - invalid - return null; - } - hasDecimal = true; - } else if (c == 'e' || c == 'E') { - if (hasExponent || !hasDigits) { - // Multiple exponents or exponent without preceding digits - invalid - return null; - } - hasExponent = true; - // Next character can be optional sign (+/-) followed by digits - if (i + 1 < length) { - char next = trimmed.charAt(i + 1); - if (next == '+' || next == '-') { - i++; // Skip the sign after exponent - // Must have at least one digit after exponent sign - if (i + 1 >= length) { - return null; - } - } - } else { - // Exponent must be followed by digits - return null; - } - } else if (c >= '0' && c <= '9') { - hasDigits = true; - } else { - // Non-digit, non-decimal, non-exponent character - invalid - return null; - } - } - - // Must have at least one digit (reject strings like ".", "+.", "e10") - if (!hasDigits) { - return null; - } - - // Now attempt parsing - the try-catch remains as a fallback for overflow cases - // (e.g., Long.MAX_VALUE+1) or other edge cases that passed pre-validation - try { - if (hasDecimal || hasExponent) { - // Parse as Double for decimals and scientific notation - return Double.parseDouble(trimmed); - } else { - // Parse as Long for plain integers - return Long.parseLong(trimmed); - } - } catch (NumberFormatException e) { - // Overflow or other parse failure despite pre-check - return null; - } - } - public void reportDerivatives(Map data) { log.debug("Reporting derivatives: {}", data); if (data == null || data.isEmpty()) return; @@ -1048,7 +948,7 @@ public boolean commitDerivatives(TraceSegment traceSegment) { traceSegment.setTagTop(key, (Number) value); } else if (value instanceof String) { // Try to parse as numeric, otherwise use as string - Number parsedNumber = convertToNumericAttribute((String) value); + Number parsedNumber = Numbers.parseNumber((String) value); if (parsedNumber != null) { traceSegment.setTagTop(key, parsedNumber); } else { diff --git a/internal-api/src/main/java/datadog/trace/util/Numbers.java b/internal-api/src/main/java/datadog/trace/util/Numbers.java new file mode 100644 index 00000000000..284aa4b733a --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/Numbers.java @@ -0,0 +1,132 @@ +package datadog.trace.util; + +import javax.annotation.Nullable; + +/** + * Utility methods for working with numbers and numeric string parsing. + * + *

This class provides optimized numeric parsing that avoids throwing exceptions for invalid + * inputs, making it suitable for high-throughput scenarios where non-numeric strings are common. + */ +public final class Numbers { + + private Numbers() { + // Utility class - prevent instantiation + } + + /** + * Attempts to parse a string value as a number with fast-path validation to avoid throwing + * NumberFormatException for invalid inputs. + * + *

This method first validates that the string looks like a valid number before attempting to + * parse, significantly reducing overhead when processing non-numeric strings. The validation + * supports: + * + *

+ * + *

Performance characteristics: + * + *

+ * + * @param value the string to parse, may be null + * @return a Long for plain integers, Double for decimals or scientific notation, or null if the + * string is not a valid number + */ + @Nullable + public static Number parseNumber(@Nullable String value) { + if (value == null || value.isEmpty()) { + return null; + } + + // Trim whitespace to handle common cases like " 42 " or "\t100\n" + String trimmed = value.trim(); + if (trimmed.isEmpty()) { + return null; + } + + // Fast-path validation: check if the string looks like a valid number + // before attempting to parse, to avoid expensive NumberFormatException throws + // Supports: integers, decimals, and scientific notation (e.g., "1.5e10", "-3E-7") + boolean hasDecimal = false; + boolean hasExponent = false; + int startIdx = 0; + int length = trimmed.length(); + + // Handle optional leading sign + char firstChar = trimmed.charAt(0); + if (firstChar == '+' || firstChar == '-') { + startIdx = 1; + // Sign-only strings like "+" or "-" are not valid numbers + if (startIdx >= length) { + return null; + } + } + + // Validate characters: digits, optional decimal point, optional exponent + boolean hasDigits = false; + for (int i = startIdx; i < length; i++) { + char c = trimmed.charAt(i); + if (c == '.') { + if (hasDecimal || hasExponent) { + // Multiple decimal points or decimal after exponent - invalid + return null; + } + hasDecimal = true; + } else if (c == 'e' || c == 'E') { + if (hasExponent || !hasDigits) { + // Multiple exponents or exponent without preceding digits - invalid + return null; + } + hasExponent = true; + // Next character can be optional sign (+/-) followed by digits + if (i + 1 < length) { + char next = trimmed.charAt(i + 1); + if (next == '+' || next == '-') { + i++; // Skip the sign after exponent + // Must have at least one digit after exponent sign + if (i + 1 >= length) { + return null; + } + } + } else { + // Exponent must be followed by digits + return null; + } + } else if (c >= '0' && c <= '9') { + hasDigits = true; + } else { + // Non-digit, non-decimal, non-exponent character - invalid + return null; + } + } + + // Must have at least one digit (reject strings like ".", "+.", "e10") + if (!hasDigits) { + return null; + } + + // Now attempt parsing - the try-catch remains as a fallback for overflow cases + // (e.g., Long.MAX_VALUE+1) or other edge cases that passed pre-validation + try { + if (hasDecimal || hasExponent) { + // Parse as Double for decimals and scientific notation + return Double.parseDouble(trimmed); + } else { + // Parse as Long for plain integers + return Long.parseLong(trimmed); + } + } catch (NumberFormatException e) { + // Overflow or other parse failure despite pre-check (rare) + return null; + } + } +} diff --git a/internal-api/src/test/groovy/datadog/trace/util/NumbersTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/NumbersTest.groovy new file mode 100644 index 00000000000..c5f75d95fbc --- /dev/null +++ b/internal-api/src/test/groovy/datadog/trace/util/NumbersTest.groovy @@ -0,0 +1,150 @@ +package datadog.trace.util + +import datadog.trace.test.util.DDSpecification + +class NumbersTest extends DDSpecification { + + def "parseNumber with valid integers: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == expected + result instanceof Long + + where: + description | input | expected + 'zero' | '0' | 0L + 'positive integer' | '42' | 42L + 'negative integer' | '-100' | -100L + 'integer with plus sign' | '+999' | 999L + 'large valid long' | '9223372036854775807' | 9223372036854775807L + 'large negative long' | '-9223372036854775808' | -9223372036854775808L + } + + def "parseNumber with valid decimals: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == expected + result instanceof Double + + where: + description | input | expected + 'simple decimal' | '3.14' | 3.14d + 'negative decimal' | '-0.5' | -0.5d + 'decimal with plus sign' | '+99.99' | 99.99d + 'zero decimal' | '0.0' | 0.0d + 'decimal with many digits' | '123.456789' | 123.456789d + 'decimal starts with dot' | '.5' | 0.5d + 'decimal ends with dot' | '5.' | 5.0d + } + + def "parseNumber with scientific notation: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == expected + result instanceof Double + + where: + description | input | expected + 'scientific notation lowercase' | '1e10' | 1.0e10d + 'scientific notation uppercase' | '1E10' | 1.0E10d + 'scientific with decimal' | '1.5e10' | 1.5e10d + 'scientific negative exponent' | '3.5E-7' | 3.5E-7d + 'scientific with positive exp sign' | '-2.5e+3' | -2.5e+3d + 'scientific integer base' | '5e3' | 5000.0d + } + + def "parseNumber with whitespace: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == expected + + where: + description | input | expected + 'leading whitespace' | ' 42' | 42L + 'trailing whitespace' | '42 ' | 42L + 'both whitespace' | ' 42 ' | 42L + 'tab and newline' | '\t100\n' | 100L + 'multiple spaces decimal' | ' -3.14 ' | -3.14d + } + + def "parseNumber with invalid input: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == null + + where: + description | input + 'null value' | null + 'empty string' | '' + 'whitespace only' | ' ' + 'alphabetic string' | 'abc' + 'alphanumeric string' | '12x34' + 'multiple decimals' | '3.14.15' + 'multiple signs' | '+-5' + 'sign only plus' | '+' + 'sign only minus' | '-' + 'hexadecimal' | '0x10' + 'binary' | '0b1010' + 'dot only' | '.' + 'multiple dots' | '...' + 'comma separator' | '1,000' + 'currency symbol' | '$100' + } + + def "parseNumber with overflow: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == null + + where: + description | input + 'long overflow positive' | '9223372036854775808' + 'long overflow negative' | '-9223372036854775809' + 'very large number' | '99999999999999999999999' + } + + def "parseNumber returns correct type based on input format"() { + expect: + // Integers return Long + Numbers.parseNumber('42') instanceof Long + Numbers.parseNumber('-100') instanceof Long + + // Decimals return Double + Numbers.parseNumber('3.14') instanceof Double + Numbers.parseNumber('.5') instanceof Double + + // Scientific notation returns Double + Numbers.parseNumber('1e10') instanceof Double + Numbers.parseNumber('5E-3') instanceof Double + } + + def "parseNumber handles edge cases for type checking"() { + when: + def intResult = Numbers.parseNumber('42') + def doubleResult = Numbers.parseNumber('3.14') + def sciResult = Numbers.parseNumber('1e10') + + then: + // Can extract values with proper casting + intResult == 42L + doubleResult == 3.14d + sciResult == 1.0e10d + + // Check types + intResult instanceof Long + doubleResult instanceof Double + sciResult instanceof Double + } +}