diff --git a/dd-java-agent/appsec/src/jmh/java/datadog/appsec/benchmark/NumericConversionBenchmark.java b/dd-java-agent/appsec/src/jmh/java/datadog/appsec/benchmark/NumericConversionBenchmark.java new file mode 100644 index 00000000000..9d4568ff74a --- /dev/null +++ b/dd-java-agent/appsec/src/jmh/java/datadog/appsec/benchmark/NumericConversionBenchmark.java @@ -0,0 +1,221 @@ +package datadog.appsec.benchmark; + +import static java.util.Collections.singletonMap; +import static java.util.concurrent.TimeUnit.NANOSECONDS; +import static java.util.concurrent.TimeUnit.SECONDS; + +import com.datadog.appsec.gateway.AppSecRequestContext; +import datadog.trace.api.internal.TraceSegment; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Benchmark for numeric attribute conversion performance (issue #10494). + * + *

Tests the optimization that replaces exception-driven parsing with fast-path validation to + * avoid NumberFormatException overhead on invalid inputs. + * + *

Expected results: - Valid numeric inputs: no regression (~100-200ns/op) - Invalid inputs: + * 10-100x improvement (from ~1000ns+ with exceptions to <100ns without) - Empty/whitespace: fast + * rejection (<50ns/op) + */ +@State(Scope.Benchmark) +@Warmup(iterations = 4, time = 2, timeUnit = SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = SECONDS) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(NANOSECONDS) +@Fork(value = 3) +public class NumericConversionBenchmark { + + static { + BenchmarkUtil.disableLogging(); + } + + private AppSecRequestContext context; + private TraceSegment mockTraceSegment; + + @Setup(Level.Iteration) + public void setUp() { + context = new AppSecRequestContext(); + // Use NoOp TraceSegment to minimize overhead + mockTraceSegment = TraceSegment.NoOp.INSTANCE; + } + + @Benchmark + public void validInteger(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "42"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validIntegerNegative(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "-12345"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validIntegerWithSign(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "+999"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validDecimal(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "3.14"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validDecimalNegative(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "-99.5"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void validLargeNumber(Blackhole blackhole) { + context.reportDerivatives( + singletonMap("test_attr", singletonMap("value", "9223372036854775807"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Invalid inputs - these should show the biggest improvement (no exceptions thrown) + @Benchmark + public void invalidAlphabetic(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "not_a_number"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidAlphanumeric(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "12x34"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidMultipleDecimals(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "3.14.15"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidHexFormat(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "0x10"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidScientific(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "1e10"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void invalidSpecialChars(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "$100"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Whitespace handling - now works correctly after optimization (issue #10494) + @Benchmark + public void whitespaceLeading(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", " 42"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void whitespaceTrailing(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "42 "))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void whitespaceBoth(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", " 42 "))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void whitespaceTabNewline(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", "\t100\n"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Empty/null - fast rejection + @Benchmark + public void emptyString(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", ""))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void whitespaceOnly(Blackhole blackhole) { + context.reportDerivatives(singletonMap("test_attr", singletonMap("value", " "))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Overflow cases - should handle gracefully without exceptions + @Benchmark + public void overflowLongMax(Blackhole blackhole) { + context.reportDerivatives( + singletonMap("test_attr", singletonMap("value", "9223372036854775808"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + @Benchmark + public void overflowVeryLarge(Blackhole blackhole) { + context.reportDerivatives( + singletonMap("test_attr", singletonMap("value", "99999999999999999999999"))); + boolean result = context.commitDerivatives(mockTraceSegment); + blackhole.consume(result); + } + + // Mixed workload - realistic scenario with 80% invalid, 20% valid + @Benchmark + public void mixedWorkload(Blackhole blackhole) { + // Invalid (80%) + context.reportDerivatives(singletonMap("attr1", singletonMap("value", "invalid"))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + + context.reportDerivatives(singletonMap("attr2", singletonMap("value", "abc123"))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + + context.reportDerivatives(singletonMap("attr3", singletonMap("value", "0x10"))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + + context.reportDerivatives(singletonMap("attr4", singletonMap("value", ""))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + + // Valid (20%) + context.reportDerivatives(singletonMap("attr5", singletonMap("value", "42"))); + blackhole.consume(context.commitDerivatives(mockTraceSegment)); + } +} diff --git a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java index 2b197756697..11162674339 100644 --- a/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java +++ b/dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/AppSecRequestContext.java @@ -13,6 +13,7 @@ import datadog.trace.api.Config; import datadog.trace.api.http.StoredBodySupplier; import datadog.trace.api.internal.TraceSegment; +import datadog.trace.util.Numbers; import datadog.trace.util.stacktrace.StackTraceEvent; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.Closeable; @@ -726,27 +727,6 @@ List getStackTraces() { return stackTraces; } - /** - * Attempts to parse a string value as a number. Returns the parsed number if successful, null - * otherwise. Tries to parse as integer first, then as double if it contains a decimal point. - */ - private static Number convertToNumericAttribute(String value) { - if (value == null || value.isEmpty()) { - return null; - } - try { - // Check if it contains a decimal point to determine if it's a double - if (value.contains(".")) { - return Double.parseDouble(value); - } else { - // Try to parse as integer first - return Long.parseLong(value); - } - } catch (NumberFormatException e) { - return null; - } - } - public void reportDerivatives(Map data) { log.debug("Reporting derivatives: {}", data); if (data == null || data.isEmpty()) return; @@ -968,7 +948,7 @@ public boolean commitDerivatives(TraceSegment traceSegment) { traceSegment.setTagTop(key, (Number) value); } else if (value instanceof String) { // Try to parse as numeric, otherwise use as string - Number parsedNumber = convertToNumericAttribute((String) value); + Number parsedNumber = Numbers.parseNumber((String) value); if (parsedNumber != null) { traceSegment.setTagTop(key, parsedNumber); } else { diff --git a/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy b/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy index ae0d3736bca..2be7ffbd2f0 100644 --- a/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy +++ b/dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/AppSecRequestContextSpecification.groovy @@ -655,6 +655,108 @@ class AppSecRequestContextSpecification extends DDSpecification { } } + def "test numeric conversion with edge cases: #description"() { + given: + def context = new AppSecRequestContext() + def mockTraceSegment = Mock(datadog.trace.api.internal.TraceSegment) + def committedTags = [:] + + mockTraceSegment.setTagTop(_ as String, _ as Object) >> { String key, Object value -> + committedTags[key] = value + } + + when: + context.reportDerivatives(['test_attr': ['value': inputValue]]) + context.commitDerivatives(mockTraceSegment) + + then: + // When conversion fails (expectedValue is null), the original string value should be preserved + // When conversion succeeds, the converted numeric value should be used + committedTags['test_attr'] == (expectedValue != null ? expectedValue : inputValue) + + where: + description | inputValue | expectedValue + // Valid integers + 'zero' | '0' | 0L + 'positive integer' | '42' | 42L + 'negative integer' | '-100' | -100L + 'integer with plus sign' | '+999' | 999L + 'large valid long' | '9223372036854775807' | 9223372036854775807L + 'large negative long' | '-9223372036854775808' | -9223372036854775808L + + // Valid decimals + 'simple decimal' | '3.14' | 3.14d + 'negative decimal' | '-0.5' | -0.5d + 'decimal with plus sign' | '+99.99' | 99.99d + 'zero decimal' | '0.0' | 0.0d + 'decimal with many digits' | '123.456789' | 123.456789d + + // Whitespace handling (should now parse after trim - issue #10494 fix) + 'leading whitespace integer' | ' 42' | 42L + 'trailing whitespace integer' | '42 ' | 42L + 'both whitespace integer' | ' 42 ' | 42L + 'tab and newline whitespace' | '\t100\n' | 100L + 'multiple spaces decimal' | ' -3.14 ' | -3.14d + + // Empty and null + 'null value' | null | null + 'empty string' | '' | null + 'whitespace only' | ' ' | null + 'tab only' | '\t' | null + + // Invalid formats (should return null, original string preserved) + 'alphabetic string' | 'abc' | null + 'alphanumeric string' | '12x34' | null + 'multiple decimals' | '3.14.15' | null + 'multiple signs' | '+-5' | null + 'sign in middle' | '12-34' | null + + // Sign-only strings + 'plus sign only' | '+' | null + 'minus sign only' | '-' | null + 'plus with whitespace' | ' + ' | null + + // Overflow cases (should return null gracefully) + 'long overflow positive' | '9223372036854775808' | null + 'long overflow negative' | '-9223372036854775809' | null + 'very large number' | '99999999999999999999999' | null + + // Scientific notation (now supported for backward compatibility) + 'scientific notation lowercase' | '1e10' | 1.0e10d + 'scientific notation uppercase' | '1E10' | 1.0E10d + 'scientific with decimal' | '1.5e10' | 1.5e10d + 'scientific negative exponent' | '3.5E-7' | 3.5E-7d + 'scientific with sign' | '-2.5e+3' | -2.5e+3d + 'scientific integer base' | '5e3' | 5000.0d + + // Exotic number formats (not supported) + 'hexadecimal' | '0x10' | null + 'binary' | '0b1010' | null + 'octal' | '0777' | 777L + + // Edge decimal cases (note: .5 and 5. are valid Java double literals) + 'decimal starts with dot' | '.5' | 0.5d + 'decimal ends with dot' | '5.' | 5.0d + 'dot only' | '.' | null + 'multiple dots' | '...' | null + + // Regression - ensure existing valid formats still work + 'regression valid integer' | '100' | 100L + 'regression valid decimal' | '99.5' | 99.5d + 'regression negative' | '-50' | -50L + + // Special characters + 'comma separator' | '1,000' | null + 'underscore separator' | '1_000' | null + 'currency symbol' | '$100' | null + 'percentage' | '50%' | null + + // Edge cases with zeros + 'zero with plus' | '+0' | 0L + 'zero with minus' | '-0' | 0L + 'decimal zero variations' | '0.00' | 0.0d + } + def "test getDerivativeKeys with empty derivatives"() { given: def context = new AppSecRequestContext() diff --git a/internal-api/src/main/java/datadog/trace/util/Numbers.java b/internal-api/src/main/java/datadog/trace/util/Numbers.java new file mode 100644 index 00000000000..284aa4b733a --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/Numbers.java @@ -0,0 +1,132 @@ +package datadog.trace.util; + +import javax.annotation.Nullable; + +/** + * Utility methods for working with numbers and numeric string parsing. + * + *

This class provides optimized numeric parsing that avoids throwing exceptions for invalid + * inputs, making it suitable for high-throughput scenarios where non-numeric strings are common. + */ +public final class Numbers { + + private Numbers() { + // Utility class - prevent instantiation + } + + /** + * Attempts to parse a string value as a number with fast-path validation to avoid throwing + * NumberFormatException for invalid inputs. + * + *

This method first validates that the string looks like a valid number before attempting to + * parse, significantly reducing overhead when processing non-numeric strings. The validation + * supports: + * + *

+ * + *

Performance characteristics: + * + *

+ * + * @param value the string to parse, may be null + * @return a Long for plain integers, Double for decimals or scientific notation, or null if the + * string is not a valid number + */ + @Nullable + public static Number parseNumber(@Nullable String value) { + if (value == null || value.isEmpty()) { + return null; + } + + // Trim whitespace to handle common cases like " 42 " or "\t100\n" + String trimmed = value.trim(); + if (trimmed.isEmpty()) { + return null; + } + + // Fast-path validation: check if the string looks like a valid number + // before attempting to parse, to avoid expensive NumberFormatException throws + // Supports: integers, decimals, and scientific notation (e.g., "1.5e10", "-3E-7") + boolean hasDecimal = false; + boolean hasExponent = false; + int startIdx = 0; + int length = trimmed.length(); + + // Handle optional leading sign + char firstChar = trimmed.charAt(0); + if (firstChar == '+' || firstChar == '-') { + startIdx = 1; + // Sign-only strings like "+" or "-" are not valid numbers + if (startIdx >= length) { + return null; + } + } + + // Validate characters: digits, optional decimal point, optional exponent + boolean hasDigits = false; + for (int i = startIdx; i < length; i++) { + char c = trimmed.charAt(i); + if (c == '.') { + if (hasDecimal || hasExponent) { + // Multiple decimal points or decimal after exponent - invalid + return null; + } + hasDecimal = true; + } else if (c == 'e' || c == 'E') { + if (hasExponent || !hasDigits) { + // Multiple exponents or exponent without preceding digits - invalid + return null; + } + hasExponent = true; + // Next character can be optional sign (+/-) followed by digits + if (i + 1 < length) { + char next = trimmed.charAt(i + 1); + if (next == '+' || next == '-') { + i++; // Skip the sign after exponent + // Must have at least one digit after exponent sign + if (i + 1 >= length) { + return null; + } + } + } else { + // Exponent must be followed by digits + return null; + } + } else if (c >= '0' && c <= '9') { + hasDigits = true; + } else { + // Non-digit, non-decimal, non-exponent character - invalid + return null; + } + } + + // Must have at least one digit (reject strings like ".", "+.", "e10") + if (!hasDigits) { + return null; + } + + // Now attempt parsing - the try-catch remains as a fallback for overflow cases + // (e.g., Long.MAX_VALUE+1) or other edge cases that passed pre-validation + try { + if (hasDecimal || hasExponent) { + // Parse as Double for decimals and scientific notation + return Double.parseDouble(trimmed); + } else { + // Parse as Long for plain integers + return Long.parseLong(trimmed); + } + } catch (NumberFormatException e) { + // Overflow or other parse failure despite pre-check (rare) + return null; + } + } +} diff --git a/internal-api/src/test/groovy/datadog/trace/util/NumbersTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/NumbersTest.groovy new file mode 100644 index 00000000000..c5f75d95fbc --- /dev/null +++ b/internal-api/src/test/groovy/datadog/trace/util/NumbersTest.groovy @@ -0,0 +1,150 @@ +package datadog.trace.util + +import datadog.trace.test.util.DDSpecification + +class NumbersTest extends DDSpecification { + + def "parseNumber with valid integers: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == expected + result instanceof Long + + where: + description | input | expected + 'zero' | '0' | 0L + 'positive integer' | '42' | 42L + 'negative integer' | '-100' | -100L + 'integer with plus sign' | '+999' | 999L + 'large valid long' | '9223372036854775807' | 9223372036854775807L + 'large negative long' | '-9223372036854775808' | -9223372036854775808L + } + + def "parseNumber with valid decimals: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == expected + result instanceof Double + + where: + description | input | expected + 'simple decimal' | '3.14' | 3.14d + 'negative decimal' | '-0.5' | -0.5d + 'decimal with plus sign' | '+99.99' | 99.99d + 'zero decimal' | '0.0' | 0.0d + 'decimal with many digits' | '123.456789' | 123.456789d + 'decimal starts with dot' | '.5' | 0.5d + 'decimal ends with dot' | '5.' | 5.0d + } + + def "parseNumber with scientific notation: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == expected + result instanceof Double + + where: + description | input | expected + 'scientific notation lowercase' | '1e10' | 1.0e10d + 'scientific notation uppercase' | '1E10' | 1.0E10d + 'scientific with decimal' | '1.5e10' | 1.5e10d + 'scientific negative exponent' | '3.5E-7' | 3.5E-7d + 'scientific with positive exp sign' | '-2.5e+3' | -2.5e+3d + 'scientific integer base' | '5e3' | 5000.0d + } + + def "parseNumber with whitespace: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == expected + + where: + description | input | expected + 'leading whitespace' | ' 42' | 42L + 'trailing whitespace' | '42 ' | 42L + 'both whitespace' | ' 42 ' | 42L + 'tab and newline' | '\t100\n' | 100L + 'multiple spaces decimal' | ' -3.14 ' | -3.14d + } + + def "parseNumber with invalid input: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == null + + where: + description | input + 'null value' | null + 'empty string' | '' + 'whitespace only' | ' ' + 'alphabetic string' | 'abc' + 'alphanumeric string' | '12x34' + 'multiple decimals' | '3.14.15' + 'multiple signs' | '+-5' + 'sign only plus' | '+' + 'sign only minus' | '-' + 'hexadecimal' | '0x10' + 'binary' | '0b1010' + 'dot only' | '.' + 'multiple dots' | '...' + 'comma separator' | '1,000' + 'currency symbol' | '$100' + } + + def "parseNumber with overflow: #description"() { + when: + def result = Numbers.parseNumber(input) + + then: + result == null + + where: + description | input + 'long overflow positive' | '9223372036854775808' + 'long overflow negative' | '-9223372036854775809' + 'very large number' | '99999999999999999999999' + } + + def "parseNumber returns correct type based on input format"() { + expect: + // Integers return Long + Numbers.parseNumber('42') instanceof Long + Numbers.parseNumber('-100') instanceof Long + + // Decimals return Double + Numbers.parseNumber('3.14') instanceof Double + Numbers.parseNumber('.5') instanceof Double + + // Scientific notation returns Double + Numbers.parseNumber('1e10') instanceof Double + Numbers.parseNumber('5E-3') instanceof Double + } + + def "parseNumber handles edge cases for type checking"() { + when: + def intResult = Numbers.parseNumber('42') + def doubleResult = Numbers.parseNumber('3.14') + def sciResult = Numbers.parseNumber('1e10') + + then: + // Can extract values with proper casting + intResult == 42L + doubleResult == 3.14d + sciResult == 1.0e10d + + // Check types + intResult instanceof Long + doubleResult instanceof Double + sciResult instanceof Double + } +}