diff --git a/internal-api/src/main/java/datadog/trace/util/StringIndex.java b/internal-api/src/main/java/datadog/trace/util/StringIndex.java
new file mode 100644
index 00000000000..3b6980ff492
--- /dev/null
+++ b/internal-api/src/main/java/datadog/trace/util/StringIndex.java
@@ -0,0 +1,211 @@
+package datadog.trace.util;
+
+/**
+ * Flat open-addressed name set. Generic — it knows only names.
+ *
+ * <p>Three ways to use it, trading convenience for indirection:
+ *
+ * <ul>
+ *   <li>{@link #of} — an instance wrapping the placed arrays; query it with {@link #indexOf} or
+ *       {@link #contains}.
+ *   <li>{@link Support#create} — pull the {@link Data} arrays into your own fields and query them
+ *       with {@link Support#indexOf(int[], String[], String)}.
+ *   <li>{@link Support#indexOf(int[], String[], String, int)} — as above, with the caller also
+ *       supplying the hash from {@link Support#hash}.
+ * </ul>
+ *
+ * <p>Consumers attach their own parallel payload arrays (ids, values, ...) sized to {@link #slots}
+ * and indexed by the slot {@code indexOf} returns.
+ *
+ * <p>A hash of 0 is the empty-slot sentinel: {@link Support#hash} never returns 0, so {@code
+ * hashes[i] == 0} unambiguously means an empty slot.
+ */
+public final class StringIndex {
+  private final int[] hashes;
+  private final String[] names;
+  public final int slots; // == hashes.length
+
+  private StringIndex(int[] hashes, String[] names) {
+    this.hashes = hashes;
+    this.names = names;
+    this.slots = hashes.length;
+  }
+
+  /**
+   * Convenience instance — wraps the placed arrays. For the hot path prefer raw {@link Support}.
+   *
+   * @param names names to index; the array and its elements must be non-null
+   * @return an index over {@code names}
+   * @throws IllegalArgumentException if there are too many names to size a table for
+   */
+  public static StringIndex of(String... names) {
+    Data data = Support.create(names);
+    return new StringIndex(data.hashes, data.names);
+  }
+
+  /**
+   * Slot of {@code name}, or -1. Delegates to {@link Support} on the instance's arrays.
+   *
+   * @param name non-null name to look up
+   * @return the slot holding {@code name}, or -1 if it is absent
+   */
+  public int indexOf(String name) {
+    return Support.indexOf(this.hashes, this.names, name);
+  }
+
+  /**
+   * Membership test.
+   *
+   * @param name non-null name to look up
+   * @return {@code true} if {@code name} is in the set
+   */
+  public boolean contains(String name) {
+    return indexOf(name) >= 0;
+  }
+
+  /** Table size — allocate parallel payload arrays of this length. */
+  public int slots() {
+    return this.slots;
+  }
+
+  /** Build-time carrier. Pull the fields into your own (static final) fields; don't keep this. */
+  public static final class Data {
+    public final int[] hashes;
+    public final String[] names;
+
+    Data(int[] hashes, String[] names) {
+      this.hashes = hashes;
+      this.names = names;
+    }
+  }
+
+  /**
+   * Static algorithm over raw arrays. Query helpers take raw arrays, never a Data or a StringIndex.
+   */
+  public static final class Support {
+    // Largest name count whose 2x-oversized power-of-two table still fits in a positive int.
+    private static final int MAX_NAMES = (1 << 29) - 1;
+
+    private Support() {}
+
+    /**
+     * Spread of String.hashCode; 0 reserved as the empty sentinel.
+     *
+     * @param name non-null name to hash
+     * @return a non-zero hash for {@code name}
+     */
+    public static int hash(String name) {
+      int h = name.hashCode(); // cached on String -> field load
+      // h ^ (h >>> 16) is zero only when h is zero, so remapping that one input keeps 0 free.
+      return h == 0 ? 0xDD06 : h ^ (h >>> 16);
+    }
+
+    /**
+     * Power-of-two size, 2x-oversized so load factor stays <= 0.5.
+     *
+     * @param n number of names to hold
+     * @return the table length to allocate
+     * @throws IllegalArgumentException if {@code n} is too large for an {@code int}-sized table
+     */
+    public static int tableSizeFor(int n) {
+      if (n > MAX_NAMES) {
+        // Beyond this the shifts overflow: the loop never ends or the size goes negative.
+        throw new IllegalArgumentException("too many names: " + n);
+      }
+      int size = 1;
+      while (size <= n) {
+        size <<= 1;
+      }
+      return size << 1;
+    }
+
+    /**
+     * Build the placed table. Returns a Data carrier; pull its arrays into your own fields.
+     *
+     * @param names names to place; the array and its elements must be non-null
+     * @return the placed hash and name arrays
+     * @throws IllegalArgumentException if there are too many names to size a table for
+     */
+    public static Data create(String... names) {
+      int size = tableSizeFor(names.length);
+      int[] hashes = new int[size];
+      String[] placed = new String[size];
+      for (String name : names) {
+        put(hashes, placed, name, hash(name));
+      }
+      return new Data(hashes, placed);
+    }
+
+    /**
+     * Build-time placement. Returns the slot.
+     *
+     * @param hashes slot hashes; length must be a power of two (length - 1 is the probe mask)
+     * @param names slot names, parallel to {@code hashes}
+     * @param name non-null name to place
+     * @param h non-zero hash of {@code name}
+     * @return the slot now holding {@code name}, whether newly placed or already present
+     * @throws IllegalArgumentException if {@code h} is 0, the empty-slot sentinel
+     * @throws IllegalStateException if every slot is taken by another name
+     */
+    public static int put(int[] hashes, String[] names, String name, int h) {
+      if (h == 0) {
+        // Storing 0 would leave the slot looking empty, so the name could never be found again.
+        throw new IllegalArgumentException("hash must be non-zero");
+      }
+      final int mask = hashes.length - 1;
+      int i = h & mask;
+      for (int probes = 0; probes <= mask; probes++, i = (i + 1) & mask) {
+        if (hashes[i] == 0) {
+          hashes[i] = h;
+          names[i] = name;
+          return i;
+        }
+        if (hashes[i] == h && eq(names[i], name)) {
+          return i; // already present
+        }
+      }
+      throw new IllegalStateException("table full"); // impossible at LF <= 0.5
+    }
+
+    /**
+     * Probe; returns the slot or -1. Raw arrays — no Data, no instance.
+     *
+     * @param hashes slot hashes; length must be a power of two (length - 1 is the probe mask)
+     * @param names slot names, parallel to {@code hashes}
+     * @param name non-null name to look up
+     * @param h hash of {@code name}; only a slot placed with the same hash can match
+     * @return the slot holding {@code name}, or -1 if it is absent
+     */
+    public static int indexOf(int[] hashes, String[] names, String name, int h) {
+      final int mask = hashes.length - 1;
+      int i = h & mask;
+      for (int probes = 0; probes <= mask; probes++, i = (i + 1) & mask) {
+        int sh = hashes[i];
+        if (sh == 0) {
+          return -1;
+        }
+        if (sh == h && eq(names[i], name)) {
+          return i;
+        }
+      }
+      return -1;
+    }
+
+    /**
+     * Probe using the hash computed by {@link #hash}.
+     *
+     * @param hashes slot hashes; length must be a power of two (length - 1 is the probe mask)
+     * @param names slot names, parallel to {@code hashes}
+     * @param name non-null name to look up
+     * @return the slot holding {@code name}, or -1 if it is absent
+     */
+    public static int indexOf(int[] hashes, String[] names, String name) {
+      return indexOf(hashes, names, name, hash(name));
+    }
+
+    // `a` is a stored name on an occupied slot (never null); `b` is a non-null query.
+    private static boolean eq(String a, String b) {
+      return a == b || a.equals(b); // interned literals hit the == fast path
+    }
+  }
+}
diff --git a/internal-api/src/test/java/datadog/trace/util/StringIndexTest.java b/internal-api/src/test/java/datadog/trace/util/StringIndexTest.java
new file mode 100644
index 00000000000..60c0d9998ea
--- /dev/null
+++ b/internal-api/src/test/java/datadog/trace/util/StringIndexTest.java
@@ -0,0 +1,118 @@
+package datadog.trace.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import datadog.trace.util.StringIndex.Data;
+import datadog.trace.util.StringIndex.Support;
+import org.junit.jupiter.api.Test;
+
+class StringIndexTest {
+
+  @Test
+  void hash_spread_and_zeroSentinel() {
+    // "".hashCode() == 0 -> remapped to the non-zero sentinel so 0 can mean "empty slot"
+    assertEquals(0xDD06, Support.hash(""));
+
+    int raw = "foo".hashCode();
+    assertEquals(raw ^ (raw >>> 16), Support.hash("foo"));
+    assertNotEquals(0, Support.hash("foo"));
+  }
+
+  @Test
+  void tableSizeFor_isPow2_andOversized() {
+    assertEquals(2, Support.tableSizeFor(0));
+    assertEquals(4, Support.tableSizeFor(1));
+    assertEquals(8, Support.tableSizeFor(3));
+    assertEquals(16, Support.tableSizeFor(4));
+  }
+
+  @Test
+  void tableSizeFor_rejectsCountsThatOverflow() {
+    assertEquals(1 << 30, Support.tableSizeFor((1 << 29) - 1)); // largest supported count
+    assertThrows(IllegalArgumentException.class, () -> Support.tableSizeFor(1 << 29));
+    assertThrows(IllegalArgumentException.class, () -> Support.tableSizeFor(Integer.MAX_VALUE));
+  }
+
+  @Test
+  void instance_contains_internedAndCopy_andMiss() {
+    StringIndex set = StringIndex.of("foo", "bar", "baz");
+
+    assertEquals(8, set.slots()); // 3 names -> tableSizeFor(3) == 8
+
+    assertTrue(set.contains("foo")); // interned literal -> == fast path in eq
+    assertTrue(set.contains(new String("bar"))); // non-interned -> .equals path
+    assertFalse(set.contains("nope"));
+
+    assertTrue(set.indexOf("baz") >= 0);
+    assertEquals(-1, set.indexOf("nope"));
+  }
+
+  @Test
+  void support_create_then_indexOf() {
+    Data d = Support.create("x", "y");
+
+    int slot = Support.indexOf(d.hashes, d.names, "x"); // 3-arg overload computes the hash
+    assertTrue(slot >= 0);
+    assertEquals("x", d.names[slot]);
+
+    assertEquals(-1, Support.indexOf(d.hashes, d.names, "q"));
+  }
+
+  /** Controlled hashes force collision, linear-probe wraparound, and the already-present path. */
+  @Test
+  void put_and_indexOf_collisionAndWraparound() {
+    int[] hashes = new int[4]; // mask = 3
+    String[] names = new String[4];
+
+    assertEquals(3, Support.put(hashes, names, "a", 7)); // 7 & 3 == 3
+    assertEquals(0, Support.put(hashes, names, "b", 7)); // collides at 3, probes (3+1)&3 == 0
+    assertEquals(3, Support.put(hashes, names, "a", 7)); // already present -> existing slot
+
+    assertEquals(3, Support.indexOf(hashes, names, "a", 7)); // direct hit
+    assertEquals(0, Support.indexOf(hashes, names, "b", 7)); // hit after collision + wraparound
+    assertEquals(
+        -1, Support.indexOf(hashes, names, "c", 7)); // miss after probing 3 -> 0 -> 1(empty)
+    assertEquals(-1, Support.indexOf(hashes, names, "z", 6)); // 6 & 3 == 2, empty -> immediate miss
+  }
+
+  @Test
+  void put_throwsWhenFull() {
+    int[] hashes = new int[2]; // mask = 1
+    String[] names = new String[2];
+
+    Support.put(hashes, names, "a", 4); // 4 & 1 == 0
+    Support.put(hashes, names, "b", 5); // 5 & 1 == 1
+
+    // both slots occupied, no match -> probe exhausts -> throw
+    assertThrows(IllegalStateException.class, () -> Support.put(hashes, names, "c", 6));
+  }
+
+  @Test
+  void put_rejectsZeroHash() {
+    int[] hashes = new int[2];
+    String[] names = new String[2];
+
+    // 0 is the empty-slot sentinel; a name stored under it could never be found again
+    assertThrows(IllegalArgumentException.class, () -> Support.put(hashes, names, "a", 0));
+  }
+
+  /** The documented usage: build a StringIndex, attach a parallel payload indexed by slot. */
+  @Test
+  void parallelPayloadBySlot() {
+    String[] names = {"a", "b", "c"};
+    Data d = Support.create(names);
+
+    long[] ids = new long[d.names.length];
+    for (int j = 0; j < names.length; j++) {
+      ids[Support.indexOf(d.hashes, d.names, names[j])] = j + 1L;
+    }
+
+    assertEquals(1L, ids[Support.indexOf(d.hashes, d.names, "a")]);
+    assertEquals(2L, ids[Support.indexOf(d.hashes, d.names, "b")]);
+    assertEquals(3L, ids[Support.indexOf(d.hashes, d.names, "c")]);
+  }
+}