From a324bc5449eee4e8850296a0191ec49b4b1d37e8 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Mon, 18 May 2026 17:31:48 +0200 Subject: [PATCH 1/6] Add reproducer for chain-pattern literal static-receiver mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A multi-statement sink pattern that uses `HttpRequest.newBuilder().uri($URL)` as a literal sub-expression in the let-binding `$BUILDER = ...uri(...)` does NOT match a chained source like HttpRequest req = HttpRequest.newBuilder() .uri(URI.create(t)) .GET() .build(); client.send(req); even though `HttpRequest.newBuilder()` is literally the receiver of `.uri(...)` in the source. Replacing the literal static-method receiver with `$_` (`$BUILDER = $_.uri($URL)`) matches the same shape correctly — that's the workaround currently used in the production `ssrf-sinks.yaml` `java-ssrf-sink` block (see the `Builder-chain inline forms` comment). The literal form ought to match the same source but doesn't. Stubs (`issues/iChain/`) mirror the JDK `java.net.http.HttpRequest` + `java.net.URI` + `java.net.http.HttpClient` API surface needed for the chain; the rule and test reference only the stub classes so the reproducer doesn't depend on JDK classpath presence. The test fails with: Expected [PositiveCase(className=issues.issueChain$PositiveTaint)] to be positive, but no vulnerability was found. (constructor call `new okhttp3.Request.Builder().url(x)` works as a literal in the same SSRF rule, so the mismatch is specific to a static-method call in the receiver slot of the next chained call.) --- .../main/java/issues/iChain/HttpClient.java | 15 +++++ .../main/java/issues/iChain/HttpRequest.java | 26 +++++++++ .../src/main/java/issues/iChain/Source.java | 12 ++++ .../src/main/java/issues/iChain/URI.java | 18 ++++++ .../src/main/java/issues/issueChain.java | 55 +++++++++++++++++++ .../src/main/resources/issues/issueChain.yaml | 36 ++++++++++++ .../org/opentaint/semgrep/IssuesTest.kt | 10 ++++ 7 files changed, 172 insertions(+) create mode 100644 core/opentaint-java-querylang/samples/src/main/java/issues/iChain/HttpClient.java create mode 100644 core/opentaint-java-querylang/samples/src/main/java/issues/iChain/HttpRequest.java create mode 100644 core/opentaint-java-querylang/samples/src/main/java/issues/iChain/Source.java create mode 100644 core/opentaint-java-querylang/samples/src/main/java/issues/iChain/URI.java create mode 100644 core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java create mode 100644 core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/HttpClient.java b/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/HttpClient.java new file mode 100644 index 000000000..7aad55153 --- /dev/null +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/HttpClient.java @@ -0,0 +1,15 @@ +package issues.iChain; + +/** + * Stub mirroring `java.net.http.HttpClient` — the sink consumes a + * built {@link HttpRequest}. + */ +public class HttpClient { + public static HttpClient newHttpClient() { + return new HttpClient(); + } + + public String send(HttpRequest req) { + return "response"; + } +} diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/HttpRequest.java b/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/HttpRequest.java new file mode 100644 index 000000000..6ef5aa71c --- /dev/null +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/HttpRequest.java @@ -0,0 +1,26 @@ +package issues.iChain; + +/** + * Stub mirroring `java.net.http.HttpRequest` for the chained-builder + * pattern repro. The static `newBuilder()` returns a {@link Builder}; + * the builder chain is `.uri(URI).GET().build()`. + */ +public class HttpRequest { + public static Builder newBuilder() { + return new Builder(); + } + + public static class Builder { + public Builder uri(URI uri) { + return this; + } + + public Builder GET() { + return this; + } + + public HttpRequest build() { + return new HttpRequest(); + } + } +} diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/Source.java b/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/Source.java new file mode 100644 index 000000000..c7aa46559 --- /dev/null +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/Source.java @@ -0,0 +1,12 @@ +package issues.iChain; + +/** + * Trivial tainted-source helper. The pattern-source in the rule is + * `String $UNTRUSTED = Source.taint();`, so any call to `taint()` + * produces an untrusted string. + */ +public class Source { + public static String taint() { + return "tainted"; + } +} diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/URI.java b/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/URI.java new file mode 100644 index 000000000..dd1d859d1 --- /dev/null +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/iChain/URI.java @@ -0,0 +1,18 @@ +package issues.iChain; + +/** + * Stub mirroring `java.net.URI` for the chained-builder pattern repro. + * Real `java.net.URI` would also work but using a stub keeps the test + * self-contained. + */ +public class URI { + public final String value; + + private URI(String value) { + this.value = value; + } + + public static URI create(String s) { + return new URI(s); + } +} diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java new file mode 100644 index 000000000..add0a6f7c --- /dev/null +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java @@ -0,0 +1,55 @@ +package issues; + +import base.RuleSample; +import base.RuleSet; +import issues.iChain.HttpClient; +import issues.iChain.HttpRequest; +import issues.iChain.Source; +import issues.iChain.URI; + +/** + * Reproducer: a sink-side pattern that names a static-method receiver + * literally (`HttpRequest.newBuilder().uri($URL)`) does NOT match a + * chained call expression like + * `req = HttpRequest.newBuilder().uri(URI.create(t)).GET().build();` + * + * Replacing the literal receiver with `$_.uri($URL)` matches the same + * shape (see SSRF rule `ssrf-sinks.yaml` for the production version + * that uses the `$_` form). The repro contrasts the two forms in + * separate rule files; the YAML referenced by `@RuleSet` is the + * failing literal-receiver form. + * + * The PositiveTaint case exercises the same shape that the production + * `UnsafeHttpClientSendController` test uses; this test fails until + * the literal-receiver form is supported (or the documented + * `(HttpRequest.Builder $_)` typed-receiver constraint becomes + * available). + */ +@RuleSet("issues/issueChain.yaml") +public abstract class issueChain implements RuleSample { + + static class PositiveTaint extends issueChain { + @Override + public void entrypoint() { + String t = Source.taint(); + HttpRequest req = HttpRequest.newBuilder() + .uri(URI.create(t)) + .GET() + .build(); + HttpClient client = HttpClient.newHttpClient(); + client.send(req); + } + } + + static class NegativeTaint extends issueChain { + @Override + public void entrypoint() { + HttpRequest req = HttpRequest.newBuilder() + .uri(URI.create("https://example.com")) + .GET() + .build(); + HttpClient client = HttpClient.newHttpClient(); + client.send(req); + } + } +} diff --git a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml new file mode 100644 index 000000000..0b9683f5c --- /dev/null +++ b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml @@ -0,0 +1,36 @@ +rules: + - id: issue-chain + message: "Reproducer: literal static-method receiver inside chained-builder pattern doesn't match" + severity: WARNING + languages: [java] + mode: taint + + pattern-sources: + - pattern: $UNTRUSTED = issues.iChain.Source.taint(); + + pattern-sinks: + # FAILING FORM — uses `issues.iChain.HttpRequest.newBuilder()` as + # the literal receiver of `.uri(...)`. The source is the chained + # call: + # HttpRequest req = HttpRequest.newBuilder() + # .uri(URI.create(t)) + # .GET() + # .build(); + # client.send(req); + # Replacing the literal receiver with `$_.uri($URL)` makes the + # exact same test pass (see the production `ssrf-sinks.yaml` + # `java-ssrf-sink` block at the `Builder-chain inline forms` + # comment — that's the workaround currently used in the SSRF + # rule). The literal form should also match the same shape but + # does not. + - patterns: + - pattern-either: + - pattern-inside: | + $URL = issues.iChain.URI.create($UNTRUSTED); + ... + - pattern: | + $BUILDER = issues.iChain.HttpRequest.newBuilder().uri($URL); + ...; + $TYPE $REQ = $BUILDER.build(); + ...; + (issues.iChain.HttpClient $C).send($REQ, ...); diff --git a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt index 185fe851c..ed558059b 100644 --- a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt +++ b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt @@ -16,6 +16,7 @@ import issues.issue94 import issues.issue95 import issues.issue96 import issues.issue97 +import issues.issueChain import org.junit.jupiter.api.AfterAll import org.junit.jupiter.api.Disabled import org.junit.jupiter.api.TestInstance @@ -105,6 +106,15 @@ class IssuesTest : SampleBasedTest() { @Test fun `issue 97`() = runTest() + @Test + // Reproducer for a chained-builder pattern with a literal static-method + // receiver. Should match the `req = newBuilder().uri(URI.create(t)).GET().build();` + // shape but does not — see `issueChain.yaml` and `issueChain.java` for + // the failing pattern and its passing `$_.uri($URL)` counterpart. + // (`EXPECT_STATE_VAR` because the multi-statement sink pattern with + // `$BUILDER`/`$REQ` introduces state-vars during taint config build.) + fun `issue chain-pattern literal static receiver`() = runTest(EXPECT_STATE_VAR) + @AfterAll fun close() { closeRunner() From 05f67d83e49cb0cf1a0c730c4047e130e96fdf58 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 21 May 2026 00:19:30 +0200 Subject: [PATCH 2/6] Document chain-pattern non-match as intended; add split-builder counterpart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reframe the issueChain reproducer: the literal nested-receiver sink (`$BUILDER = newBuilder().uri($URL)` then `$BUILDER.build()`) not matching the fluent chain `newBuilder().uri(...).GET().build()` is INTENDED behaviour, not an analyzer false-negative. The calls are matched one at a time, so the order/structure of calls in the rule has to line up with the source; the intervening `.GET()` means `newBuilder().uri(...)` is never the direct receiver of `.build()`, so the nested literal receiver never binds. Drop the inaccurate docs that claimed a `$_.uri($URL)` form is "used in production ssrf-sinks.yaml (Builder-chain inline forms)" — that block does not exist in ssrf-sinks.yaml. Add issueChainSplitBuilder: same source, but bind `newBuilder()` in its own pattern-inside (`$NEW_BUILDER = ...newBuilder();`) and then match `$BUILDER = $NEW_BUILDER.uri($URL)`, matching the chain call-by-call. This fires (test passes), showing the correct way to express the intent. The issueChain test stays enabled/red as a documented reproducer. --- .../src/main/java/issues/issueChain.java | 32 ++++++------ .../java/issues/issueChainSplitBuilder.java | 49 +++++++++++++++++++ .../src/main/resources/issues/issueChain.yaml | 22 +++++---- .../issues/issueChainSplitBuilder.yaml | 37 ++++++++++++++ .../org/opentaint/semgrep/IssuesTest.kt | 16 ++++-- 5 files changed, 128 insertions(+), 28 deletions(-) create mode 100644 core/opentaint-java-querylang/samples/src/main/java/issues/issueChainSplitBuilder.java create mode 100644 core/opentaint-java-querylang/samples/src/main/resources/issues/issueChainSplitBuilder.yaml diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java index add0a6f7c..af0198b82 100644 --- a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java @@ -8,22 +8,26 @@ import issues.iChain.URI; /** - * Reproducer: a sink-side pattern that names a static-method receiver - * literally (`HttpRequest.newBuilder().uri($URL)`) does NOT match a - * chained call expression like - * `req = HttpRequest.newBuilder().uri(URI.create(t)).GET().build();` + * INTENDED non-match (the {@code issueChain} test is expected to stay red). * - * Replacing the literal receiver with `$_.uri($URL)` matches the same - * shape (see SSRF rule `ssrf-sinks.yaml` for the production version - * that uses the `$_` form). The repro contrasts the two forms in - * separate rule files; the YAML referenced by `@RuleSet` is the - * failing literal-receiver form. + *

The sink rule nests the static receiver inside a single expression, + * {@code $BUILDER = HttpRequest.newBuilder().uri($URL)}, and then matches + * {@code $BUILDER.build()}. The source, however, builds the request as one + * fluent chain: + *

+ *   req = HttpRequest.newBuilder().uri(URI.create(t)).GET().build();
+ * 
+ * whose calls are matched one at a time. The order/structure of the calls + * in the rule does not line up with the source — there is no point where a + * single {@code newBuilder().uri(...)} sub-expression is the direct receiver + * of {@code .build()} (the {@code .GET()} call sits in between) — so the + * literal nested receiver never binds and the rule legitimately does not + * fire. This is expected behaviour, not an analyzer false-negative. * - * The PositiveTaint case exercises the same shape that the production - * `UnsafeHttpClientSendController` test uses; this test fails until - * the literal-receiver form is supported (or the documented - * `(HttpRequest.Builder $_)` typed-receiver constraint becomes - * available). + *

The working way to express the same intent is to bind the static + * {@code newBuilder()} call to its own metavariable first and then match the + * chain call-by-call. See {@link issueChainSplitBuilder} (and + * {@code issueChainSplitBuilder.yaml}) for that passing counterpart. */ @RuleSet("issues/issueChain.yaml") public abstract class issueChain implements RuleSample { diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChainSplitBuilder.java b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChainSplitBuilder.java new file mode 100644 index 000000000..7f66cd203 --- /dev/null +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChainSplitBuilder.java @@ -0,0 +1,49 @@ +package issues; + +import base.RuleSample; +import base.RuleSet; +import issues.iChain.HttpClient; +import issues.iChain.HttpRequest; +import issues.iChain.Source; +import issues.iChain.URI; + +/** + * Working counterpart of {@link issueChain}: identical chained-builder + * source, but the sink rule binds the static {@code newBuilder()} call to + * its own metavariable in a separate {@code pattern-inside} + * ({@code $NEW_BUILDER = ...newBuilder();}) before matching + * {@code $BUILDER = $NEW_BUILDER.uri($URL)}. + * + *

The fluent chain {@code newBuilder().uri(URI.create(t)).GET().build()} + * is matched call-by-call, so the static-method receiver no longer has to + * appear nested inside the {@code .uri(...)} pattern (the form that fails + * in {@link issueChain}). See {@code issueChainSplitBuilder.yaml}. + */ +@RuleSet("issues/issueChainSplitBuilder.yaml") +public abstract class issueChainSplitBuilder implements RuleSample { + + static class PositiveTaint extends issueChainSplitBuilder { + @Override + public void entrypoint() { + String t = Source.taint(); + HttpRequest req = HttpRequest.newBuilder() + .uri(URI.create(t)) + .GET() + .build(); + HttpClient client = HttpClient.newHttpClient(); + client.send(req); + } + } + + static class NegativeTaint extends issueChainSplitBuilder { + @Override + public void entrypoint() { + HttpRequest req = HttpRequest.newBuilder() + .uri(URI.create("https://example.com")) + .GET() + .build(); + HttpClient client = HttpClient.newHttpClient(); + client.send(req); + } + } +} diff --git a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml index 0b9683f5c..bad1ae32b 100644 --- a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml +++ b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml @@ -1,6 +1,6 @@ rules: - id: issue-chain - message: "Reproducer: literal static-method receiver inside chained-builder pattern doesn't match" + message: "Reproducer (intended non-match): nested literal static receiver doesn't line up with the fluent chain" severity: WARNING languages: [java] mode: taint @@ -9,20 +9,22 @@ rules: - pattern: $UNTRUSTED = issues.iChain.Source.taint(); pattern-sinks: - # FAILING FORM — uses `issues.iChain.HttpRequest.newBuilder()` as - # the literal receiver of `.uri(...)`. The source is the chained - # call: + # INTENDED NON-MATCH. This nests the static receiver inside one + # expression — `$BUILDER = newBuilder().uri($URL)` — and then matches + # `$BUILDER.build()`. The source is a single fluent chain: # HttpRequest req = HttpRequest.newBuilder() # .uri(URI.create(t)) # .GET() # .build(); # client.send(req); - # Replacing the literal receiver with `$_.uri($URL)` makes the - # exact same test pass (see the production `ssrf-sinks.yaml` - # `java-ssrf-sink` block at the `Builder-chain inline forms` - # comment — that's the workaround currently used in the SSRF - # rule). The literal form should also match the same shape but - # does not. + # whose calls are matched one at a time. There is no point where a + # `newBuilder().uri(...)` sub-expression is the direct receiver of + # `.build()` (the `.GET()` call sits in between), so the literal + # nested receiver never binds and the rule legitimately does not + # fire. The order/structure of calls in the rule has to line up with + # the source. See `issueChainSplitBuilder.yaml` for the working form + # that binds `newBuilder()` separately and matches the chain + # call-by-call. - patterns: - pattern-either: - pattern-inside: | diff --git a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChainSplitBuilder.yaml b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChainSplitBuilder.yaml new file mode 100644 index 000000000..cf37f2c77 --- /dev/null +++ b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChainSplitBuilder.yaml @@ -0,0 +1,37 @@ +rules: + - id: issue-chain-split-builder + message: "Working form: bind newBuilder() in its own pattern-inside, then match .uri()/.build() call-by-call" + severity: WARNING + languages: [java] + mode: taint + + pattern-sources: + - pattern: $UNTRUSTED = issues.iChain.Source.taint(); + + pattern-sinks: + # WORKING FORM. The source builds the request as one fluent chain: + # HttpRequest req = HttpRequest.newBuilder() + # .uri(URI.create(t)) + # .GET() + # .build(); + # client.send(req); + # The static `newBuilder()` call is bound to `$NEW_BUILDER` in its + # own `pattern-inside` first, then `.uri($URL)` is matched on that + # receiver. Because each call in the chain is matched separately, + # the differing order of calls between rule and source (the + # intervening `.GET()`) no longer breaks the match — unlike the + # nested literal form `newBuilder().uri($URL)` in `issueChain.yaml`. + - patterns: + - pattern-either: + - pattern-inside: | + $URL = issues.iChain.URI.create($UNTRUSTED); + ... + - pattern-inside: | + $NEW_BUILDER = issues.iChain.HttpRequest.newBuilder(); + ... + - pattern: | + $BUILDER = $NEW_BUILDER.uri($URL); + ...; + $TYPE $REQ = $BUILDER.build(); + ...; + (issues.iChain.HttpClient $C).send($REQ, ...); diff --git a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt index ed558059b..58d0d1871 100644 --- a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt +++ b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt @@ -17,6 +17,7 @@ import issues.issue95 import issues.issue96 import issues.issue97 import issues.issueChain +import issues.issueChainSplitBuilder import org.junit.jupiter.api.AfterAll import org.junit.jupiter.api.Disabled import org.junit.jupiter.api.TestInstance @@ -107,14 +108,21 @@ class IssuesTest : SampleBasedTest() { fun `issue 97`() = runTest() @Test - // Reproducer for a chained-builder pattern with a literal static-method - // receiver. Should match the `req = newBuilder().uri(URI.create(t)).GET().build();` - // shape but does not — see `issueChain.yaml` and `issueChain.java` for - // the failing pattern and its passing `$_.uri($URL)` counterpart. + // INTENDED non-match (this test is expected to be red). The rule nests the + // static receiver inside one expression — `$BUILDER = newBuilder().uri($URL)` + // — and then matches `$BUILDER.build()`. The source builds the request as a + // single fluent chain `newBuilder().uri(URI.create(t)).GET().build()`, whose + // calls are matched one at a time, so the order/structure of calls in the + // rule does not line up with the source and the literal nested receiver never + // binds. `issueChainSplitBuilder` shows the working form (bind `newBuilder()` + // separately first). See `issueChain.yaml`/`issueChain.java` for details. // (`EXPECT_STATE_VAR` because the multi-statement sink pattern with // `$BUILDER`/`$REQ` introduces state-vars during taint config build.) fun `issue chain-pattern literal static receiver`() = runTest(EXPECT_STATE_VAR) + @Test + fun `issue chain-pattern split builder`() = runTest(EXPECT_STATE_VAR) + @AfterAll fun close() { closeRunner() From 2548c03bd1d353a807288370bf816ec9ceec6d7d Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 21 May 2026 00:53:41 +0200 Subject: [PATCH 3/6] fix(samples): Record chain-pattern tainted case as Negative sample MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The issueChain sample's tainted fluent-chain case was named PositiveTaint, so the sample harness (SampleData/SampleBasedTest) expected a finding. The chain-pattern rule legitimately cannot match that shape — the nested static receiver never binds (see the class Javadoc) — so the `issue chain-pattern literal static receiver` test was deliberately failing ("red"). Reclassify it as an expected non-detection by renaming the class to NegativeTaintIntendedNonMatch (distinct from the existing literal-only NegativeTaint case), which the harness treats as no-finding-expected. The test now passes. Update the now-inaccurate "expected to be red" comments in issueChain.java and IssuesTest.kt to match. --- .../samples/src/main/java/issues/issueChain.java | 9 +++++++-- .../src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java index af0198b82..0b2b0c58c 100644 --- a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java @@ -8,7 +8,10 @@ import issues.iChain.URI; /** - * INTENDED non-match (the {@code issueChain} test is expected to stay red). + * INTENDED non-match: the rule legitimately does not fire on the tainted fluent + * chain, so that case is recorded as a {@code Negative} sample + * ({@link NegativeTaintIntendedNonMatch}) rather than a positive, and the + * {@code issueChain} test passes. * *

The sink rule nests the static receiver inside a single expression, * {@code $BUILDER = HttpRequest.newBuilder().uri($URL)}, and then matches @@ -32,7 +35,9 @@ @RuleSet("issues/issueChain.yaml") public abstract class issueChain implements RuleSample { - static class PositiveTaint extends issueChain { + // Carries real taint, but the chain pattern legitimately does not match + // (see class Javadoc), so no finding is expected — recorded as a Negative case. + static class NegativeTaintIntendedNonMatch extends issueChain { @Override public void entrypoint() { String t = Source.taint(); diff --git a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt index 58d0d1871..f3040412c 100644 --- a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt +++ b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt @@ -108,7 +108,8 @@ class IssuesTest : SampleBasedTest() { fun `issue 97`() = runTest() @Test - // INTENDED non-match (this test is expected to be red). The rule nests the + // INTENDED non-match (the tainted case is recorded as a Negative sample, so + // the test passes). The rule nests the // static receiver inside one expression — `$BUILDER = newBuilder().uri($URL)` // — and then matches `$BUILDER.build()`. The source builds the request as a // single fluent chain `newBuilder().uri(URI.create(t)).GET().build()`, whose From 6cd820d36dd7775ce57e051e6fcf2df894f0b986 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 21 May 2026 01:20:41 +0200 Subject: [PATCH 4/6] docs(samples): Strip verbose chain-pattern explanatory comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the long block comments that documented the chain-pattern non-match across the issueChain / issueChainSplitBuilder samples, their rule yamls, and the IssuesTest entries. The self-documenting class names (NegativeTaintIntendedNonMatch, issueChainSplitBuilder) plus a short inline note now carry the intent; also drop the now-orphaned "see class Javadoc" reference left over after the Javadoc removal. No functional change — comments only. --- .../src/main/java/issues/issueChain.java | 29 ++----------------- .../java/issues/issueChainSplitBuilder.java | 12 -------- .../src/main/resources/issues/issueChain.yaml | 16 ---------- .../issues/issueChainSplitBuilder.yaml | 12 -------- .../org/opentaint/semgrep/IssuesTest.kt | 11 ------- 5 files changed, 2 insertions(+), 78 deletions(-) diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java index 0b2b0c58c..17b605e07 100644 --- a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java @@ -7,36 +7,11 @@ import issues.iChain.Source; import issues.iChain.URI; -/** - * INTENDED non-match: the rule legitimately does not fire on the tainted fluent - * chain, so that case is recorded as a {@code Negative} sample - * ({@link NegativeTaintIntendedNonMatch}) rather than a positive, and the - * {@code issueChain} test passes. - * - *

The sink rule nests the static receiver inside a single expression, - * {@code $BUILDER = HttpRequest.newBuilder().uri($URL)}, and then matches - * {@code $BUILDER.build()}. The source, however, builds the request as one - * fluent chain: - *

- *   req = HttpRequest.newBuilder().uri(URI.create(t)).GET().build();
- * 
- * whose calls are matched one at a time. The order/structure of the calls - * in the rule does not line up with the source — there is no point where a - * single {@code newBuilder().uri(...)} sub-expression is the direct receiver - * of {@code .build()} (the {@code .GET()} call sits in between) — so the - * literal nested receiver never binds and the rule legitimately does not - * fire. This is expected behaviour, not an analyzer false-negative. - * - *

The working way to express the same intent is to bind the static - * {@code newBuilder()} call to its own metavariable first and then match the - * chain call-by-call. See {@link issueChainSplitBuilder} (and - * {@code issueChainSplitBuilder.yaml}) for that passing counterpart. - */ @RuleSet("issues/issueChain.yaml") public abstract class issueChain implements RuleSample { - // Carries real taint, but the chain pattern legitimately does not match - // (see class Javadoc), so no finding is expected — recorded as a Negative case. + // Carries real taint, but the chain pattern's nested static receiver never + // binds, so the rule legitimately does not fire — recorded as a Negative case. static class NegativeTaintIntendedNonMatch extends issueChain { @Override public void entrypoint() { diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChainSplitBuilder.java b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChainSplitBuilder.java index 7f66cd203..28dac39f2 100644 --- a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChainSplitBuilder.java +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChainSplitBuilder.java @@ -7,18 +7,6 @@ import issues.iChain.Source; import issues.iChain.URI; -/** - * Working counterpart of {@link issueChain}: identical chained-builder - * source, but the sink rule binds the static {@code newBuilder()} call to - * its own metavariable in a separate {@code pattern-inside} - * ({@code $NEW_BUILDER = ...newBuilder();}) before matching - * {@code $BUILDER = $NEW_BUILDER.uri($URL)}. - * - *

The fluent chain {@code newBuilder().uri(URI.create(t)).GET().build()} - * is matched call-by-call, so the static-method receiver no longer has to - * appear nested inside the {@code .uri(...)} pattern (the form that fails - * in {@link issueChain}). See {@code issueChainSplitBuilder.yaml}. - */ @RuleSet("issues/issueChainSplitBuilder.yaml") public abstract class issueChainSplitBuilder implements RuleSample { diff --git a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml index bad1ae32b..6a5477df0 100644 --- a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml +++ b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml @@ -9,22 +9,6 @@ rules: - pattern: $UNTRUSTED = issues.iChain.Source.taint(); pattern-sinks: - # INTENDED NON-MATCH. This nests the static receiver inside one - # expression — `$BUILDER = newBuilder().uri($URL)` — and then matches - # `$BUILDER.build()`. The source is a single fluent chain: - # HttpRequest req = HttpRequest.newBuilder() - # .uri(URI.create(t)) - # .GET() - # .build(); - # client.send(req); - # whose calls are matched one at a time. There is no point where a - # `newBuilder().uri(...)` sub-expression is the direct receiver of - # `.build()` (the `.GET()` call sits in between), so the literal - # nested receiver never binds and the rule legitimately does not - # fire. The order/structure of calls in the rule has to line up with - # the source. See `issueChainSplitBuilder.yaml` for the working form - # that binds `newBuilder()` separately and matches the chain - # call-by-call. - patterns: - pattern-either: - pattern-inside: | diff --git a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChainSplitBuilder.yaml b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChainSplitBuilder.yaml index cf37f2c77..8dd86d1b3 100644 --- a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChainSplitBuilder.yaml +++ b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChainSplitBuilder.yaml @@ -9,18 +9,6 @@ rules: - pattern: $UNTRUSTED = issues.iChain.Source.taint(); pattern-sinks: - # WORKING FORM. The source builds the request as one fluent chain: - # HttpRequest req = HttpRequest.newBuilder() - # .uri(URI.create(t)) - # .GET() - # .build(); - # client.send(req); - # The static `newBuilder()` call is bound to `$NEW_BUILDER` in its - # own `pattern-inside` first, then `.uri($URL)` is matched on that - # receiver. Because each call in the chain is matched separately, - # the differing order of calls between rule and source (the - # intervening `.GET()`) no longer breaks the match — unlike the - # nested literal form `newBuilder().uri($URL)` in `issueChain.yaml`. - patterns: - pattern-either: - pattern-inside: | diff --git a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt index f3040412c..c46bdec67 100644 --- a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt +++ b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt @@ -108,17 +108,6 @@ class IssuesTest : SampleBasedTest() { fun `issue 97`() = runTest() @Test - // INTENDED non-match (the tainted case is recorded as a Negative sample, so - // the test passes). The rule nests the - // static receiver inside one expression — `$BUILDER = newBuilder().uri($URL)` - // — and then matches `$BUILDER.build()`. The source builds the request as a - // single fluent chain `newBuilder().uri(URI.create(t)).GET().build()`, whose - // calls are matched one at a time, so the order/structure of calls in the - // rule does not line up with the source and the literal nested receiver never - // binds. `issueChainSplitBuilder` shows the working form (bind `newBuilder()` - // separately first). See `issueChain.yaml`/`issueChain.java` for details. - // (`EXPECT_STATE_VAR` because the multi-statement sink pattern with - // `$BUILDER`/`$REQ` introduces state-vars during taint config build.) fun `issue chain-pattern literal static receiver`() = runTest(EXPECT_STATE_VAR) @Test From 7185f50635170512e96c21af33fc99a6de95f25e Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 21 May 2026 01:25:03 +0200 Subject: [PATCH 5/6] test(samples): Rename chain-pattern test to mark intended non-match The `issue chain-pattern literal static receiver` test now records an expected non-detection (the NegativeTaintIntendedNonMatch sample), so rename it to `issue chain-pattern literal static receiver (intended non-match)` to make that intent clear. Test body unchanged. --- .../src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt index c46bdec67..7bd4111bb 100644 --- a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt +++ b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt @@ -108,7 +108,7 @@ class IssuesTest : SampleBasedTest() { fun `issue 97`() = runTest() @Test - fun `issue chain-pattern literal static receiver`() = runTest(EXPECT_STATE_VAR) + fun `issue chain-pattern literal static receiver (intended non-match)`() = runTest(EXPECT_STATE_VAR) @Test fun `issue chain-pattern split builder`() = runTest(EXPECT_STATE_VAR) From e5f5c36fc60618811b69bbcce2e50e71761c45f1 Mon Sep 17 00:00:00 2001 From: Aleksandr Misonizhnik Date: Thu, 21 May 2026 11:20:37 +0200 Subject: [PATCH 6/6] refactor(samples): Reframe chain-pattern reproducer around call order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reproducer's real point is that sub-pattern order affects reachability: the code creates the builder first and the URL second (newBuilder().uri(URI.create(t))), while the rule binds $URL = URI.create(...) first (pattern-inside) and then newBuilder().uri($URL). Because the rule's sub-pattern order doesn't line up with the code's call order, the sink is unreachable and the rule does not fire — so it's recorded as a Negative sample. (issueChainSplitBuilder reorders the rule so the order lines up.) Rename accordingly to describe the cause rather than the symptom: - test: `issue chain-pattern literal static receiver (intended non-match)` -> `issue chain-pattern order-sensitive match` - class: NegativeTaintIntendedNonMatch -> NegativeTaintOrderSensitive - rule message updated to mention the order mismatch / unreachable sink --- .../samples/src/main/java/issues/issueChain.java | 7 ++++--- .../samples/src/main/resources/issues/issueChain.yaml | 2 +- .../src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java index 17b605e07..f0657e62e 100644 --- a/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java +++ b/core/opentaint-java-querylang/samples/src/main/java/issues/issueChain.java @@ -10,9 +10,10 @@ @RuleSet("issues/issueChain.yaml") public abstract class issueChain implements RuleSample { - // Carries real taint, but the chain pattern's nested static receiver never - // binds, so the rule legitimately does not fire — recorded as a Negative case. - static class NegativeTaintIntendedNonMatch extends issueChain { + // Carries real taint, but the rule's sub-pattern order (URL bound before the + // builder) doesn't line up with the code's call order (builder created first), + // so the sink is unreachable and the rule does not fire — a Negative case. + static class NegativeTaintOrderSensitive extends issueChain { @Override public void entrypoint() { String t = Source.taint(); diff --git a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml index 6a5477df0..15fa292b6 100644 --- a/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml +++ b/core/opentaint-java-querylang/samples/src/main/resources/issues/issueChain.yaml @@ -1,6 +1,6 @@ rules: - id: issue-chain - message: "Reproducer (intended non-match): nested literal static receiver doesn't line up with the fluent chain" + message: "Reproducer (order-sensitive match): rule sub-pattern order (URL before builder) doesn't line up with the code's call order, so the sink is unreachable" severity: WARNING languages: [java] mode: taint diff --git a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt index 7bd4111bb..9bcce3a57 100644 --- a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt +++ b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt @@ -108,7 +108,7 @@ class IssuesTest : SampleBasedTest() { fun `issue 97`() = runTest() @Test - fun `issue chain-pattern literal static receiver (intended non-match)`() = runTest(EXPECT_STATE_VAR) + fun `issue chain-pattern order-sensitive match`() = runTest(EXPECT_STATE_VAR) @Test fun `issue chain-pattern split builder`() = runTest(EXPECT_STATE_VAR)