From fd461a370ea8d953ce1792e241901201faae5858 Mon Sep 17 00:00:00 2001
From: Stephen Freudenthaler <steve.freudenthaler@dotcms.com>
Date: Tue, 9 Jun 2026 18:23:44 -0400
Subject: [PATCH 1/5] feat: add codex executor for OpenAI GPT/Codex via
 bedrock-mantle

Route openai.* model_ids (openai.gpt-5.5, openai.gpt-5.4) to a new
codex-executor. These models are served only by the bedrock-mantle
endpoint (OpenAI Responses API), not bedrock-runtime, so the generic
Converse executor can't reach them.

- codex-executor.yml: SigV4-signed (service name "bedrock", no bearer
  token) STREAMING call to the Responses API; accumulates SSE
  response.output_text.delta. Remaps us-east-1 -> us-east-2 (mantle
  region). Reuses the /tmp sticky-comment helper + sticky_namespace.
  reasoning_effort input (default medium). botocore installed at runtime
  (AWS CLI v2 bundle doesn't expose it to system python).
- claude-orchestrator.yml: anchored ^([a-z]+\.)?openai\. route ->
  openai-mantle -> codex-mantle job (checked before the generic
  fallback). Optional reasoning_effort pass-through. Consumers change
  only model_id.
- CLAUDE.md / ARCHITECTURE.md: routing tables, executor docs, diagram.

IAM provisioned in dotCMS/Infrastructure-as-code#7836. Streaming is
mandatory (GPT-5.x reasons before emitting); max_output_tokens does not
cap reasoning tokens. Auth posture confirmed by the spike on #7836.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/claude-orchestrator.yml |  26 ++
 .github/workflows/codex-executor.yml      | 381 ++++++++++++++++++++++
 ARCHITECTURE.md                           |  24 +-
 CLAUDE.md                                 |   4 +-
 4 files changed, 430 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/codex-executor.yml

diff --git a/.github/workflows/claude-orchestrator.yml b/.github/workflows/claude-orchestrator.yml
index b2f98e7..51b1140 100644
--- a/.github/workflows/claude-orchestrator.yml
+++ b/.github/workflows/claude-orchestrator.yml
@@ -82,6 +82,11 @@ on:
         required: false
         type: string
         default: ''
+      reasoning_effort:
+        description: 'Reasoning effort for the OpenAI/Codex (mantle) path: minimal | low | medium | high. Ignored on other paths.'
+        required: false
+        type: string
+        default: 'medium'
     secrets:
       ANTHROPIC_API_KEY:
         description: 'Anthropic API key — required only when using the direct Anthropic API path (model_id empty)'
@@ -158,6 +163,11 @@ jobs:
           # Avoids false matches on third-party model IDs that contain "anthropic." as substring.
           elif [[ "${MODEL_ID}" =~ ^([a-z]+\.)?anthropic\. ]]; then
             PROVIDER="anthropic-bedrock"
+          # OpenAI GPT/Codex (openai.gpt-5.5, openai.gpt-5.4). Served only by bedrock-mantle
+          # (OpenAI Responses API), not bedrock-runtime — routed to the codex executor.
+          # Anchored like the anthropic match so "openai." as a substring can't misroute.
+          elif [[ "${MODEL_ID}" =~ ^([a-z]+\.)?openai\. ]]; then
+            PROVIDER="openai-mantle"
           else
             PROVIDER="bedrock-generic"
           fi
@@ -201,3 +211,19 @@ jobs:
       sticky_namespace: ${{ inputs.sticky_namespace }}
       timeout_minutes: ${{ inputs.timeout_minutes }}
       runner: ${{ inputs.runner }}
+
+  codex-mantle:
+    needs: route
+    if: needs.route.outputs.provider == 'openai-mantle'
+    uses: ./.github/workflows/codex-executor.yml
+    with:
+      model_id: ${{ inputs.model_id }}
+      bedrock_role_arn: ${{ inputs.bedrock_role_arn }}
+      # aws_region defaults to us-east-1; the codex executor remaps that to us-east-2
+      # (mantle is not offered in us-east-1). Consumers only set model_id.
+      aws_region: ${{ inputs.aws_region }}
+      prompt: ${{ inputs.prompt }}
+      sticky_namespace: ${{ inputs.sticky_namespace }}
+      reasoning_effort: ${{ inputs.reasoning_effort }}
+      timeout_minutes: ${{ inputs.timeout_minutes }}
+      runner: ${{ inputs.runner }}
diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml
new file mode 100644
index 0000000..cbb24da
--- /dev/null
+++ b/.github/workflows/codex-executor.yml
@@ -0,0 +1,381 @@
+---
+# Codex Executor Workflow (Reusable)
+#
+# PURPOSE: Reviews PRs using OpenAI GPT/Codex models (GPT-5.5, GPT-5.4) served by the
+# AWS **bedrock-mantle** endpoint — a separate SigV4 service exposing the OpenAI Responses
+# API (https://bedrock-mantle.{region}.api.aws/openai/v1). These models are NOT on
+# bedrock-runtime: there is no InvokeModel/Converse, so the generic Bedrock executor cannot
+# reach them. Maintains the same auto-updating sticky comment as the other executors.
+#
+# AUTH: OIDC -> assumed role -> SigV4 (signing service name "bedrock", NOT "bedrock-mantle";
+# the IAM authorization prefix is still bedrock-mantle:*). No Bedrock API key / bearer token
+# is used — AWS_BEARER_TOKEN_BEDROCK is intentionally left UNSET so botocore signs with the
+# assumed-role credentials. This posture was confirmed by the auth-path spike on
+# dotCMS/Infrastructure-as-code#7836.
+#
+# STREAMING IS MANDATORY: GPT-5.5 reasons before emitting output; a non-streaming call buffers
+# the whole response and looks like a 60-100s hang. We stream Server-Sent Events and accumulate
+# response.output_text.delta chunks. max_output_tokens does NOT cap reasoning tokens.
+#
+# REGION: GPT-5.5 is us-east-2 only; GPT-5.4 adds us-west-2. Neither is in us-east-1 (the
+# orchestrator's default), so this executor remaps the us-east-1 default to us-east-2.
+#
+# ROUTING: claude-orchestrator.yml selects this executor automatically when model_id matches
+# openai.* — consumers shouldn't usually call it directly.
+#
+# REQUIREMENTS: The caller's repo must have id-token: write permission and a mantle-capable
+# IAM role accessible via OIDC (bedrock-mantle:CreateInference with a bedrock-mantle:Model
+# StringLike openai.* condition). See dotCMS/Infrastructure-as-code bedrock-code-review/.
+
+name: Codex Executor (Reusable)
+
+on:
+  workflow_call:
+    inputs:
+      model_id:
+        description: 'OpenAI mantle model ID (e.g. openai.gpt-5.5, openai.gpt-5.4)'
+        required: true
+        type: string
+      bedrock_role_arn:
+        description: 'IAM role ARN that GitHub Actions assumes via OIDC to call bedrock-mantle'
+        required: true
+        type: string
+      aws_region:
+        description: 'AWS region for the mantle endpoint. us-east-1 (orchestrator default) is remapped to us-east-2, since mantle is not offered in us-east-1.'
+        required: false
+        type: string
+        default: 'us-east-1'
+      prompt:
+        description: 'Review prompt sent to the model along with the PR diff'
+        required: false
+        type: string
+        default: |
+          Review this PR diff. Flag anything that looks wrong, risky, or worth a second look:
+          bad assumptions, missing edge cases, design problems, security issues. Skip praise.
+          If it is clean, say so in one line.
+      sticky_namespace:
+        description: 'Namespace appended to the sticky-comment marker to keep multiple review jobs from clobbering each other (e.g. "codex-reviewer"). Defaults to the model id.'
+        required: false
+        type: string
+        default: ''
+      max_diff_chars:
+        description: 'Maximum diff length to send to the model, in bytes (the limit is applied with wc -c / head -c, so multi-byte characters count more than once). Larger diffs are truncated at a line boundary.'
+        required: false
+        type: number
+        default: 80000
+      max_output_tokens:
+        description: 'Maximum tokens the model may emit for the visible answer. Does NOT cap reasoning tokens (cost/latency note).'
+        required: false
+        type: number
+        default: 2048
+      reasoning_effort:
+        description: 'Reasoning effort for GPT-5.x: minimal | low | medium | high. Higher = better review, more reasoning tokens + latency.'
+        required: false
+        type: string
+        default: 'medium'
+      timeout_minutes:
+        description: 'Job timeout in minutes. Reasoning models stream slowly; default is generous.'
+        required: false
+        type: number
+        default: 20
+      runner:
+        description: 'GitHub runner label'
+        required: false
+        type: string
+        default: 'ubuntu-latest'
+
+jobs:
+  review:
+    runs-on: ${{ inputs.runner }}
+    timeout-minutes: ${{ inputs.timeout_minutes }}
+    permissions:
+      id-token: write       # OIDC -> STS
+      contents: read
+      pull-requests: write  # post / update sticky comment
+    env:
+      MODEL_ID: ${{ inputs.model_id }}
+      MAX_DIFF_CHARS: ${{ inputs.max_diff_chars }}
+      MAX_OUTPUT_TOKENS: ${{ inputs.max_output_tokens }}
+      REASONING_EFFORT: ${{ inputs.reasoning_effort }}
+      # Default uses the model id so different models naturally get different stickies.
+      STICKY_MARKER: ${{ format('<!-- dotcms-ai-review:v3:{0} -->', inputs.sticky_namespace != '' && inputs.sticky_namespace || inputs.model_id) }}
+    steps:
+      - name: Resolve mantle region
+        id: region
+        env:
+          REQUESTED_REGION: ${{ inputs.aws_region }}
+        run: |
+          set -euo pipefail
+          # Mantle is not offered in us-east-1 (the orchestrator's default). Treat the default
+          # as "use the mantle default" so consumers only ever set model_id. An explicit
+          # us-west-2 (valid for GPT-5.4) is honored as-is.
+          REGION="${REQUESTED_REGION}"
+          if [ -z "${REGION}" ] || [ "${REGION}" = "us-east-1" ]; then
+            REGION="us-east-2"
+          fi
+          echo "Effective mantle region: ${REGION}"
+          echo "region=${REGION}" >> "$GITHUB_OUTPUT"
+
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Configure AWS credentials (OIDC)
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ inputs.bedrock_role_arn }}
+          aws-region: ${{ steps.region.outputs.region }}
+
+      # Inlined because actions/checkout above checks out the *consumer's* repo, not this one,
+      # so a relative script path would resolve against the consumer (and not exist for
+      # external consumers). Writing to /tmp avoids the cross-repo path dependency.
+      - name: Set up sticky-comment helper
+        run: |
+          cat > /tmp/sticky-comment.sh <<'STICKY_EOF'
+          #!/usr/bin/env bash
+          # Find-or-update a single PR comment identified by STICKY_MARKER.
+          # Usage: sticky-comment.sh <pr_number> <body_file>
+          # Env:   GH_TOKEN, GITHUB_REPOSITORY, STICKY_MARKER
+          set -euo pipefail
+          PR_NUMBER="${1:?pr number required}"
+          BODY_FILE="${2:?body file required}"
+          : "${GH_TOKEN:?GH_TOKEN must be set}"
+          : "${GITHUB_REPOSITORY:?GITHUB_REPOSITORY must be set}"
+          : "${STICKY_MARKER:?STICKY_MARKER must be set}"
+          [ -r "$BODY_FILE" ] || { echo "Body file not readable: $BODY_FILE" >&2; exit 1; }
+          EXISTING_ID=$(
+            gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" --paginate \
+              | jq -r --arg marker "$STICKY_MARKER" \
+                  '.[] | select(.body | startswith($marker)) | .id' \
+              | head -1
+          )
+          if [ -n "$EXISTING_ID" ] && ! [[ "$EXISTING_ID" =~ ^[0-9]+$ ]]; then
+            echo "::warning::EXISTING_ID is non-numeric ($EXISTING_ID); creating a new comment instead"
+            EXISTING_ID=""
+          fi
+          PAYLOAD=$(jq -Rs --arg key body '{($key): .}' < "$BODY_FILE")
+          if [ -n "$EXISTING_ID" ]; then
+            echo "Updating existing sticky comment $EXISTING_ID"
+            echo "$PAYLOAD" | gh api "repos/${GITHUB_REPOSITORY}/issues/comments/${EXISTING_ID}" \
+              -X PATCH --input -
+          else
+            echo "Creating new sticky comment on PR #${PR_NUMBER}"
+            echo "$PAYLOAD" | gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
+              -X POST --input -
+          fi
+          STICKY_EOF
+          chmod +x /tmp/sticky-comment.sh
+
+      - name: Set up mantle review helper (SigV4 streaming)
+        run: |
+          cat > /tmp/mantle_review.py <<'PY_EOF'
+          #!/usr/bin/env python3
+          # Call the OpenAI Responses API on bedrock-mantle with SigV4 (assumed-role creds,
+          # no bearer token) and STREAM the result. Stdlib + botocore only — no requests.
+          import json, os, sys, urllib.request, urllib.error
+          from botocore.session import Session
+          from botocore.auth import SigV4Auth
+          from botocore.awsrequest import AWSRequest
+
+          region = os.environ["MANTLE_REGION"]
+          model = os.environ["MODEL_ID"]
+          max_out = int(os.environ.get("MAX_OUTPUT_TOKENS") or "2048")
+          effort = os.environ.get("REASONING_EFFORT") or "medium"
+          endpoint = f"https://bedrock-mantle.{region}.api.aws/openai/v1/responses"
+
+          with open("/tmp/prompt.txt", encoding="utf-8") as f:
+              user_input = f.read()
+
+          body = {
+              "model": model,
+              "instructions": "You are a senior code reviewer. Output GitHub-flavored markdown. Be concise.",
+              "input": user_input,
+              "stream": True,
+              "max_output_tokens": max_out,
+              "reasoning": {"effort": effort},
+          }
+          payload = json.dumps(body).encode("utf-8")
+
+          creds = Session().get_credentials()
+          if creds is None:
+              print("::error::No AWS credentials available for SigV4 signing", file=sys.stderr)
+              sys.exit(1)
+
+          # SigV4: the signing service name is "bedrock" (NOT "bedrock-mantle"); the IAM
+          # authorization prefix is bedrock-mantle:* — the two deliberately differ.
+          signed = AWSRequest(
+              method="POST", url=endpoint, data=payload,
+              headers={"Content-Type": "application/json", "Accept": "text/event-stream"},
+          )
+          SigV4Auth(creds.get_frozen_credentials(), "bedrock", region).add_auth(signed)
+
+          req = urllib.request.Request(endpoint, data=payload, method="POST")
+          for k, v in signed.headers.items():
+              req.add_header(k, v)
+
+          text_parts, usage = [], {}
+          try:
+              with urllib.request.urlopen(req, timeout=900) as resp:
+                  for raw in resp:
+                      line = raw.decode("utf-8", "replace").rstrip("\r\n")
+                      if not line.startswith("data:"):
+                          continue
+                      data = line[5:].strip()
+                      if not data or data == "[DONE]":
+                          continue
+                      try:
+                          evt = json.loads(data)
+                      except json.JSONDecodeError:
+                          continue
+                      etype = evt.get("type", "")  # SSE event discriminator
+                      if etype == "response.output_text.delta":
+                          text_parts.append(evt.get("delta", ""))  # accumulate the answer text
+                      elif etype in ("response.completed", "response.incomplete"):
+                          usage = (evt.get("response") or {}).get("usage") or {}
+                      elif etype in ("response.failed", "response.error", "error"):  # abort: never post a partial review as complete
+                          sys.exit(f"::error::mantle stream error: {json.dumps(evt)[:500]}")  # message goes to stderr, exit status 1
+          except urllib.error.HTTPError as e:
+              detail = e.read().decode("utf-8", "replace")[:1000]
+              print(f"::error::mantle HTTP {e.code}: {detail}", file=sys.stderr)
+              sys.exit(1)
+
+          review = "".join(text_parts).strip()
+          with open("/tmp/review.md", "w", encoding="utf-8") as f:
+              f.write(review if review else "_(model returned no text)_\n")
+
+          # Responses API usage shape: input_tokens, output_tokens, total_tokens, with
+          # output_tokens_details.reasoning_tokens for the (uncapped) reasoning spend.
+          # "?" stands in for any counter the stream never reported.
+          it, ot, tt = (usage.get(k, "?") for k in ("input_tokens", "output_tokens", "total_tokens"))
+          rt = (usage.get("output_tokens_details") or {}).get("reasoning_tokens", "?")
+          usage_line = f"in: {it} · out: {ot} (reasoning: {rt}) · total: {tt}"
+          with open("/tmp/usage.txt", "w", encoding="utf-8") as f:
+              f.write(usage_line)
+          print("Tokens:", usage_line)
+
+          if not review:
+              sys.exit(1)
+          PY_EOF
+          chmod +x /tmp/mantle_review.py
+
+      - name: Resolve PR number
+        id: pr
+        env:
+          ISSUE_PR_URL: ${{ github.event.issue.pull_request.url }}
+        run: |
+          set -euo pipefail
+          case "${GITHUB_EVENT_NAME}" in
+            pull_request|pull_request_target|pull_request_review|pull_request_review_comment)
+              PR_NUM="${{ github.event.pull_request.number }}"
+              ;;
+            issue_comment)
+              if [ -n "${ISSUE_PR_URL}" ]; then
+                PR_NUM="${{ github.event.issue.number }}"
+              else
+                echo "::error::codex-executor needs a PR context; issue_comment fired on a non-PR issue"
+                exit 1
+              fi
+              ;;
+            *)
+              echo "::error::codex-executor doesn't support event type: ${GITHUB_EVENT_NAME}"
+              exit 1
+              ;;
+          esac
+          if ! [[ "${PR_NUM}" =~ ^[0-9]+$ ]]; then
+            echo "::error::Resolved PR number is not a positive integer: ${PR_NUM}"
+            exit 1
+          fi
+          echo "number=${PR_NUM}" >> "$GITHUB_OUTPUT"
+
+      - name: Post in-progress sticky comment
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+        run: |
+          set -euo pipefail
+          RUN_LINK="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
+          {
+            printf "%s\n\n" "${STICKY_MARKER}"
+            printf "🔄 **Codex review in progress** — model: \`%s\`\n\n" "${MODEL_ID}"
+            printf "<sub>Run: [#%s](%s)</sub>\n" "${GITHUB_RUN_ID}" "${RUN_LINK}"
+          } > /tmp/comment.md
+          /tmp/sticky-comment.sh "${PR_NUMBER}" /tmp/comment.md
+
+      - name: Gather PR diff
+        id: diff
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+        run: |
+          set -euo pipefail
+          gh pr diff "${PR_NUMBER}" > /tmp/pr.diff
+          ORIG=$(wc -c < /tmp/pr.diff)
+          if [ "${ORIG}" -gt "${MAX_DIFF_CHARS}" ]; then
+            head -c "${MAX_DIFF_CHARS}" /tmp/pr.diff | sed '$d' > /tmp/pr.diff.trimmed
+            printf "\n\n[TRUNCATED — diff was %s chars, kept first ~%s]\n" "${ORIG}" "${MAX_DIFF_CHARS}" >> /tmp/pr.diff.trimmed
+            mv /tmp/pr.diff.trimmed /tmp/pr.diff
+          fi
+          echo "diff_chars=$(wc -c < /tmp/pr.diff)" >> "$GITHUB_OUTPUT"
+
+      - name: Build prompt
+        env:
+          REVIEW_PROMPT: ${{ inputs.prompt }}
+        run: |
+          set -euo pipefail
+          # workflow_call always passes the caller's value, even when empty, so the input
+          # default above is never reached from the orchestrator. Fall back here.
+          if [ -z "${REVIEW_PROMPT}" ]; then
+            REVIEW_PROMPT="Review this PR diff. Flag anything that looks wrong, risky, or worth a second look: bad assumptions, missing edge cases, design problems, security issues. Skip praise. If it is clean, say so in one line."
+          fi
+          {
+            printf "%s\n\n" "${REVIEW_PROMPT}"
+            printf -- "--- BEGIN DIFF ---\n"
+            cat /tmp/pr.diff
+            printf -- "\n--- END DIFF ---\n"
+          } > /tmp/prompt.txt
+
+      - name: Invoke bedrock-mantle (OpenAI Responses API, streaming)
+        id: invoke
+        env:
+          MANTLE_REGION: ${{ steps.region.outputs.region }}
+        run: |
+          set -euo pipefail
+          # botocore provides SigV4 signing. AWS CLI v2 on the runner is a self-contained
+          # bundle that does NOT expose botocore to system python, so install it explicitly.
+          python3 -m pip install --quiet --disable-pip-version-check botocore
+          python3 /tmp/mantle_review.py
+          echo "has_review=true" >> "$GITHUB_OUTPUT"
+
+      - name: Update sticky comment with review
+        if: steps.invoke.outputs.has_review == 'true'
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+        run: |
+          set -euo pipefail
+          USAGE=$(cat /tmp/usage.txt)
+          RUN_LINK="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
+          {
+            printf "%s\n\n" "${STICKY_MARKER}"
+            printf "## 🤖 Codex Review — \`%s\`\n\n" "${MODEL_ID}"
+            cat /tmp/review.md
+            printf "\n\n---\n"
+            printf "<sub>Run: [#%s](%s) · tokens: %s</sub>\n" "${GITHUB_RUN_ID}" "${RUN_LINK}" "${USAGE}"
+          } > /tmp/comment.md
+          /tmp/sticky-comment.sh "${PR_NUMBER}" /tmp/comment.md
+
+      - name: Report failure into sticky comment
+        if: failure() && steps.invoke.outputs.has_review != 'true'
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+        run: |
+          set -euo pipefail
+          RUN_LINK="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
+          {
+            printf "%s\n\n" "${STICKY_MARKER}"
+            printf "## ❌ Codex Review failed — \`%s\`\n\n" "${MODEL_ID}"
+            printf "The review job failed before producing output. See the run for details.\n\n"
+            printf "<sub>Run: [#%s](%s)</sub>\n" "${GITHUB_RUN_ID}" "${RUN_LINK}"
+          } > /tmp/comment.md
+          /tmp/sticky-comment.sh "${PR_NUMBER}" /tmp/comment.md
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 9a85b43..61ad6f4 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -81,27 +81,31 @@ flowchart TD
         route{"route job<br/>inspect model_id"}
         claude_exec["claude-executor.yml<br/>(Anthropic models)"]
         generic_exec["bedrock-generic-executor.yml<br/>(any other Bedrock model)"]
+        codex_exec["codex-executor.yml<br/>(OpenAI GPT/Codex)"]
 
         orch --> route
         route -->|"empty or *.anthropic.*"| claude_exec
+        route -->|"openai.*"| codex_exec
         route -->|"anything else"| generic_exec
     end
 
     subgraph upstreams["External calls"]
         anth_api["Anthropic API<br/>(api.anthropic.com)"]
         bedrock["AWS Bedrock<br/>(Converse / claude-code-action use_bedrock)"]
+        mantle["bedrock-mantle<br/>(OpenAI Responses API, SigV4)"]
     end
 
     consumer_wf -->|"workflow_call"| orch
     claude_exec -->|"provider=anthropic-api"| anth_api
     claude_exec -->|"provider=anthropic-bedrock<br/>via OIDC"| bedrock
     generic_exec -->|"OIDC + Converse"| bedrock
+    codex_exec -->|"OIDC + SigV4 streaming"| mantle
 
     classDef new fill:#e8f5e9,stroke:#1b5e20,stroke-width:1px
-    class generic_exec,route new
+    class generic_exec,route,codex_exec new
 ```
 
-Nodes shaded green are new in v3. The `route` job uses an anchored regex (`^([a-z]+\.)?anthropic\.`) so model IDs that merely contain the substring `"anthropic."` (e.g. `us.not-anthropic.foo`) are **not** misrouted.
+Nodes shaded green are new in v3 (`codex-executor` added later for the OpenAI/mantle path). The `route` job uses anchored regexes (`^([a-z]+\.)?anthropic\.`, `^([a-z]+\.)?openai\.`) so model IDs that merely contain the substring `"anthropic."`/`"openai."` (e.g. `us.not-anthropic.foo`) are **not** misrouted.
 
 ### Routing table
 
@@ -109,9 +113,10 @@ Nodes shaded green are new in v3. The `route` job uses an anchored regex (`^([a-
 | ------------------------------------------------------------- | ------------------- | --------------------------------- |
 | _(empty)_                                                     | `anthropic-api`     | `claude-executor.yml`             |
 | `anthropic.*` or `<region>.anthropic.*`                       | `anthropic-bedrock` | `claude-executor.yml`             |
-| Anything else (`us.amazon.*`, `meta.*`, `mistral.*`, ...)     | n/a                 | `bedrock-generic-executor.yml`    |
+| `openai.*` (e.g. `openai.gpt-5.5`, `openai.gpt-5.4`)          | `openai-mantle`     | `codex-executor.yml`              |
+| Anything else (`us.amazon.*`, `meta.*`, `mistral.*`, ...)     | `bedrock-generic`   | `bedrock-generic-executor.yml`    |
 
-The non-matching executor job is **skipped** by job-level `if:` conditional, not "ran and exited" — billable runner time is zero for the skipped path.
+The non-matching executor jobs are **skipped** by job-level `if:` conditional, not "ran and exited" — billable runner time is zero for the skipped paths.
 
 ---
 
@@ -144,6 +149,17 @@ Includes a pre-flight API health check on the `anthropic-api` path that skips gr
 
 Uses the Bedrock Converse API, which is model-family-agnostic. Maintains its own sticky comment via an inline helper (a setup step writes a bash find-or-update helper to `/tmp` so the logic isn't dependent on the consumer's checkout), replicating the auto-update behavior `claude-code-action` provides for free on the Anthropic path. Accepts a `sticky_namespace` input so multiple review jobs on the same PR don't clobber each other.
 
+#### 4. `codex-executor.yml` (OpenAI GPT/Codex via bedrock-mantle)
+
+For `openai.*` models (GPT-5.5, GPT-5.4), which are **not** on bedrock-runtime — there is no `InvokeModel`/`Converse`. They are served only by the separate **bedrock-mantle** endpoint exposing the OpenAI Responses API (`https://bedrock-mantle.{region}.api.aws/openai/v1/responses`). The executor:
+
+- Signs the request with **SigV4 using the assumed-role credentials** — signing service name `bedrock` (not `bedrock-mantle`; the IAM authorization prefix `bedrock-mantle:*` and the signing name deliberately differ). **No bearer token / Bedrock API key** is used (`AWS_BEARER_TOKEN_BEDROCK` stays unset), consistent with dotCMS's OIDC-only posture.
+- **Streams** Server-Sent Events and accumulates `response.output_text.delta` chunks. Streaming is mandatory: GPT-5.x reasons before emitting, so a non-streaming call buffers and looks like a 60–100s hang.
+- Remaps the orchestrator's `us-east-1` default to **us-east-2** (mantle is not offered in us-east-1; GPT-5.4 also accepts an explicit us-west-2).
+- Reuses the same `/tmp` sticky-comment helper and `sticky_namespace` input as the generic executor. Exposes `reasoning_effort` (default `medium`). Note `max_output_tokens` caps only the visible answer, **not** reasoning tokens.
+
+IAM: `bedrock-mantle:CreateInference` scoped by a `bedrock-mantle:Model StringLike openai.*` condition (no per-model ARNs exist on mantle). Provisioned in dotCMS/Infrastructure-as-code `bedrock-code-review/` (#7836). No AWS Marketplace subscription is required — mantle bills on-demand directly.
+
 ---
 
 ## Key benefits
diff --git a/CLAUDE.md b/CLAUDE.md
index 7e8d0c9..18d847e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -15,6 +15,7 @@ The repository implements a reusable workflow architecture with model-aware rout
 - **Claude Orchestrator** (`.github/workflows/claude-orchestrator.yml`): Lightweight wrapper that handles @claude mention detection AND routes to the appropriate executor based on `model_id`. Consumer repositories call this with `trigger_mode: interactive` or `trigger_mode: automatic`. Exactly one executor runs per call.
 - **Claude Executor** (`.github/workflows/claude-executor.yml`): Execution engine for Anthropic models — runs `anthropics/claude-code-action@v1` either against the direct Anthropic API (`provider: anthropic-api`, default) or via AWS Bedrock (`provider: anthropic-bedrock`, OIDC + `use_bedrock=true`).
 - **Bedrock Generic Executor** (`.github/workflows/bedrock-generic-executor.yml`): Execution engine for **any non-Anthropic Bedrock model** (Amazon Nova, Meta Llama, Mistral, Cohere, AI21). Uses the Bedrock Converse API and maintains its own sticky comment via an inlined helper (set up to `/tmp` at job start, so no cross-repo path dependency).
+- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle directly with a **SigV4-signed streaming** request (signing service `bedrock`, no bearer token / API key) and accumulates the SSE deltas. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default region to `us-east-2` (mantle is not in us-east-1). Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836.
 - **Deployment Guard** (`.github/workflows/deployment-guard.yml`): Reusable workflow for validating deployment changes with configurable rules. Features organization-based bypass for trusted members, file allowlist validation, image-only change detection, and comprehensive image validation (format, repository, version pattern, registry existence, anti-downgrade logic).
 
 ### Multi-model Routing (v3)
@@ -26,9 +27,10 @@ The orchestrator picks the executor by inspecting `model_id`:
 | _(empty / unset)_                                 | `claude-executor` (`anthropic-api`)| Backward-compat default; requires `ANTHROPIC_API_KEY` secret |
 | `*.anthropic.*` (e.g. `global.anthropic.claude-sonnet-4-6`) | `claude-executor` (`anthropic-bedrock`) | Requires `bedrock_role_arn` input              |
 | `anthropic.*` (bare)                              | `claude-executor` (`anthropic-bedrock`) | Requires `bedrock_role_arn` input              |
+| `openai.*` (e.g. `openai.gpt-5.5`, `openai.gpt-5.4`) | `codex-executor`                  | Requires `bedrock_role_arn`; mantle path (us-east-2) |
 | Anything else (Nova, Llama, Mistral, …)           | `bedrock-generic-executor`          | Requires `bedrock_role_arn` input              |
 
-The match for the Anthropic family is anchored: `^([a-z]+\.)?anthropic\.` — so a model ID that merely contains the substring `anthropic.` (e.g. `us.not-anthropic.foo`) is **not** misrouted.
+The matches for the Anthropic and OpenAI families are anchored: `^([a-z]+\.)?anthropic\.` and `^([a-z]+\.)?openai\.` — so a model ID that merely contains the substring `anthropic.`/`openai.` (e.g. `us.not-anthropic.foo`) is **not** misrouted. `openai.*` is checked before the generic fallback.
 
 ### Sticky Comments
 

From 2f93c7b40dd15be135947b9b3a5dbd0ccaccd0ed Mon Sep 17 00:00:00 2001
From: Stephen Freudenthaler <steve.freudenthaler@dotcms.com>
Date: Tue, 9 Jun 2026 19:39:47 -0400
Subject: [PATCH 2/5] fix(codex): correct mantle path to /v1, set store=false,
 fix region rationale

Verified live against the R&D account Models API:
- Endpoint path is /v1/responses, NOT /openai/v1/responses (the latter
  404s). Fixed the endpoint URL + header/docs.
- The bedrock-mantle endpoint IS available in us-east-1, but GPT-5.5/5.4
  are served only in us-east-2 (us-east-1 lists gpt-oss but no gpt-5*).
  The us-east-1 -> us-east-2 remap stays (routes to where the models
  live), but the rationale comments are corrected (it's model
  availability, not endpoint availability).
- Send store=false for zero data retention. The Responses API defaults
  store=true, which retains input+output 30 days in-region for
  previous_response_id chaining; single-shot review doesn't need state
  and shouldn't retain the diff.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/claude-orchestrator.yml |  5 ++--
 .github/workflows/codex-executor.yml      | 30 ++++++++++++++++-------
 ARCHITECTURE.md                           |  5 ++--
 CLAUDE.md                                 |  2 +-
 4 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/claude-orchestrator.yml b/.github/workflows/claude-orchestrator.yml
index 51b1140..6b4240a 100644
--- a/.github/workflows/claude-orchestrator.yml
+++ b/.github/workflows/claude-orchestrator.yml
@@ -219,8 +219,9 @@ jobs:
     with:
       model_id: ${{ inputs.model_id }}
       bedrock_role_arn: ${{ inputs.bedrock_role_arn }}
-      # aws_region defaults to us-east-1; the codex executor remaps that to us-east-2
-      # (mantle is not offered in us-east-1). Consumers only set model_id.
+      # aws_region defaults to us-east-1; the codex executor remaps that to us-east-2,
+      # where GPT-5.5/5.4 are served (the mantle endpoint exists in us-east-1 but the
+      # models do not yet). Consumers only set model_id.
       aws_region: ${{ inputs.aws_region }}
       prompt: ${{ inputs.prompt }}
       sticky_namespace: ${{ inputs.sticky_namespace }}
diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml
index cbb24da..6194e75 100644
--- a/.github/workflows/codex-executor.yml
+++ b/.github/workflows/codex-executor.yml
@@ -3,9 +3,9 @@
 #
 # PURPOSE: Reviews PRs using OpenAI GPT/Codex models (GPT-5.5, GPT-5.4) served by the
 # AWS **bedrock-mantle** endpoint — a separate SigV4 service exposing the OpenAI Responses
-# API (https://bedrock-mantle.{region}.api.aws/openai/v1). These models are NOT on
-# bedrock-runtime: there is no InvokeModel/Converse, so the generic Bedrock executor cannot
-# reach them. Maintains the same auto-updating sticky comment as the other executors.
+# API (https://bedrock-mantle.{region}.api.aws/v1). These models are NOT on bedrock-runtime:
+# there is no InvokeModel/Converse, so the generic Bedrock executor cannot reach them.
+# Maintains the same auto-updating sticky comment as the other executors.
 #
 # AUTH: OIDC -> assumed role -> SigV4 (signing service name "bedrock", NOT "bedrock-mantle";
 # the IAM authorization prefix is still bedrock-mantle:*). No Bedrock API key / bearer token
@@ -17,8 +17,15 @@
 # the whole response and looks like a 60-100s hang. We stream Server-Sent Events and accumulate
 # response.output_text.delta chunks. max_output_tokens does NOT cap reasoning tokens.
 #
-# REGION: GPT-5.5 is us-east-2 only; GPT-5.4 adds us-west-2. Neither is in us-east-1 (the
-# orchestrator's default), so this executor remaps the us-east-1 default to us-east-2.
+# REGION: the bedrock-mantle ENDPOINT exists in many regions including us-east-1, BUT the
+# GPT-5.5/5.4 MODELS are currently served only in us-east-2 — verified live via the Models
+# API (us-east-1 lists gpt-oss but no gpt-5*; us-east-2 lists openai.gpt-5.5 / openai.gpt-5.4).
+# So this executor remaps the us-east-1 default to us-east-2 where the models live. GPT-5.4 is
+# also offered in us-west-2. (Re-check the Models API if AWS expands GPT-5.x to us-east-1.)
+#
+# DATA RETENTION: the Responses API defaults store=true, which retains input+output for 30
+# days in-region for previous_response_id chaining. Code review is single-shot, so we send
+# store=false for zero data retention.
 #
 # ROUTING: claude-orchestrator.yml selects this executor automatically when model_id matches
 # openai.* — consumers shouldn't usually call it directly.
@@ -41,7 +48,7 @@ on:
         required: true
         type: string
       aws_region:
-        description: 'AWS region for the mantle endpoint. us-east-1 (orchestrator default) is remapped to us-east-2, since mantle is not offered in us-east-1.'
+        description: 'AWS region for the mantle endpoint. The us-east-1 default is remapped to us-east-2, where GPT-5.5/5.4 are served (the endpoint exists in us-east-1 but the models do not yet).'
         required: false
         type: string
         default: 'us-east-1'
@@ -106,8 +113,9 @@ jobs:
           REQUESTED_REGION: ${{ inputs.aws_region }}
         run: |
           set -euo pipefail
-          # Mantle is not offered in us-east-1 (the orchestrator's default). Treat the default
-          # as "use the mantle default" so consumers only ever set model_id. An explicit
+          # The mantle endpoint exists in us-east-1, but GPT-5.5/5.4 are served only in
+          # us-east-2 (us-east-1 lists gpt-oss but no gpt-5*). Treat the us-east-1 default as
+          # "send to where the models live" so consumers only set model_id. An explicit
           # us-west-2 (valid for GPT-5.4) is honored as-is.
           REGION="${REQUESTED_REGION}"
           if [ -z "${REGION}" ] || [ "${REGION}" = "us-east-1" ]; then
@@ -181,7 +189,7 @@ jobs:
           model = os.environ["MODEL_ID"]
           max_out = int(os.environ.get("MAX_OUTPUT_TOKENS") or "2048")
           effort = os.environ.get("REASONING_EFFORT") or "medium"
-          endpoint = f"https://bedrock-mantle.{region}.api.aws/openai/v1/responses"
+          endpoint = f"https://bedrock-mantle.{region}.api.aws/v1/responses"
 
           with open("/tmp/prompt.txt", encoding="utf-8") as f:
               user_input = f.read()
@@ -193,6 +201,10 @@ jobs:
               "stream": True,
               "max_output_tokens": max_out,
               "reasoning": {"effort": effort},
+              # store=false → zero data retention. The Responses API defaults to true (retains
+              # input+output 30 days in-region for previous_response_id chaining); code review
+              # is single-shot, so we don't need state and don't want the diff retained.
+              "store": False,
           }
           payload = json.dumps(body).encode("utf-8")
 
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 61ad6f4..b93f5aa 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -151,11 +151,12 @@ Uses the Bedrock Converse API, which is model-family-agnostic. Maintains its own
 
 #### 4. `codex-executor.yml` (OpenAI GPT/Codex via bedrock-mantle)
 
-For `openai.*` models (GPT-5.5, GPT-5.4), which are **not** on bedrock-runtime — there is no `InvokeModel`/`Converse`. They are served only by the separate **bedrock-mantle** endpoint exposing the OpenAI Responses API (`https://bedrock-mantle.{region}.api.aws/openai/v1/responses`). The executor:
+For `openai.*` models (GPT-5.5, GPT-5.4), which are **not** on bedrock-runtime — there is no `InvokeModel`/`Converse`. They are served only by the separate **bedrock-mantle** endpoint exposing the OpenAI Responses API (`https://bedrock-mantle.{region}.api.aws/v1/responses`). The executor:
 
 - Signs the request with **SigV4 using the assumed-role credentials** — signing service name `bedrock` (not `bedrock-mantle`; the IAM authorization prefix `bedrock-mantle:*` and the signing name deliberately differ). **No bearer token / Bedrock API key** is used (`AWS_BEARER_TOKEN_BEDROCK` stays unset), consistent with dotCMS's OIDC-only posture.
 - **Streams** Server-Sent Events and accumulates `response.output_text.delta` chunks. Streaming is mandatory: GPT-5.x reasons before emitting, so a non-streaming call buffers and looks like a 60–100s hang.
-- Remaps the orchestrator's `us-east-1` default to **us-east-2** (mantle is not offered in us-east-1; GPT-5.4 also accepts an explicit us-west-2).
+- Remaps the orchestrator's `us-east-1` default to **us-east-2**, where GPT-5.5/5.4 are served. The mantle *endpoint* exists in us-east-1, but the *models* are not there yet (verified via the Models API: us-east-1 lists gpt-oss but no gpt-5*). GPT-5.4 also accepts an explicit us-west-2.
+- Sends `store: false` on each request for **zero data retention** — the Responses API otherwise defaults `store: true`, retaining input+output for 30 days in-region for `previous_response_id` chaining, which single-shot review doesn't need.
 - Reuses the same `/tmp` sticky-comment helper and `sticky_namespace` input as the generic executor. Exposes `reasoning_effort` (default `medium`). Note `max_output_tokens` caps only the visible answer, **not** reasoning tokens.
 
 IAM: `bedrock-mantle:CreateInference` scoped by a `bedrock-mantle:Model StringLike openai.*` condition (no per-model ARNs exist on mantle). Provisioned in dotCMS/Infrastructure-as-code `bedrock-code-review/` (#7836). No AWS Marketplace subscription is required — mantle bills on-demand directly.
diff --git a/CLAUDE.md b/CLAUDE.md
index 18d847e..a1c01e1 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -15,7 +15,7 @@ The repository implements a reusable workflow architecture with model-aware rout
 - **Claude Orchestrator** (`.github/workflows/claude-orchestrator.yml`): Lightweight wrapper that handles @claude mention detection AND routes to the appropriate executor based on `model_id`. Consumer repositories call this with `trigger_mode: interactive` or `trigger_mode: automatic`. Exactly one executor runs per call.
 - **Claude Executor** (`.github/workflows/claude-executor.yml`): Execution engine for Anthropic models — runs `anthropics/claude-code-action@v1` either against the direct Anthropic API (`provider: anthropic-api`, default) or via AWS Bedrock (`provider: anthropic-bedrock`, OIDC + `use_bedrock=true`).
 - **Bedrock Generic Executor** (`.github/workflows/bedrock-generic-executor.yml`): Execution engine for **any non-Anthropic Bedrock model** (Amazon Nova, Meta Llama, Mistral, Cohere, AI21). Uses the Bedrock Converse API and maintains its own sticky comment via an inlined helper (set up to `/tmp` at job start, so no cross-repo path dependency).
-- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle directly with a **SigV4-signed streaming** request (signing service `bedrock`, no bearer token / API key) and accumulates the SSE deltas. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default region to `us-east-2` (mantle is not in us-east-1). Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836.
+- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle directly with a **SigV4-signed streaming** request (signing service `bedrock`, no bearer token / API key) and accumulates the SSE deltas. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default to `us-east-2`, where GPT-5.5/5.4 are served (the mantle endpoint exists in us-east-1 but the models are not there yet — verified via the Models API). Sends `store: false` for zero data retention. Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836.
 - **Deployment Guard** (`.github/workflows/deployment-guard.yml`): Reusable workflow for validating deployment changes with configurable rules. Features organization-based bypass for trusted members, file allowlist validation, image-only change detection, and comprehensive image validation (format, repository, version pattern, registry existence, anti-downgrade logic).
 
 ### Multi-model Routing (v3)

From 3254a11d1de22a8a4d362422a0cb71a61c300122 Mon Sep 17 00:00:00 2001
From: Stephen Freudenthaler <steve.freudenthaler@dotcms.com>
Date: Tue, 9 Jun 2026 19:52:47 -0400
Subject: [PATCH 3/5] feat(codex): switch to OpenAI SDK with short-term bearer
 token

Per the auth-path decision, move the executor from hand-rolled SigV4 +
urllib to the OpenAI SDK. The SDK can't consume SigV4, so authenticate
with a SHORT-TERM Bedrock bearer token minted in-process from the
OIDC-assumed-role session via aws-bedrock-token-generator. The token is
OIDC-derived (no long-lived secret), expires with the role session
(<=1h), is not a stored resource (nothing to clean up), and is never
written to env/disk/logs.

- Replace the SigV4 helper with client.responses.create(stream=True);
  accumulate response.output_text.delta, read usage off response.completed.
- Install openai + aws-bedrock-token-generator at runtime (was botocore).
- Keep store=false, reasoning_effort, region remap, sticky comment.
- Docs (CLAUDE.md/ARCHITECTURE.md) updated to the SDK + bearer path.

Requires bedrock-mantle:CallWithBearerToken (SHORT_TERM) in the IAM PR.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/codex-executor.yml | 138 ++++++++++++---------------
 ARCHITECTURE.md                      |   8 +-
 CLAUDE.md                            |   2 +-
 3 files changed, 66 insertions(+), 82 deletions(-)

diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml
index 6194e75..63a95fe 100644
--- a/.github/workflows/codex-executor.yml
+++ b/.github/workflows/codex-executor.yml
@@ -2,20 +2,23 @@
 # Codex Executor Workflow (Reusable)
 #
 # PURPOSE: Reviews PRs using OpenAI GPT/Codex models (GPT-5.5, GPT-5.4) served by the
-# AWS **bedrock-mantle** endpoint — a separate SigV4 service exposing the OpenAI Responses
-# API (https://bedrock-mantle.{region}.api.aws/v1). These models are NOT on bedrock-runtime:
+# AWS **bedrock-mantle** endpoint — the OpenAI Responses API at
+# https://bedrock-mantle.{region}.api.aws/v1. These models are NOT on bedrock-runtime:
 # there is no InvokeModel/Converse, so the generic Bedrock executor cannot reach them.
 # Maintains the same auto-updating sticky comment as the other executors.
 #
-# AUTH: OIDC -> assumed role -> SigV4 (signing service name "bedrock", NOT "bedrock-mantle";
-# the IAM authorization prefix is still bedrock-mantle:*). No Bedrock API key / bearer token
-# is used — AWS_BEARER_TOKEN_BEDROCK is intentionally left UNSET so botocore signs with the
-# assumed-role credentials. This posture was confirmed by the auth-path spike on
-# dotCMS/Infrastructure-as-code#7836.
+# AUTH: OIDC -> assumed role -> a SHORT-TERM Bedrock bearer token minted from that session
+# (aws-bedrock-token-generator `provide_token()`), passed to the OpenAI SDK. The OpenAI SDK
+# requires a bearer token (it cannot consume SigV4 directly), but a short-term key keeps the
+# OIDC-only posture: it is derived from the current STS credentials with NO long-lived secret,
+# NO stored resource to clean up, and a lifetime of min(12h, role session). Our role's
+# max_session_duration is 1h, so each token dies within the hour and lives only in this
+# ephemeral runner's memory. We never export it to GITHUB_ENV / disk / logs. IAM requires
+# bedrock-mantle:CallWithBearerToken (scoped to BearerTokenType=SHORT_TERM). See #7836.
 #
 # STREAMING IS MANDATORY: GPT-5.5 reasons before emitting output; a non-streaming call buffers
-# the whole response and looks like a 60-100s hang. We stream Server-Sent Events and accumulate
-# response.output_text.delta chunks. max_output_tokens does NOT cap reasoning tokens.
+# the whole response and looks like a 60-100s hang. We stream the Responses API and accumulate
+# response.output_text.delta events. max_output_tokens does NOT cap reasoning tokens.
 #
 # REGION: the bedrock-mantle ENDPOINT exists in many regions including us-east-1, BUT the
 # GPT-5.5/5.4 MODELS are currently served only in us-east-2 — verified live via the Models
@@ -174,93 +177,72 @@ jobs:
           STICKY_EOF
           chmod +x /tmp/sticky-comment.sh
 
-      - name: Set up mantle review helper (SigV4 streaming)
+      - name: Set up mantle review helper (OpenAI SDK + short-term bearer)
         run: |
           cat > /tmp/mantle_review.py <<'PY_EOF'
           #!/usr/bin/env python3
-          # Call the OpenAI Responses API on bedrock-mantle with SigV4 (assumed-role creds,
-          # no bearer token) and STREAM the result. Stdlib + botocore only — no requests.
-          import json, os, sys, urllib.request, urllib.error
-          from botocore.session import Session
-          from botocore.auth import SigV4Auth
-          from botocore.awsrequest import AWSRequest
+          # Review a PR diff via the OpenAI Responses API on bedrock-mantle, using the OpenAI
+          # SDK with a SHORT-TERM Bedrock bearer token minted from the assumed-role session.
+          import os, sys
+          from aws_bedrock_token_generator import provide_token
+          from openai import OpenAI
 
           region = os.environ["MANTLE_REGION"]
           model = os.environ["MODEL_ID"]
           max_out = int(os.environ.get("MAX_OUTPUT_TOKENS") or "2048")
           effort = os.environ.get("REASONING_EFFORT") or "medium"
-          endpoint = f"https://bedrock-mantle.{region}.api.aws/v1/responses"
 
           with open("/tmp/prompt.txt", encoding="utf-8") as f:
               user_input = f.read()
 
-          body = {
-              "model": model,
-              "instructions": "You are a senior code reviewer. Output GitHub-flavored markdown. Be concise.",
-              "input": user_input,
-              "stream": True,
-              "max_output_tokens": max_out,
-              "reasoning": {"effort": effort},
-              # store=false → zero data retention. The Responses API defaults to true (retains
-              # input+output 30 days in-region for previous_response_id chaining); code review
-              # is single-shot, so we don't need state and don't want the diff retained.
-              "store": False,
-          }
-          payload = json.dumps(body).encode("utf-8")
-
-          creds = Session().get_credentials()
-          if creds is None:
-              print("::error::No AWS credentials available for SigV4 signing", file=sys.stderr)
-              sys.exit(1)
+          # Mint a short-term bearer token from the current STS (OIDC-assumed-role) credentials.
+          # provide_token() is a local signing operation (no API call, no stored resource); the
+          # token inherits the role's permissions, lives only in this process, and expires with
+          # the role session (max_session_duration=1h here; <=12h cap). It is NEVER written to
+          # GITHUB_ENV, disk, or logs — keep it a local variable. AWS_REGION is set to the
+          # mantle region by configure-aws-credentials, so the token is signed for that region.
+          token = provide_token()
 
-          # SigV4: the signing service name is "bedrock" (NOT "bedrock-mantle"); the IAM
-          # authorization prefix is bedrock-mantle:* — the two deliberately differ.
-          signed = AWSRequest(
-              method="POST", url=endpoint, data=payload,
-              headers={"Content-Type": "application/json", "Accept": "text/event-stream"},
-          )
-          SigV4Auth(creds.get_frozen_credentials(), "bedrock", region).add_auth(signed)
-
-          req = urllib.request.Request(endpoint, data=payload, method="POST")
-          for k, v in signed.headers.items():
-              req.add_header(k, v)
+          client = OpenAI(base_url=f"https://bedrock-mantle.{region}.api.aws/v1", api_key=token)
 
-          text_parts, usage = [], {}
+          text_parts, usage = [], None
           try:
-              with urllib.request.urlopen(req, timeout=900) as resp:
-                  for raw in resp:
-                      line = raw.decode("utf-8", "replace").rstrip("\r\n")
-                      if not line.startswith("data:"):
-                          continue
-                      data = line[5:].strip()
-                      if not data or data == "[DONE]":
-                          continue
-                      try:
-                          evt = json.loads(data)
-                      except json.JSONDecodeError:
-                          continue
-                      etype = evt.get("type", "")
-                      if etype == "response.output_text.delta":
-                          text_parts.append(evt.get("delta", ""))
-                      elif etype in ("response.completed", "response.incomplete"):
-                          usage = (evt.get("response") or {}).get("usage") or {}
-                      elif etype == "response.error" or etype == "error":
-                          print(f"::error::mantle stream error: {json.dumps(evt)[:500]}", file=sys.stderr)
-          except urllib.error.HTTPError as e:
-              detail = e.read().decode("utf-8", "replace")[:1000]
-              print(f"::error::mantle HTTP {e.code}: {detail}", file=sys.stderr)
+              # store=False → zero data retention. The Responses API otherwise defaults store=True
+              # (retains input+output 30 days in-region for previous_response_id chaining); review
+              # is single-shot, so we don't need state and don't want the diff retained.
+              stream = client.responses.create(
+                  model=model,
+                  instructions="You are a senior code reviewer. Output GitHub-flavored markdown. Be concise.",
+                  input=user_input,
+                  max_output_tokens=max_out,
+                  reasoning={"effort": effort},
+                  store=False,
+                  stream=True,
+              )
+              for event in stream:
+                  etype = getattr(event, "type", "")
+                  if etype == "response.output_text.delta":
+                      text_parts.append(event.delta)
+                  elif etype == "response.completed":
+                      usage = getattr(event.response, "usage", None)
+                  elif etype in ("response.failed", "error"):
+                      print(f"::error::mantle stream error: {etype}", file=sys.stderr)
+          except Exception as e:  # noqa: BLE001 — surface any SDK/transport error into the job log
+              print(f"::error::mantle request failed: {type(e).__name__}: {str(e)[:500]}", file=sys.stderr)
               sys.exit(1)
 
           review = "".join(text_parts).strip()
           with open("/tmp/review.md", "w", encoding="utf-8") as f:
               f.write(review if review else "_(model returned no text)_\n")
 
-          # Responses API usage shape: input_tokens, output_tokens, total_tokens, with
+          # Responses API usage: input_tokens, output_tokens, total_tokens, plus
           # output_tokens_details.reasoning_tokens for the (uncapped) reasoning spend.
-          it = usage.get("input_tokens", "?")
-          ot = usage.get("output_tokens", "?")
-          tt = usage.get("total_tokens", "?")
-          rt = (usage.get("output_tokens_details") or {}).get("reasoning_tokens", "?")
+          def _g(obj, name, default="?"):
+              return getattr(obj, name, default) if obj is not None else default
+          it = _g(usage, "input_tokens")
+          ot = _g(usage, "output_tokens")
+          tt = _g(usage, "total_tokens")
+          rt = _g(_g(usage, "output_tokens_details", None), "reasoning_tokens")
           with open("/tmp/usage.txt", "w", encoding="utf-8") as f:
               f.write(f"in: {it} · out: {ot} (reasoning: {rt}) · total: {tt}")
           print("Tokens:", open("/tmp/usage.txt", encoding="utf-8").read())
@@ -352,9 +334,11 @@ jobs:
           MANTLE_REGION: ${{ steps.region.outputs.region }}
         run: |
           set -euo pipefail
-          # botocore provides SigV4 signing. AWS CLI v2 on the runner is a self-contained
-          # bundle that does NOT expose botocore to system python, so install it explicitly.
-          python3 -m pip install --quiet --disable-pip-version-check botocore
+          # OpenAI SDK (Responses API client) + the AWS token generator (mints the short-term
+          # bearer; pulls botocore for SigV4 signing). AWS CLI v2 on the runner is a
+          # self-contained bundle that doesn't expose these to system python, so install them.
+          python3 -m pip install --quiet --disable-pip-version-check \
+            'openai>=1.66' 'aws-bedrock-token-generator>=1.0'
           python3 /tmp/mantle_review.py
           echo "has_review=true" >> "$GITHUB_OUTPUT"
 
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index b93f5aa..e90e706 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -92,14 +92,14 @@ flowchart TD
     subgraph upstreams["External calls"]
         anth_api["Anthropic API<br/>(api.anthropic.com)"]
         bedrock["AWS Bedrock<br/>(Converse / claude-code-action use_bedrock)"]
-        mantle["bedrock-mantle<br/>(OpenAI Responses API, SigV4)"]
+        mantle["bedrock-mantle<br/>(OpenAI Responses API)"]
     end
 
     consumer_wf -->|"workflow_call"| orch
     claude_exec -->|"provider=anthropic-api"| anth_api
     claude_exec -->|"provider=anthropic-bedrock<br/>via OIDC"| bedrock
     generic_exec -->|"OIDC + Converse"| bedrock
-    codex_exec -->|"OIDC + SigV4 streaming"| mantle
+    codex_exec -->|"OIDC + short-term bearer (SDK)"| mantle
 
     classDef new fill:#e8f5e9,stroke:#1b5e20,stroke-width:1px
     class generic_exec,route,codex_exec new
@@ -153,13 +153,13 @@ Uses the Bedrock Converse API, which is model-family-agnostic. Maintains its own
 
 For `openai.*` models (GPT-5.5, GPT-5.4), which are **not** on bedrock-runtime — there is no `InvokeModel`/`Converse`. They are served only by the separate **bedrock-mantle** endpoint exposing the OpenAI Responses API (`https://bedrock-mantle.{region}.api.aws/v1/responses`). The executor:
 
-- Signs the request with **SigV4 using the assumed-role credentials** — signing service name `bedrock` (not `bedrock-mantle`; the IAM authorization prefix `bedrock-mantle:*` and the signing name deliberately differ). **No bearer token / Bedrock API key** is used (`AWS_BEARER_TOKEN_BEDROCK` stays unset), consistent with dotCMS's OIDC-only posture.
+- Calls mantle with the **OpenAI SDK**, authenticated by a **short-term Bedrock bearer token** minted in-process from the assumed-role session via `aws-bedrock-token-generator` (`provide_token()`). The SDK can't consume SigV4 directly, but a short-term key keeps the OIDC-only posture: it's derived from the current STS credentials (no long-lived secret), inherits the role's permissions, expires with the role session (≤1h here, ≤12h cap), is **not a stored resource** (nothing to delete), and is never written to env/disk/logs. No marketplace subscription; no long-term API key.
 - **Streams** Server-Sent Events and accumulates `response.output_text.delta` chunks. Streaming is mandatory: GPT-5.x reasons before emitting, so a non-streaming call buffers and looks like a 60–100s hang.
 - Remaps the orchestrator's `us-east-1` default to **us-east-2**, where GPT-5.5/5.4 are served. The mantle *endpoint* exists in us-east-1, but the *models* are not there yet (verified via the Models API: us-east-1 lists gpt-oss but no gpt-5*). GPT-5.4 also accepts an explicit us-west-2.
 - Sends `store: false` on each request for **zero data retention** — the Responses API otherwise defaults `store: true`, retaining input+output for 30 days in-region for `previous_response_id` chaining, which single-shot review doesn't need.
 - Reuses the same `/tmp` sticky-comment helper and `sticky_namespace` input as the generic executor. Exposes `reasoning_effort` (default `medium`). Note `max_output_tokens` caps only the visible answer, **not** reasoning tokens.
 
-IAM: `bedrock-mantle:CreateInference` scoped by a `bedrock-mantle:Model StringLike openai.*` condition (no per-model ARNs exist on mantle). Provisioned in dotCMS/Infrastructure-as-code `bedrock-code-review/` (#7836). No AWS Marketplace subscription is required — mantle bills on-demand directly.
+IAM: `bedrock-mantle:CreateInference` scoped by `bedrock-mantle:Model StringLike openai.*` (no per-model ARNs exist on mantle), plus `bedrock-mantle:CallWithBearerToken` scoped to `BearerTokenType=SHORT_TERM` (required to use any API key; short-term-only blocks long-term keys). Provisioned in dotCMS/Infrastructure-as-code `bedrock-code-review/` (#7836). No AWS Marketplace subscription is required — mantle bills on-demand directly.
 
 ---
 
diff --git a/CLAUDE.md b/CLAUDE.md
index a1c01e1..177c758 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -15,7 +15,7 @@ The repository implements a reusable workflow architecture with model-aware rout
 - **Claude Orchestrator** (`.github/workflows/claude-orchestrator.yml`): Lightweight wrapper that handles @claude mention detection AND routes to the appropriate executor based on `model_id`. Consumer repositories call this with `trigger_mode: interactive` or `trigger_mode: automatic`. Exactly one executor runs per call.
 - **Claude Executor** (`.github/workflows/claude-executor.yml`): Execution engine for Anthropic models — runs `anthropics/claude-code-action@v1` either against the direct Anthropic API (`provider: anthropic-api`, default) or via AWS Bedrock (`provider: anthropic-bedrock`, OIDC + `use_bedrock=true`).
 - **Bedrock Generic Executor** (`.github/workflows/bedrock-generic-executor.yml`): Execution engine for **any non-Anthropic Bedrock model** (Amazon Nova, Meta Llama, Mistral, Cohere, AI21). Uses the Bedrock Converse API and maintains its own sticky comment via an inlined helper (set up to `/tmp` at job start, so no cross-repo path dependency).
-- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle directly with a **SigV4-signed streaming** request (signing service `bedrock`, no bearer token / API key) and accumulates the SSE deltas. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default to `us-east-2`, where GPT-5.5/5.4 are served (the mantle endpoint exists in us-east-1 but the models are not there yet — verified via the Models API). Sends `store: false` for zero data retention. Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836.
+- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle with the **OpenAI SDK** authenticated by a **short-term Bedrock bearer token** minted in-process from the OIDC-assumed-role session (`aws-bedrock-token-generator`), and streams `response.output_text.delta` events. The token is OIDC-derived (no long-lived secret, nothing to clean up, ≤1h via the role session) and never written to env/disk/logs; IAM grants `bedrock-mantle:CallWithBearerToken` scoped to `BearerTokenType=SHORT_TERM`. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default to `us-east-2`, where GPT-5.5/5.4 are served (the mantle endpoint exists in us-east-1 but the models are not there yet — verified via the Models API). Sends `store: false` for zero data retention. Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836.
 - **Deployment Guard** (`.github/workflows/deployment-guard.yml`): Reusable workflow for validating deployment changes with configurable rules. Features organization-based bypass for trusted members, file allowlist validation, image-only change detection, and comprehensive image validation (format, repository, version pattern, registry existence, anti-downgrade logic).
 
 ### Multi-model Routing (v3)

From 9a26dddadaddb6f1d4cb6f610a6be81445afa391 Mon Sep 17 00:00:00 2001
From: Stephen Freudenthaler <steve.freudenthaler@dotcms.com>
Date: Tue, 9 Jun 2026 19:59:06 -0400
Subject: [PATCH 4/5] chore(codex): use uv (PEP 723 inline deps) instead of pip

Run the mantle review script via `uv run` with dependencies declared as
PEP 723 inline script metadata, instead of a system `pip install`. Adds
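an astral-sh/setup-uv step (with cache). uv provisions openai +
aws-bedrock-token-generator (and Python) into an isolated, cached
per-script environment: no system-python pollution and faster repeat
runs. The dependencies are lower-bounded (>=), not pinned, so resolved
versions can still change between runs.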

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/codex-executor.yml | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml
index 63a95fe..3636a85 100644
--- a/.github/workflows/codex-executor.yml
+++ b/.github/workflows/codex-executor.yml
@@ -137,6 +137,11 @@ jobs:
           role-to-assume: ${{ inputs.bedrock_role_arn }}
           aws-region: ${{ steps.region.outputs.region }}
 
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
       # Inlined because actions/checkout above checks out the *consumer's* repo, not this one,
       # so a relative script path would resolve against the consumer (and not exist for
       # external consumers). Writing to /tmp avoids the cross-repo path dependency.
@@ -181,8 +186,14 @@ jobs:
         run: |
           cat > /tmp/mantle_review.py <<'PY_EOF'
           #!/usr/bin/env python3
+          # /// script
+          # requires-python = ">=3.9"
+          # dependencies = ["openai>=1.66", "aws-bedrock-token-generator>=1.0"]
+          # ///
           # Review a PR diff via the OpenAI Responses API on bedrock-mantle, using the OpenAI
           # SDK with a SHORT-TERM Bedrock bearer token minted from the assumed-role session.
+          # Dependencies are declared above as PEP 723 inline metadata; `uv run` provisions
+          # them (and Python) into an ephemeral, cached environment — no system pip install.
           import os, sys
           from aws_bedrock_token_generator import provide_token
           from openai import OpenAI
@@ -334,12 +345,11 @@ jobs:
           MANTLE_REGION: ${{ steps.region.outputs.region }}
         run: |
           set -euo pipefail
-          # OpenAI SDK (Responses API client) + the AWS token generator (mints the short-term
-          # bearer; pulls botocore for SigV4 signing). AWS CLI v2 on the runner is a
-          # self-contained bundle that doesn't expose these to system python, so install them.
-          python3 -m pip install --quiet --disable-pip-version-check \
-            'openai>=1.66' 'aws-bedrock-token-generator>=1.0'
-          python3 /tmp/mantle_review.py
+          # Dependencies (openai SDK + aws-bedrock-token-generator) are declared inline in the
+          # script via PEP 723 metadata; `uv run` resolves them into an ephemeral, cached env
+          # and provisions Python as needed — no system pip install, no venv to manage. Job env
+          # vars and the OIDC-issued AWS credentials are inherited by the subprocess.
+          uv run /tmp/mantle_review.py
           echo "has_review=true" >> "$GITHUB_OUTPUT"
 
       - name: Update sticky comment with review

From 2de45c329df4a9f65bb9790d0613eadb33c20ba2 Mon Sep 17 00:00:00 2001
From: Stephen Freudenthaler <steve.freudenthaler@dotcms.com>
Date: Tue, 9 Jun 2026 20:33:15 -0400
Subject: [PATCH 5/5] docs(codex): explain why the review script is inlined
 (not a separate file)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document that inlining mantle_review.py to /tmp is deliberate: this is a
cross-repo reusable workflow, so actions/checkout pulls the consumer's
repo and a relative local action/script reference resolves against the
consumer, not ai-workflows. Shipping it as a real file would need a
fully-qualified composite action or a self-checkout at a pinned ref —
avoided to keep the executor self-contained and version-locked.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .github/workflows/codex-executor.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml
index 3636a85..985eead 100644
--- a/.github/workflows/codex-executor.yml
+++ b/.github/workflows/codex-executor.yml
@@ -183,6 +183,12 @@ jobs:
           chmod +x /tmp/sticky-comment.sh
 
       - name: Set up mantle review helper (OpenAI SDK + short-term bearer)
+        # Inlined to /tmp on purpose (same reason as the sticky-comment helper above): this is
+        # a cross-repo REUSABLE workflow, so actions/checkout pulls the *consumer's* repo, and a
+        # relative local action/script reference (`./...`) resolves against the consumer, not
+        # ai-workflows. Shipping the script as a real file would require a fully-qualified
+        # composite action or a self-checkout of ai-workflows at a pinned ref — deliberately
+        # avoided here to keep this executor self-contained and version-locked. See PR #31.
         run: |
           cat > /tmp/mantle_review.py <<'PY_EOF'
           #!/usr/bin/env python3