From fd461a370ea8d953ce1792e241901201faae5858 Mon Sep 17 00:00:00 2001 From: Stephen Freudenthaler Date: Tue, 9 Jun 2026 18:23:44 -0400 Subject: [PATCH 1/5] feat: add codex executor for OpenAI GPT/Codex via bedrock-mantle Route openai.* model_ids (openai.gpt-5.5, openai.gpt-5.4) to a new codex-executor. These models are served only by the bedrock-mantle endpoint (OpenAI Responses API), not bedrock-runtime, so the generic Converse executor can't reach them. - codex-executor.yml: SigV4-signed (service name "bedrock", no bearer token) STREAMING call to the Responses API; accumulates SSE response.output_text.delta. Remaps us-east-1 -> us-east-2 (mantle region). Reuses the /tmp sticky-comment helper + sticky_namespace. reasoning_effort input (default medium). botocore installed at runtime (AWS CLI v2 bundle doesn't expose it to system python). - claude-orchestrator.yml: anchored ^([a-z]+\.)?openai\. route -> openai-mantle -> codex-mantle job (checked before the generic fallback). Optional reasoning_effort pass-through. Consumers change only model_id. - CLAUDE.md / ARCHITECTURE.md: routing tables, executor docs, diagram. IAM provisioned in dotCMS/Infrastructure-as-code#7836. Streaming is mandatory (GPT-5.x reasons before emitting); max_output_tokens does not cap reasoning tokens. Auth posture confirmed by the spike on #7836. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/claude-orchestrator.yml | 26 ++ .github/workflows/codex-executor.yml | 381 ++++++++++++++++++++++ ARCHITECTURE.md | 24 +- CLAUDE.md | 4 +- 4 files changed, 430 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/codex-executor.yml diff --git a/.github/workflows/claude-orchestrator.yml b/.github/workflows/claude-orchestrator.yml index b2f98e7..51b1140 100644 --- a/.github/workflows/claude-orchestrator.yml +++ b/.github/workflows/claude-orchestrator.yml @@ -82,6 +82,11 @@ on: required: false type: string default: '' + reasoning_effort: + description: 'Reasoning effort for the OpenAI/Codex (mantle) path: minimal | low | medium | high. Ignored on other paths.' + required: false + type: string + default: 'medium' secrets: ANTHROPIC_API_KEY: description: 'Anthropic API key — required only when using the direct Anthropic API path (model_id empty)' @@ -158,6 +163,11 @@ jobs: # Avoids false matches on third-party model IDs that contain "anthropic." as substring. elif [[ "${MODEL_ID}" =~ ^([a-z]+\.)?anthropic\. ]]; then PROVIDER="anthropic-bedrock" + # OpenAI GPT/Codex (openai.gpt-5.5, openai.gpt-5.4). Served only by bedrock-mantle + # (OpenAI Responses API), not bedrock-runtime — routed to the codex executor. + # Anchored like the anthropic match so "openai." as a substring can't misroute. + elif [[ "${MODEL_ID}" =~ ^([a-z]+\.)?openai\. 
]]; then + PROVIDER="openai-mantle" else PROVIDER="bedrock-generic" fi @@ -201,3 +211,19 @@ jobs: sticky_namespace: ${{ inputs.sticky_namespace }} timeout_minutes: ${{ inputs.timeout_minutes }} runner: ${{ inputs.runner }} + + codex-mantle: + needs: route + if: needs.route.outputs.provider == 'openai-mantle' + uses: ./.github/workflows/codex-executor.yml + with: + model_id: ${{ inputs.model_id }} + bedrock_role_arn: ${{ inputs.bedrock_role_arn }} + # aws_region defaults to us-east-1; the codex executor remaps that to us-east-2 + # (mantle is not offered in us-east-1). Consumers only set model_id. + aws_region: ${{ inputs.aws_region }} + prompt: ${{ inputs.prompt }} + sticky_namespace: ${{ inputs.sticky_namespace }} + reasoning_effort: ${{ inputs.reasoning_effort }} + timeout_minutes: ${{ inputs.timeout_minutes }} + runner: ${{ inputs.runner }} diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml new file mode 100644 index 0000000..cbb24da --- /dev/null +++ b/.github/workflows/codex-executor.yml @@ -0,0 +1,381 @@ +--- +# Codex Executor Workflow (Reusable) +# +# PURPOSE: Reviews PRs using OpenAI GPT/Codex models (GPT-5.5, GPT-5.4) served by the +# AWS **bedrock-mantle** endpoint — a separate SigV4 service exposing the OpenAI Responses +# API (https://bedrock-mantle.{region}.api.aws/openai/v1). These models are NOT on +# bedrock-runtime: there is no InvokeModel/Converse, so the generic Bedrock executor cannot +# reach them. Maintains the same auto-updating sticky comment as the other executors. +# +# AUTH: OIDC -> assumed role -> SigV4 (signing service name "bedrock", NOT "bedrock-mantle"; +# the IAM authorization prefix is still bedrock-mantle:*). No Bedrock API key / bearer token +# is used — AWS_BEARER_TOKEN_BEDROCK is intentionally left UNSET so botocore signs with the +# assumed-role credentials. This posture was confirmed by the auth-path spike on +# dotCMS/Infrastructure-as-code#7836. 
+# +# STREAMING IS MANDATORY: GPT-5.5 reasons before emitting output; a non-streaming call buffers +# the whole response and looks like a 60-100s hang. We stream Server-Sent Events and accumulate +# response.output_text.delta chunks. max_output_tokens does NOT cap reasoning tokens. +# +# REGION: GPT-5.5 is us-east-2 only; GPT-5.4 adds us-west-2. Neither is in us-east-1 (the +# orchestrator's default), so this executor remaps the us-east-1 default to us-east-2. +# +# ROUTING: claude-orchestrator.yml selects this executor automatically when model_id matches +# openai.* — consumers shouldn't usually call it directly. +# +# REQUIREMENTS: The caller's repo must have id-token: write permission and a mantle-capable +# IAM role accessible via OIDC (bedrock-mantle:CreateInference with a bedrock-mantle:Model +# StringLike openai.* condition). See dotCMS/Infrastructure-as-code bedrock-code-review/. + +name: Codex Executor (Reusable) + +on: + workflow_call: + inputs: + model_id: + description: 'OpenAI mantle model ID (e.g. openai.gpt-5.5, openai.gpt-5.4)' + required: true + type: string + bedrock_role_arn: + description: 'IAM role ARN that GitHub Actions assumes via OIDC to call bedrock-mantle' + required: true + type: string + aws_region: + description: 'AWS region for the mantle endpoint. us-east-1 (orchestrator default) is remapped to us-east-2, since mantle is not offered in us-east-1.' + required: false + type: string + default: 'us-east-1' + prompt: + description: 'Review prompt sent to the model along with the PR diff' + required: false + type: string + default: | + Review this PR diff. Flag anything that looks wrong, risky, or worth a second look: + bad assumptions, missing edge cases, design problems, security issues. Skip praise. + If it is clean, say so in one line. + sticky_namespace: + description: 'Namespace appended to the sticky-comment marker to keep multiple review jobs from clobbering each other (e.g. "codex-reviewer"). Defaults to the model id.' 
+ required: false + type: string + default: '' + max_diff_chars: + description: 'Maximum diff length to send to the model (chars). Larger diffs are truncated at a line boundary.' + required: false + type: number + default: 80000 + max_output_tokens: + description: 'Maximum tokens the model may emit for the visible answer. Does NOT cap reasoning tokens (cost/latency note).' + required: false + type: number + default: 2048 + reasoning_effort: + description: 'Reasoning effort for GPT-5.x: minimal | low | medium | high. Higher = better review, more reasoning tokens + latency.' + required: false + type: string + default: 'medium' + timeout_minutes: + description: 'Job timeout in minutes. Reasoning models stream slowly; default is generous.' + required: false + type: number + default: 20 + runner: + description: 'GitHub runner label' + required: false + type: string + default: 'ubuntu-latest' + +jobs: + review: + runs-on: ${{ inputs.runner }} + timeout-minutes: ${{ inputs.timeout_minutes }} + permissions: + id-token: write # OIDC -> STS + contents: read + pull-requests: write # post / update sticky comment + env: + MODEL_ID: ${{ inputs.model_id }} + MAX_DIFF_CHARS: ${{ inputs.max_diff_chars }} + MAX_OUTPUT_TOKENS: ${{ inputs.max_output_tokens }} + REASONING_EFFORT: ${{ inputs.reasoning_effort }} + # Default uses the model id so different models naturally get different stickies. + STICKY_MARKER: ${{ format('<!-- codex-review:{0} -->', inputs.sticky_namespace != '' && inputs.sticky_namespace || inputs.model_id) }} + steps: + - name: Resolve mantle region + id: region + env: + REQUESTED_REGION: ${{ inputs.aws_region }} + run: | + set -euo pipefail + # Mantle is not offered in us-east-1 (the orchestrator's default). Treat the default + # as "use the mantle default" so consumers only ever set model_id. An explicit + # us-west-2 (valid for GPT-5.4) is honored as-is. 
+ REGION="${REQUESTED_REGION}" + if [ -z "${REGION}" ] || [ "${REGION}" = "us-east-1" ]; then + REGION="us-east-2" + fi + echo "Effective mantle region: ${REGION}" + echo "region=${REGION}" >> "$GITHUB_OUTPUT" + + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Configure AWS credentials (OIDC) + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ inputs.bedrock_role_arn }} + aws-region: ${{ steps.region.outputs.region }} + + # Inlined because actions/checkout above checks out the *consumer's* repo, not this one, + # so a relative script path would resolve against the consumer (and not exist for + # external consumers). Writing to /tmp avoids the cross-repo path dependency. + - name: Set up sticky-comment helper + run: | + cat > /tmp/sticky-comment.sh <<'STICKY_EOF' + #!/usr/bin/env bash + # Find-or-update a single PR comment identified by STICKY_MARKER. + # Usage: sticky-comment.sh <pr-number> <body-file> + # Env: GH_TOKEN, GITHUB_REPOSITORY, STICKY_MARKER + set -euo pipefail + PR_NUMBER="${1:?pr number required}" + BODY_FILE="${2:?body file required}" + : "${GH_TOKEN:?GH_TOKEN must be set}" + : "${GITHUB_REPOSITORY:?GITHUB_REPOSITORY must be set}" + : "${STICKY_MARKER:?STICKY_MARKER must be set}" + [ -r "$BODY_FILE" ] || { echo "Body file not readable: $BODY_FILE" >&2; exit 1; } + EXISTING_ID=$( + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" --paginate \ + | jq -r --arg marker "$STICKY_MARKER" \ + '.[] | select(.body | startswith($marker)) | .id' \ + | head -1 + ) + if [ -n "$EXISTING_ID" ] && ! 
[[ "$EXISTING_ID" =~ ^[0-9]+$ ]]; then + echo "::warning::EXISTING_ID is non-numeric ($EXISTING_ID); creating a new comment instead" + EXISTING_ID="" + fi + PAYLOAD=$(jq -Rs --arg key body '{($key): .}' < "$BODY_FILE") + if [ -n "$EXISTING_ID" ]; then + echo "Updating existing sticky comment $EXISTING_ID" + echo "$PAYLOAD" | gh api "repos/${GITHUB_REPOSITORY}/issues/comments/${EXISTING_ID}" \ + -X PATCH --input - + else + echo "Creating new sticky comment on PR #${PR_NUMBER}" + echo "$PAYLOAD" | gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ + -X POST --input - + fi + STICKY_EOF + chmod +x /tmp/sticky-comment.sh + + - name: Set up mantle review helper (SigV4 streaming) + run: | + cat > /tmp/mantle_review.py <<'PY_EOF' + #!/usr/bin/env python3 + # Call the OpenAI Responses API on bedrock-mantle with SigV4 (assumed-role creds, + # no bearer token) and STREAM the result. Stdlib + botocore only — no requests. + import json, os, sys, urllib.request, urllib.error + from botocore.session import Session + from botocore.auth import SigV4Auth + from botocore.awsrequest import AWSRequest + + region = os.environ["MANTLE_REGION"] + model = os.environ["MODEL_ID"] + max_out = int(os.environ.get("MAX_OUTPUT_TOKENS") or "2048") + effort = os.environ.get("REASONING_EFFORT") or "medium" + endpoint = f"https://bedrock-mantle.{region}.api.aws/openai/v1/responses" + + with open("/tmp/prompt.txt", encoding="utf-8") as f: + user_input = f.read() + + body = { + "model": model, + "instructions": "You are a senior code reviewer. Output GitHub-flavored markdown. 
Be concise.", + "input": user_input, + "stream": True, + "max_output_tokens": max_out, + "reasoning": {"effort": effort}, + } + payload = json.dumps(body).encode("utf-8") + + creds = Session().get_credentials() + if creds is None: + print("::error::No AWS credentials available for SigV4 signing", file=sys.stderr) + sys.exit(1) + + # SigV4: the signing service name is "bedrock" (NOT "bedrock-mantle"); the IAM + # authorization prefix is bedrock-mantle:* — the two deliberately differ. + signed = AWSRequest( + method="POST", url=endpoint, data=payload, + headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, + ) + SigV4Auth(creds.get_frozen_credentials(), "bedrock", region).add_auth(signed) + + req = urllib.request.Request(endpoint, data=payload, method="POST") + for k, v in signed.headers.items(): + req.add_header(k, v) + + text_parts, usage = [], {} + try: + with urllib.request.urlopen(req, timeout=900) as resp: + for raw in resp: + line = raw.decode("utf-8", "replace").rstrip("\r\n") + if not line.startswith("data:"): + continue + data = line[5:].strip() + if not data or data == "[DONE]": + continue + try: + evt = json.loads(data) + except json.JSONDecodeError: + continue + etype = evt.get("type", "") + if etype == "response.output_text.delta": + text_parts.append(evt.get("delta", "")) + elif etype in ("response.completed", "response.incomplete"): + usage = (evt.get("response") or {}).get("usage") or {} + elif etype == "response.error" or etype == "error": + print(f"::error::mantle stream error: {json.dumps(evt)[:500]}", file=sys.stderr) + except urllib.error.HTTPError as e: + detail = e.read().decode("utf-8", "replace")[:1000] + print(f"::error::mantle HTTP {e.code}: {detail}", file=sys.stderr) + sys.exit(1) + + review = "".join(text_parts).strip() + with open("/tmp/review.md", "w", encoding="utf-8") as f: + f.write(review if review else "_(model returned no text)_\n") + + # Responses API usage shape: input_tokens, output_tokens, 
total_tokens, with + # output_tokens_details.reasoning_tokens for the (uncapped) reasoning spend. + it = usage.get("input_tokens", "?") + ot = usage.get("output_tokens", "?") + tt = usage.get("total_tokens", "?") + rt = (usage.get("output_tokens_details") or {}).get("reasoning_tokens", "?") + with open("/tmp/usage.txt", "w", encoding="utf-8") as f: + f.write(f"in: {it} · out: {ot} (reasoning: {rt}) · total: {tt}") + print("Tokens:", open("/tmp/usage.txt", encoding="utf-8").read()) + + if not review: + sys.exit(1) + PY_EOF + chmod +x /tmp/mantle_review.py + + - name: Resolve PR number + id: pr + env: + ISSUE_PR_URL: ${{ github.event.issue.pull_request.url }} + run: | + set -euo pipefail + case "${GITHUB_EVENT_NAME}" in + pull_request|pull_request_target|pull_request_review|pull_request_review_comment) + PR_NUM="${{ github.event.pull_request.number }}" + ;; + issue_comment) + if [ -n "${ISSUE_PR_URL}" ]; then + PR_NUM="${{ github.event.issue.number }}" + else + echo "::error::codex-executor needs a PR context; issue_comment fired on a non-PR issue" + exit 1 + fi + ;; + *) + echo "::error::codex-executor doesn't support event type: ${GITHUB_EVENT_NAME}" + exit 1 + ;; + esac + if ! 
[[ "${PR_NUM}" =~ ^[0-9]+$ ]]; then + echo "::error::Resolved PR number is not a positive integer: ${PR_NUM}" + exit 1 + fi + echo "number=${PR_NUM}" >> "$GITHUB_OUTPUT" + + - name: Post in-progress sticky comment + env: + GH_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ steps.pr.outputs.number }} + run: | + set -euo pipefail + RUN_LINK="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" + { + printf "%s\n\n" "${STICKY_MARKER}" + printf "🔄 **Codex review in progress** — model: \`%s\`\n\n" "${MODEL_ID}" + printf "Run: [#%s](%s)\n" "${GITHUB_RUN_ID}" "${RUN_LINK}" + } > /tmp/comment.md + /tmp/sticky-comment.sh "${PR_NUMBER}" /tmp/comment.md + + - name: Gather PR diff + id: diff + env: + GH_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ steps.pr.outputs.number }} + run: | + set -euo pipefail + gh pr diff "${PR_NUMBER}" > /tmp/pr.diff + ORIG=$(wc -c < /tmp/pr.diff) + if [ "${ORIG}" -gt "${MAX_DIFF_CHARS}" ]; then + head -c "${MAX_DIFF_CHARS}" /tmp/pr.diff | sed '$d' > /tmp/pr.diff.trimmed + printf "\n\n[TRUNCATED — diff was %s chars, kept first ~%s]\n" "${ORIG}" "${MAX_DIFF_CHARS}" >> /tmp/pr.diff.trimmed + mv /tmp/pr.diff.trimmed /tmp/pr.diff + fi + echo "diff_chars=$(wc -c < /tmp/pr.diff)" >> "$GITHUB_OUTPUT" + + - name: Build prompt + env: + REVIEW_PROMPT: ${{ inputs.prompt }} + run: | + set -euo pipefail + # workflow_call always passes the caller's value, even when empty, so the input + # default above is never reached from the orchestrator. Fall back here. + if [ -z "${REVIEW_PROMPT}" ]; then + REVIEW_PROMPT="Review this PR diff. Flag anything that looks wrong, risky, or worth a second look: bad assumptions, missing edge cases, design problems, security issues. Skip praise. If it is clean, say so in one line." 
+ fi + { + printf "%s\n\n" "${REVIEW_PROMPT}" + printf -- "--- BEGIN DIFF ---\n" + cat /tmp/pr.diff + printf -- "\n--- END DIFF ---\n" + } > /tmp/prompt.txt + + - name: Invoke bedrock-mantle (OpenAI Responses API, streaming) + id: invoke + env: + MANTLE_REGION: ${{ steps.region.outputs.region }} + run: | + set -euo pipefail + # botocore provides SigV4 signing. AWS CLI v2 on the runner is a self-contained + # bundle that does NOT expose botocore to system python, so install it explicitly. + python3 -m pip install --quiet --disable-pip-version-check botocore + python3 /tmp/mantle_review.py + echo "has_review=true" >> "$GITHUB_OUTPUT" + + - name: Update sticky comment with review + if: steps.invoke.outputs.has_review == 'true' + env: + GH_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ steps.pr.outputs.number }} + run: | + set -euo pipefail + USAGE=$(cat /tmp/usage.txt) + RUN_LINK="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" + { + printf "%s\n\n" "${STICKY_MARKER}" + printf "## 🤖 Codex Review — \`%s\`\n\n" "${MODEL_ID}" + cat /tmp/review.md + printf "\n\n---\n" + printf "Run: [#%s](%s) · tokens: %s\n" "${GITHUB_RUN_ID}" "${RUN_LINK}" "${USAGE}" + } > /tmp/comment.md + /tmp/sticky-comment.sh "${PR_NUMBER}" /tmp/comment.md + + - name: Report failure into sticky comment + if: failure() && steps.invoke.outputs.has_review != 'true' + env: + GH_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ steps.pr.outputs.number }} + run: | + set -euo pipefail + RUN_LINK="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" + { + printf "%s\n\n" "${STICKY_MARKER}" + printf "## ❌ Codex Review failed — \`%s\`\n\n" "${MODEL_ID}" + printf "The review job failed before producing output. 
See the run for details.\n\n" + printf "Run: [#%s](%s)\n" "${GITHUB_RUN_ID}" "${RUN_LINK}" + } > /tmp/comment.md + /tmp/sticky-comment.sh "${PR_NUMBER}" /tmp/comment.md diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 9a85b43..61ad6f4 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -81,27 +81,31 @@ flowchart TD route{"route job<br/>inspect model_id"} claude_exec["claude-executor.yml<br/>(Anthropic models)"] generic_exec["bedrock-generic-executor.yml<br/>(any other Bedrock model)"] + codex_exec["codex-executor.yml<br/>(OpenAI GPT/Codex)"] orch --> route route -->|"empty or *.anthropic.*"| claude_exec + route -->|"openai.*"| codex_exec route -->|"anything else"| generic_exec end subgraph upstreams["External calls"] anth_api["Anthropic API<br/>(api.anthropic.com)"] bedrock["AWS Bedrock<br/>(Converse / claude-code-action use_bedrock)"] + mantle["bedrock-mantle<br/>(OpenAI Responses API, SigV4)"] end consumer_wf -->|"workflow_call"| orch claude_exec -->|"provider=anthropic-api"| anth_api claude_exec -->|"provider=anthropic-bedrock<br/>via OIDC"| bedrock generic_exec -->|"OIDC + Converse"| bedrock + codex_exec -->|"OIDC + SigV4 streaming"| mantle classDef new fill:#e8f5e9,stroke:#1b5e20,stroke-width:1px - class generic_exec,route new + class generic_exec,route,codex_exec new ``` -Nodes shaded green are new in v3. The `route` job uses an anchored regex (`^([a-z]+\.)?anthropic\.`) so model IDs that merely contain the substring `"anthropic."` (e.g. `us.not-anthropic.foo`) are **not** misrouted. +Nodes shaded green are new in v3 (`codex-executor` added later for the OpenAI/mantle path). The `route` job uses anchored regexes (`^([a-z]+\.)?anthropic\.`, `^([a-z]+\.)?openai\.`) so model IDs that merely contain the substring `"anthropic."`/`"openai."` (e.g. `us.not-anthropic.foo`) are **not** misrouted. ### Routing table @@ -109,9 +113,10 @@ Nodes shaded green are new in v3. The `route` job uses an anchored regex (`^([a- | ------------------------------------------------------------- | ------------------- | --------------------------------- | | _(empty)_ | `anthropic-api` | `claude-executor.yml` | | `anthropic.*` or `.anthropic.*` | `anthropic-bedrock` | `claude-executor.yml` | -| Anything else (`us.amazon.*`, `meta.*`, `mistral.*`, ...) | n/a | `bedrock-generic-executor.yml` | +| `openai.*` (e.g. `openai.gpt-5.5`, `openai.gpt-5.4`) | `openai-mantle` | `codex-executor.yml` | +| Anything else (`us.amazon.*`, `meta.*`, `mistral.*`, ...) | `bedrock-generic` | `bedrock-generic-executor.yml` | -The non-matching executor job is **skipped** by job-level `if:` conditional, not "ran and exited" — billable runner time is zero for the skipped path. +The non-matching executor jobs are **skipped** by job-level `if:` conditional, not "ran and exited" — billable runner time is zero for the skipped paths. --- @@ -144,6 +149,17 @@ Includes a pre-flight API health check on the `anthropic-api` path that skips gr Uses the Bedrock Converse API, which is model-family-agnostic. 
Maintains its own sticky comment via an inline helper (a setup step writes a bash find-or-update helper to `/tmp` so the logic isn't dependent on the consumer's checkout), replicating the auto-update behavior `claude-code-action` provides for free on the Anthropic path. Accepts a `sticky_namespace` input so multiple review jobs on the same PR don't clobber each other. +#### 4. `codex-executor.yml` (OpenAI GPT/Codex via bedrock-mantle) + +For `openai.*` models (GPT-5.5, GPT-5.4), which are **not** on bedrock-runtime — there is no `InvokeModel`/`Converse`. They are served only by the separate **bedrock-mantle** endpoint exposing the OpenAI Responses API (`https://bedrock-mantle.{region}.api.aws/openai/v1/responses`). The executor: + +- Signs the request with **SigV4 using the assumed-role credentials** — signing service name `bedrock` (not `bedrock-mantle`; the IAM authorization prefix `bedrock-mantle:*` and the signing name deliberately differ). **No bearer token / Bedrock API key** is used (`AWS_BEARER_TOKEN_BEDROCK` stays unset), consistent with dotCMS's OIDC-only posture. +- **Streams** Server-Sent Events and accumulates `response.output_text.delta` chunks. Streaming is mandatory: GPT-5.x reasons before emitting, so a non-streaming call buffers and looks like a 60–100s hang. +- Remaps the orchestrator's `us-east-1` default to **us-east-2** (mantle is not offered in us-east-1; GPT-5.4 also accepts an explicit us-west-2). +- Reuses the same `/tmp` sticky-comment helper and `sticky_namespace` input as the generic executor. Exposes `reasoning_effort` (default `medium`). Note `max_output_tokens` caps only the visible answer, **not** reasoning tokens. + +IAM: `bedrock-mantle:CreateInference` scoped by a `bedrock-mantle:Model StringLike openai.*` condition (no per-model ARNs exist on mantle). Provisioned in dotCMS/Infrastructure-as-code `bedrock-code-review/` (#7836). No AWS Marketplace subscription is required — mantle bills on-demand directly. 
+ --- ## Key benefits diff --git a/CLAUDE.md b/CLAUDE.md index 7e8d0c9..18d847e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,6 +15,7 @@ The repository implements a reusable workflow architecture with model-aware rout - **Claude Orchestrator** (`.github/workflows/claude-orchestrator.yml`): Lightweight wrapper that handles @claude mention detection AND routes to the appropriate executor based on `model_id`. Consumer repositories call this with `trigger_mode: interactive` or `trigger_mode: automatic`. Exactly one executor runs per call. - **Claude Executor** (`.github/workflows/claude-executor.yml`): Execution engine for Anthropic models — runs `anthropics/claude-code-action@v1` either against the direct Anthropic API (`provider: anthropic-api`, default) or via AWS Bedrock (`provider: anthropic-bedrock`, OIDC + `use_bedrock=true`). - **Bedrock Generic Executor** (`.github/workflows/bedrock-generic-executor.yml`): Execution engine for **any non-Anthropic Bedrock model** (Amazon Nova, Meta Llama, Mistral, Cohere, AI21). Uses the Bedrock Converse API and maintains its own sticky comment via an inlined helper (set up to `/tmp` at job start, so no cross-repo path dependency). +- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle directly with a **SigV4-signed streaming** request (signing service `bedrock`, no bearer token / API key) and accumulates the SSE deltas. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default region to `us-east-2` (mantle is not in us-east-1). Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836. - **Deployment Guard** (`.github/workflows/deployment-guard.yml`): Reusable workflow for validating deployment changes with configurable rules. 
Features organization-based bypass for trusted members, file allowlist validation, image-only change detection, and comprehensive image validation (format, repository, version pattern, registry existence, anti-downgrade logic). ### Multi-model Routing (v3) @@ -26,9 +27,10 @@ The orchestrator picks the executor by inspecting `model_id`: | _(empty / unset)_ | `claude-executor` (`anthropic-api`)| Backward-compat default; requires `ANTHROPIC_API_KEY` secret | | `*.anthropic.*` (e.g. `global.anthropic.claude-sonnet-4-6`) | `claude-executor` (`anthropic-bedrock`) | Requires `bedrock_role_arn` input | | `anthropic.*` (bare) | `claude-executor` (`anthropic-bedrock`) | Requires `bedrock_role_arn` input | +| `openai.*` (e.g. `openai.gpt-5.5`, `openai.gpt-5.4`) | `codex-executor` | Requires `bedrock_role_arn`; mantle path (us-east-2) | | Anything else (Nova, Llama, Mistral, …) | `bedrock-generic-executor` | Requires `bedrock_role_arn` input | -The match for the Anthropic family is anchored: `^([a-z]+\.)?anthropic\.` — so a model ID that merely contains the substring `anthropic.` (e.g. `us.not-anthropic.foo`) is **not** misrouted. +The matches for the Anthropic and OpenAI families are anchored: `^([a-z]+\.)?anthropic\.` and `^([a-z]+\.)?openai\.` — so a model ID that merely contains the substring `anthropic.`/`openai.` (e.g. `us.not-anthropic.foo`) is **not** misrouted. `openai.*` is checked before the generic fallback. ### Sticky Comments From 2f93c7b40dd15be135947b9b3a5dbd0ccaccd0ed Mon Sep 17 00:00:00 2001 From: Stephen Freudenthaler Date: Tue, 9 Jun 2026 19:39:47 -0400 Subject: [PATCH 2/5] fix(codex): correct mantle path to /v1, set store=false, fix region rationale Verified live against the R&D account Models API: - Endpoint path is /v1/responses, NOT /openai/v1/responses (the latter 404s). Fixed the endpoint URL + header/docs. - The bedrock-mantle endpoint IS available in us-east-1, but GPT-5.5/5.4 are served only in us-east-2 (us-east-1 lists gpt-oss but no gpt-5*). 
The us-east-1 -> us-east-2 remap stays (routes to where the models live), but the rationale comments are corrected (it's model availability, not endpoint availability). - Send store=false for zero data retention. The Responses API defaults store=true, which retains input+output 30 days in-region for previous_response_id chaining; single-shot review doesn't need state and shouldn't retain the diff. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/claude-orchestrator.yml | 5 ++-- .github/workflows/codex-executor.yml | 30 ++++++++++++++++------- ARCHITECTURE.md | 5 ++-- CLAUDE.md | 2 +- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/.github/workflows/claude-orchestrator.yml b/.github/workflows/claude-orchestrator.yml index 51b1140..6b4240a 100644 --- a/.github/workflows/claude-orchestrator.yml +++ b/.github/workflows/claude-orchestrator.yml @@ -219,8 +219,9 @@ jobs: with: model_id: ${{ inputs.model_id }} bedrock_role_arn: ${{ inputs.bedrock_role_arn }} - # aws_region defaults to us-east-1; the codex executor remaps that to us-east-2 - # (mantle is not offered in us-east-1). Consumers only set model_id. + # aws_region defaults to us-east-1; the codex executor remaps that to us-east-2, + # where GPT-5.5/5.4 are served (the mantle endpoint exists in us-east-1 but the + # models do not yet). Consumers only set model_id. aws_region: ${{ inputs.aws_region }} prompt: ${{ inputs.prompt }} sticky_namespace: ${{ inputs.sticky_namespace }} diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml index cbb24da..6194e75 100644 --- a/.github/workflows/codex-executor.yml +++ b/.github/workflows/codex-executor.yml @@ -3,9 +3,9 @@ # # PURPOSE: Reviews PRs using OpenAI GPT/Codex models (GPT-5.5, GPT-5.4) served by the # AWS **bedrock-mantle** endpoint — a separate SigV4 service exposing the OpenAI Responses -# API (https://bedrock-mantle.{region}.api.aws/openai/v1). 
These models are NOT on -# bedrock-runtime: there is no InvokeModel/Converse, so the generic Bedrock executor cannot -# reach them. Maintains the same auto-updating sticky comment as the other executors. +# API (https://bedrock-mantle.{region}.api.aws/v1). These models are NOT on bedrock-runtime: +# there is no InvokeModel/Converse, so the generic Bedrock executor cannot reach them. +# Maintains the same auto-updating sticky comment as the other executors. # # AUTH: OIDC -> assumed role -> SigV4 (signing service name "bedrock", NOT "bedrock-mantle"; # the IAM authorization prefix is still bedrock-mantle:*). No Bedrock API key / bearer token @@ -17,8 +17,15 @@ # the whole response and looks like a 60-100s hang. We stream Server-Sent Events and accumulate # response.output_text.delta chunks. max_output_tokens does NOT cap reasoning tokens. # -# REGION: GPT-5.5 is us-east-2 only; GPT-5.4 adds us-west-2. Neither is in us-east-1 (the -# orchestrator's default), so this executor remaps the us-east-1 default to us-east-2. +# REGION: the bedrock-mantle ENDPOINT exists in many regions including us-east-1, BUT the +# GPT-5.5/5.4 MODELS are currently served only in us-east-2 — verified live via the Models +# API (us-east-1 lists gpt-oss but no gpt-5*; us-east-2 lists openai.gpt-5.5 / openai.gpt-5.4). +# So this executor remaps the us-east-1 default to us-east-2 where the models live. GPT-5.4 is +# also offered in us-west-2. (Re-check the Models API if AWS expands GPT-5.x to us-east-1.) +# +# DATA RETENTION: the Responses API defaults store=true, which retains input+output for 30 +# days in-region for previous_response_id chaining. Code review is single-shot, so we send +# store=false for zero data retention. # # ROUTING: claude-orchestrator.yml selects this executor automatically when model_id matches # openai.* — consumers shouldn't usually call it directly. @@ -41,7 +48,7 @@ on: required: true type: string aws_region: - description: 'AWS region for the mantle endpoint. 
us-east-1 (orchestrator default) is remapped to us-east-2, since mantle is not offered in us-east-1.' + description: 'AWS region for the mantle endpoint. The us-east-1 default is remapped to us-east-2, where GPT-5.5/5.4 are served (the endpoint exists in us-east-1 but the models do not yet).' required: false type: string default: 'us-east-1' @@ -106,8 +113,9 @@ jobs: REQUESTED_REGION: ${{ inputs.aws_region }} run: | set -euo pipefail - # Mantle is not offered in us-east-1 (the orchestrator's default). Treat the default - # as "use the mantle default" so consumers only ever set model_id. An explicit + # The mantle endpoint exists in us-east-1, but GPT-5.5/5.4 are served only in + # us-east-2 (us-east-1 lists gpt-oss but no gpt-5*). Treat the us-east-1 default as + # "send to where the models live" so consumers only set model_id. An explicit # us-west-2 (valid for GPT-5.4) is honored as-is. REGION="${REQUESTED_REGION}" if [ -z "${REGION}" ] || [ "${REGION}" = "us-east-1" ]; then @@ -181,7 +189,7 @@ jobs: model = os.environ["MODEL_ID"] max_out = int(os.environ.get("MAX_OUTPUT_TOKENS") or "2048") effort = os.environ.get("REASONING_EFFORT") or "medium" - endpoint = f"https://bedrock-mantle.{region}.api.aws/openai/v1/responses" + endpoint = f"https://bedrock-mantle.{region}.api.aws/v1/responses" with open("/tmp/prompt.txt", encoding="utf-8") as f: user_input = f.read() @@ -193,6 +201,10 @@ jobs: "stream": True, "max_output_tokens": max_out, "reasoning": {"effort": effort}, + # store=false → zero data retention. The Responses API defaults to true (retains + # input+output 30 days in-region for previous_response_id chaining); code review + # is single-shot, so we don't need state and don't want the diff retained. 
+ "store": False, } payload = json.dumps(body).encode("utf-8") diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 61ad6f4..b93f5aa 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -151,11 +151,12 @@ Uses the Bedrock Converse API, which is model-family-agnostic. Maintains its own #### 4. `codex-executor.yml` (OpenAI GPT/Codex via bedrock-mantle) -For `openai.*` models (GPT-5.5, GPT-5.4), which are **not** on bedrock-runtime — there is no `InvokeModel`/`Converse`. They are served only by the separate **bedrock-mantle** endpoint exposing the OpenAI Responses API (`https://bedrock-mantle.{region}.api.aws/openai/v1/responses`). The executor: +For `openai.*` models (GPT-5.5, GPT-5.4), which are **not** on bedrock-runtime — there is no `InvokeModel`/`Converse`. They are served only by the separate **bedrock-mantle** endpoint exposing the OpenAI Responses API (`https://bedrock-mantle.{region}.api.aws/v1/responses`). The executor: - Signs the request with **SigV4 using the assumed-role credentials** — signing service name `bedrock` (not `bedrock-mantle`; the IAM authorization prefix `bedrock-mantle:*` and the signing name deliberately differ). **No bearer token / Bedrock API key** is used (`AWS_BEARER_TOKEN_BEDROCK` stays unset), consistent with dotCMS's OIDC-only posture. - **Streams** Server-Sent Events and accumulates `response.output_text.delta` chunks. Streaming is mandatory: GPT-5.x reasons before emitting, so a non-streaming call buffers and looks like a 60–100s hang. -- Remaps the orchestrator's `us-east-1` default to **us-east-2** (mantle is not offered in us-east-1; GPT-5.4 also accepts an explicit us-west-2). +- Remaps the orchestrator's `us-east-1` default to **us-east-2**, where GPT-5.5/5.4 are served. The mantle *endpoint* exists in us-east-1, but the *models* are not there yet (verified via the Models API: us-east-1 lists gpt-oss but no gpt-5*). GPT-5.4 also accepts an explicit us-west-2. 
+- Sends `store: false` on each request for **zero data retention** — the Responses API otherwise defaults `store: true`, retaining input+output for 30 days in-region for `previous_response_id` chaining, which single-shot review doesn't need. - Reuses the same `/tmp` sticky-comment helper and `sticky_namespace` input as the generic executor. Exposes `reasoning_effort` (default `medium`). Note `max_output_tokens` caps only the visible answer, **not** reasoning tokens. IAM: `bedrock-mantle:CreateInference` scoped by a `bedrock-mantle:Model StringLike openai.*` condition (no per-model ARNs exist on mantle). Provisioned in dotCMS/Infrastructure-as-code `bedrock-code-review/` (#7836). No AWS Marketplace subscription is required — mantle bills on-demand directly. diff --git a/CLAUDE.md b/CLAUDE.md index 18d847e..a1c01e1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,7 +15,7 @@ The repository implements a reusable workflow architecture with model-aware rout - **Claude Orchestrator** (`.github/workflows/claude-orchestrator.yml`): Lightweight wrapper that handles @claude mention detection AND routes to the appropriate executor based on `model_id`. Consumer repositories call this with `trigger_mode: interactive` or `trigger_mode: automatic`. Exactly one executor runs per call. - **Claude Executor** (`.github/workflows/claude-executor.yml`): Execution engine for Anthropic models — runs `anthropics/claude-code-action@v1` either against the direct Anthropic API (`provider: anthropic-api`, default) or via AWS Bedrock (`provider: anthropic-bedrock`, OIDC + `use_bedrock=true`). - **Bedrock Generic Executor** (`.github/workflows/bedrock-generic-executor.yml`): Execution engine for **any non-Anthropic Bedrock model** (Amazon Nova, Meta Llama, Mistral, Cohere, AI21). Uses the Bedrock Converse API and maintains its own sticky comment via an inlined helper (set up to `/tmp` at job start, so no cross-repo path dependency). 
-- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle directly with a **SigV4-signed streaming** request (signing service `bedrock`, no bearer token / API key) and accumulates the SSE deltas. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default region to `us-east-2` (mantle is not in us-east-1). Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836. +- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle directly with a **SigV4-signed streaming** request (signing service `bedrock`, no bearer token / API key) and accumulates the SSE deltas. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default to `us-east-2`, where GPT-5.5/5.4 are served (the mantle endpoint exists in us-east-1 but the models are not there yet — verified via the Models API). Sends `store: false` for zero data retention. Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836. - **Deployment Guard** (`.github/workflows/deployment-guard.yml`): Reusable workflow for validating deployment changes with configurable rules. Features organization-based bypass for trusted members, file allowlist validation, image-only change detection, and comprehensive image validation (format, repository, version pattern, registry existence, anti-downgrade logic). 
### Multi-model Routing (v3) From 3254a11d1de22a8a4d362422a0cb71a61c300122 Mon Sep 17 00:00:00 2001 From: Stephen Freudenthaler Date: Tue, 9 Jun 2026 19:52:47 -0400 Subject: [PATCH 3/5] feat(codex): switch to OpenAI SDK with short-term bearer token Per the auth-path decision, move the executor from hand-rolled SigV4 + urllib to the OpenAI SDK. The SDK can't consume SigV4, so authenticate with a SHORT-TERM Bedrock bearer token minted in-process from the OIDC-assumed-role session via aws-bedrock-token-generator. The token is OIDC-derived (no long-lived secret), expires with the role session (<=1h), is not a stored resource (nothing to clean up), and is never written to env/disk/logs. - Replace the SigV4 helper with client.responses.create(stream=True); accumulate response.output_text.delta, read usage off response.completed. - Install openai + aws-bedrock-token-generator at runtime (was botocore). - Keep store=false, reasoning_effort, region remap, sticky comment. - Docs (CLAUDE.md/ARCHITECTURE.md) updated to the SDK + bearer path. Requires bedrock-mantle:CallWithBearerToken (SHORT_TERM) in the IAM PR. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/codex-executor.yml | 138 ++++++++++++--------------- ARCHITECTURE.md | 8 +- CLAUDE.md | 2 +- 3 files changed, 66 insertions(+), 82 deletions(-) diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml index 6194e75..63a95fe 100644 --- a/.github/workflows/codex-executor.yml +++ b/.github/workflows/codex-executor.yml @@ -2,20 +2,23 @@ # Codex Executor Workflow (Reusable) # # PURPOSE: Reviews PRs using OpenAI GPT/Codex models (GPT-5.5, GPT-5.4) served by the -# AWS **bedrock-mantle** endpoint — a separate SigV4 service exposing the OpenAI Responses -# API (https://bedrock-mantle.{region}.api.aws/v1). These models are NOT on bedrock-runtime: +# AWS **bedrock-mantle** endpoint — the OpenAI Responses API at +# https://bedrock-mantle.{region}.api.aws/v1. 
These models are NOT on bedrock-runtime: # there is no InvokeModel/Converse, so the generic Bedrock executor cannot reach them. # Maintains the same auto-updating sticky comment as the other executors. # -# AUTH: OIDC -> assumed role -> SigV4 (signing service name "bedrock", NOT "bedrock-mantle"; -# the IAM authorization prefix is still bedrock-mantle:*). No Bedrock API key / bearer token -# is used — AWS_BEARER_TOKEN_BEDROCK is intentionally left UNSET so botocore signs with the -# assumed-role credentials. This posture was confirmed by the auth-path spike on -# dotCMS/Infrastructure-as-code#7836. +# AUTH: OIDC -> assumed role -> a SHORT-TERM Bedrock bearer token minted from that session +# (aws-bedrock-token-generator `provide_token()`), passed to the OpenAI SDK. The OpenAI SDK +# requires a bearer token (it cannot consume SigV4 directly), but a short-term key keeps the +# OIDC-only posture: it is derived from the current STS credentials with NO long-lived secret, +# NO stored resource to clean up, and a lifetime of min(12h, role session). Our role's +# max_session_duration is 1h, so each token dies within the hour and lives only in this +# ephemeral runner's memory. We never export it to GITHUB_ENV / disk / logs. IAM requires +# bedrock-mantle:CallWithBearerToken (scoped to BearerTokenType=SHORT_TERM). See #7836. # # STREAMING IS MANDATORY: GPT-5.5 reasons before emitting output; a non-streaming call buffers -# the whole response and looks like a 60-100s hang. We stream Server-Sent Events and accumulate -# response.output_text.delta chunks. max_output_tokens does NOT cap reasoning tokens. +# the whole response and looks like a 60-100s hang. We stream the Responses API and accumulate +# response.output_text.delta events. max_output_tokens does NOT cap reasoning tokens. 
# # REGION: the bedrock-mantle ENDPOINT exists in many regions including us-east-1, BUT the # GPT-5.5/5.4 MODELS are currently served only in us-east-2 — verified live via the Models @@ -174,93 +177,72 @@ jobs: STICKY_EOF chmod +x /tmp/sticky-comment.sh - - name: Set up mantle review helper (SigV4 streaming) + - name: Set up mantle review helper (OpenAI SDK + short-term bearer) run: | cat > /tmp/mantle_review.py <<'PY_EOF' #!/usr/bin/env python3 - # Call the OpenAI Responses API on bedrock-mantle with SigV4 (assumed-role creds, - # no bearer token) and STREAM the result. Stdlib + botocore only — no requests. - import json, os, sys, urllib.request, urllib.error - from botocore.session import Session - from botocore.auth import SigV4Auth - from botocore.awsrequest import AWSRequest + # Review a PR diff via the OpenAI Responses API on bedrock-mantle, using the OpenAI + # SDK with a SHORT-TERM Bedrock bearer token minted from the assumed-role session. + import os, sys + from aws_bedrock_token_generator import provide_token + from openai import OpenAI region = os.environ["MANTLE_REGION"] model = os.environ["MODEL_ID"] max_out = int(os.environ.get("MAX_OUTPUT_TOKENS") or "2048") effort = os.environ.get("REASONING_EFFORT") or "medium" - endpoint = f"https://bedrock-mantle.{region}.api.aws/v1/responses" with open("/tmp/prompt.txt", encoding="utf-8") as f: user_input = f.read() - body = { - "model": model, - "instructions": "You are a senior code reviewer. Output GitHub-flavored markdown. Be concise.", - "input": user_input, - "stream": True, - "max_output_tokens": max_out, - "reasoning": {"effort": effort}, - # store=false → zero data retention. The Responses API defaults to true (retains - # input+output 30 days in-region for previous_response_id chaining); code review - # is single-shot, so we don't need state and don't want the diff retained. 
- "store": False, - } - payload = json.dumps(body).encode("utf-8") - - creds = Session().get_credentials() - if creds is None: - print("::error::No AWS credentials available for SigV4 signing", file=sys.stderr) - sys.exit(1) + # Mint a short-term bearer token from the current STS (OIDC-assumed-role) credentials. + # provide_token() is a local signing operation (no API call, no stored resource); the + # token inherits the role's permissions, lives only in this process, and expires with + # the role session (max_session_duration=1h here; <=12h cap). It is NEVER written to + # GITHUB_ENV, disk, or logs — keep it a local variable. AWS_REGION is set to the + # mantle region by configure-aws-credentials, so the token is signed for that region. + token = provide_token() - # SigV4: the signing service name is "bedrock" (NOT "bedrock-mantle"); the IAM - # authorization prefix is bedrock-mantle:* — the two deliberately differ. - signed = AWSRequest( - method="POST", url=endpoint, data=payload, - headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, - ) - SigV4Auth(creds.get_frozen_credentials(), "bedrock", region).add_auth(signed) - - req = urllib.request.Request(endpoint, data=payload, method="POST") - for k, v in signed.headers.items(): - req.add_header(k, v) + client = OpenAI(base_url=f"https://bedrock-mantle.{region}.api.aws/v1", api_key=token) - text_parts, usage = [], {} + text_parts, usage = [], None try: - with urllib.request.urlopen(req, timeout=900) as resp: - for raw in resp: - line = raw.decode("utf-8", "replace").rstrip("\r\n") - if not line.startswith("data:"): - continue - data = line[5:].strip() - if not data or data == "[DONE]": - continue - try: - evt = json.loads(data) - except json.JSONDecodeError: - continue - etype = evt.get("type", "") - if etype == "response.output_text.delta": - text_parts.append(evt.get("delta", "")) - elif etype in ("response.completed", "response.incomplete"): - usage = (evt.get("response") or 
{}).get("usage") or {} - elif etype == "response.error" or etype == "error": - print(f"::error::mantle stream error: {json.dumps(evt)[:500]}", file=sys.stderr) - except urllib.error.HTTPError as e: - detail = e.read().decode("utf-8", "replace")[:1000] - print(f"::error::mantle HTTP {e.code}: {detail}", file=sys.stderr) + # store=False → zero data retention. The Responses API otherwise defaults store=True + # (retains input+output 30 days in-region for previous_response_id chaining); review + # is single-shot, so we don't need state and don't want the diff retained. + stream = client.responses.create( + model=model, + instructions="You are a senior code reviewer. Output GitHub-flavored markdown. Be concise.", + input=user_input, + max_output_tokens=max_out, + reasoning={"effort": effort}, + store=False, + stream=True, + ) + for event in stream: + etype = getattr(event, "type", "") + if etype == "response.output_text.delta": + text_parts.append(event.delta) + elif etype == "response.completed": + usage = getattr(event.response, "usage", None) + elif etype in ("response.failed", "error"): + print(f"::error::mantle stream error: {etype}", file=sys.stderr) + except Exception as e: # noqa: BLE001 — surface any SDK/transport error into the job log + print(f"::error::mantle request failed: {type(e).__name__}: {str(e)[:500]}", file=sys.stderr) sys.exit(1) review = "".join(text_parts).strip() with open("/tmp/review.md", "w", encoding="utf-8") as f: f.write(review if review else "_(model returned no text)_\n") - # Responses API usage shape: input_tokens, output_tokens, total_tokens, with + # Responses API usage: input_tokens, output_tokens, total_tokens, plus # output_tokens_details.reasoning_tokens for the (uncapped) reasoning spend. 
- it = usage.get("input_tokens", "?") - ot = usage.get("output_tokens", "?") - tt = usage.get("total_tokens", "?") - rt = (usage.get("output_tokens_details") or {}).get("reasoning_tokens", "?") + def _g(obj, name, default="?"): + return getattr(obj, name, default) if obj is not None else default + it = _g(usage, "input_tokens") + ot = _g(usage, "output_tokens") + tt = _g(usage, "total_tokens") + rt = _g(_g(usage, "output_tokens_details", None), "reasoning_tokens") with open("/tmp/usage.txt", "w", encoding="utf-8") as f: f.write(f"in: {it} · out: {ot} (reasoning: {rt}) · total: {tt}") print("Tokens:", open("/tmp/usage.txt", encoding="utf-8").read()) @@ -352,9 +334,11 @@ jobs: MANTLE_REGION: ${{ steps.region.outputs.region }} run: | set -euo pipefail - # botocore provides SigV4 signing. AWS CLI v2 on the runner is a self-contained - # bundle that does NOT expose botocore to system python, so install it explicitly. - python3 -m pip install --quiet --disable-pip-version-check botocore + # OpenAI SDK (Responses API client) + the AWS token generator (mints the short-term + # bearer; pulls botocore for SigV4 signing). AWS CLI v2 on the runner is a + # self-contained bundle that doesn't expose these to system python, so install them. + python3 -m pip install --quiet --disable-pip-version-check \ + 'openai>=1.66' 'aws-bedrock-token-generator>=1.0' python3 /tmp/mantle_review.py echo "has_review=true" >> "$GITHUB_OUTPUT" diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index b93f5aa..e90e706 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -92,14 +92,14 @@ flowchart TD subgraph upstreams["External calls"] anth_api["Anthropic API
(api.anthropic.com)"] bedrock["AWS Bedrock
(Converse / claude-code-action use_bedrock)"] - mantle["bedrock-mantle
(OpenAI Responses API, SigV4)"] + mantle["bedrock-mantle
(OpenAI Responses API)"] end consumer_wf -->|"workflow_call"| orch claude_exec -->|"provider=anthropic-api"| anth_api claude_exec -->|"provider=anthropic-bedrock
via OIDC"| bedrock generic_exec -->|"OIDC + Converse"| bedrock - codex_exec -->|"OIDC + SigV4 streaming"| mantle + codex_exec -->|"OIDC + short-term bearer (SDK)"| mantle classDef new fill:#e8f5e9,stroke:#1b5e20,stroke-width:1px class generic_exec,route,codex_exec new @@ -153,13 +153,13 @@ Uses the Bedrock Converse API, which is model-family-agnostic. Maintains its own For `openai.*` models (GPT-5.5, GPT-5.4), which are **not** on bedrock-runtime — there is no `InvokeModel`/`Converse`. They are served only by the separate **bedrock-mantle** endpoint exposing the OpenAI Responses API (`https://bedrock-mantle.{region}.api.aws/v1/responses`). The executor: -- Signs the request with **SigV4 using the assumed-role credentials** — signing service name `bedrock` (not `bedrock-mantle`; the IAM authorization prefix `bedrock-mantle:*` and the signing name deliberately differ). **No bearer token / Bedrock API key** is used (`AWS_BEARER_TOKEN_BEDROCK` stays unset), consistent with dotCMS's OIDC-only posture. +- Calls mantle with the **OpenAI SDK**, authenticated by a **short-term Bedrock bearer token** minted in-process from the assumed-role session via `aws-bedrock-token-generator` (`provide_token()`). The SDK can't consume SigV4 directly, but a short-term key keeps the OIDC-only posture: it's derived from the current STS credentials (no long-lived secret), inherits the role's permissions, expires with the role session (≤1h here, ≤12h cap), is **not a stored resource** (nothing to delete), and is never written to env/disk/logs. No marketplace subscription; no long-term API key. - **Streams** Server-Sent Events and accumulates `response.output_text.delta` chunks. Streaming is mandatory: GPT-5.x reasons before emitting, so a non-streaming call buffers and looks like a 60–100s hang. - Remaps the orchestrator's `us-east-1` default to **us-east-2**, where GPT-5.5/5.4 are served. 
The mantle *endpoint* exists in us-east-1, but the *models* are not there yet (verified via the Models API: us-east-1 lists gpt-oss but no gpt-5*). GPT-5.4 also accepts an explicit us-west-2. - Sends `store: false` on each request for **zero data retention** — the Responses API otherwise defaults `store: true`, retaining input+output for 30 days in-region for `previous_response_id` chaining, which single-shot review doesn't need. - Reuses the same `/tmp` sticky-comment helper and `sticky_namespace` input as the generic executor. Exposes `reasoning_effort` (default `medium`). Note `max_output_tokens` caps only the visible answer, **not** reasoning tokens. -IAM: `bedrock-mantle:CreateInference` scoped by a `bedrock-mantle:Model StringLike openai.*` condition (no per-model ARNs exist on mantle). Provisioned in dotCMS/Infrastructure-as-code `bedrock-code-review/` (#7836). No AWS Marketplace subscription is required — mantle bills on-demand directly. +IAM: `bedrock-mantle:CreateInference` scoped by `bedrock-mantle:Model StringLike openai.*` (no per-model ARNs exist on mantle), plus `bedrock-mantle:CallWithBearerToken` scoped to `BearerTokenType=SHORT_TERM` (required to use any API key; short-term-only blocks long-term keys). Provisioned in dotCMS/Infrastructure-as-code `bedrock-code-review/` (#7836). No AWS Marketplace subscription is required — mantle bills on-demand directly. --- diff --git a/CLAUDE.md b/CLAUDE.md index a1c01e1..177c758 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -15,7 +15,7 @@ The repository implements a reusable workflow architecture with model-aware rout - **Claude Orchestrator** (`.github/workflows/claude-orchestrator.yml`): Lightweight wrapper that handles @claude mention detection AND routes to the appropriate executor based on `model_id`. Consumer repositories call this with `trigger_mode: interactive` or `trigger_mode: automatic`. Exactly one executor runs per call. 
- **Claude Executor** (`.github/workflows/claude-executor.yml`): Execution engine for Anthropic models — runs `anthropics/claude-code-action@v1` either against the direct Anthropic API (`provider: anthropic-api`, default) or via AWS Bedrock (`provider: anthropic-bedrock`, OIDC + `use_bedrock=true`). - **Bedrock Generic Executor** (`.github/workflows/bedrock-generic-executor.yml`): Execution engine for **any non-Anthropic Bedrock model** (Amazon Nova, Meta Llama, Mistral, Cohere, AI21). Uses the Bedrock Converse API and maintains its own sticky comment via an inlined helper (set up to `/tmp` at job start, so no cross-repo path dependency). -- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle directly with a **SigV4-signed streaming** request (signing service `bedrock`, no bearer token / API key) and accumulates the SSE deltas. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default to `us-east-2`, where GPT-5.5/5.4 are served (the mantle endpoint exists in us-east-1 but the models are not there yet — verified via the Models API). Sends `store: false` for zero data retention. Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836. +- **Codex Executor** (`.github/workflows/codex-executor.yml`): Execution engine for **OpenAI GPT/Codex models** (`openai.gpt-5.5`, `openai.gpt-5.4`). These are served only by the separate **bedrock-mantle** endpoint (OpenAI Responses API), not bedrock-runtime — so it calls mantle with the **OpenAI SDK** authenticated by a **short-term Bedrock bearer token** minted in-process from the OIDC-assumed-role session (`aws-bedrock-token-generator`), and streams `response.output_text.delta` events. 
The token is OIDC-derived (no long-lived secret, nothing to clean up, ≤1h via the role session) and never written to env/disk/logs; IAM grants `bedrock-mantle:CallWithBearerToken` scoped to `BearerTokenType=SHORT_TERM`. Streaming is mandatory (GPT-5.x reasons before emitting). Remaps the `us-east-1` default to `us-east-2`, where GPT-5.5/5.4 are served (the mantle endpoint exists in us-east-1 but the models are not there yet — verified via the Models API). Sends `store: false` for zero data retention. Reuses the same `/tmp` sticky-comment helper. See dotCMS/Infrastructure-as-code#7836. - **Deployment Guard** (`.github/workflows/deployment-guard.yml`): Reusable workflow for validating deployment changes with configurable rules. Features organization-based bypass for trusted members, file allowlist validation, image-only change detection, and comprehensive image validation (format, repository, version pattern, registry existence, anti-downgrade logic). ### Multi-model Routing (v3) From 9a26dddadaddb6f1d4cb6f610a6be81445afa391 Mon Sep 17 00:00:00 2001 From: Stephen Freudenthaler Date: Tue, 9 Jun 2026 19:59:06 -0400 Subject: [PATCH 4/5] chore(codex): use uv (PEP 723 inline deps) instead of pip Run the mantle review script via `uv run` with dependencies declared as PEP 723 inline script metadata, instead of a system `pip install`. Adds an astral-sh/setup-uv step (with cache). uv provisions openai + aws-bedrock-token-generator (and Python) into an ephemeral cached env; no system-python pollution, faster and reproducible across runs. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/codex-executor.yml | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml index 63a95fe..3636a85 100644 --- a/.github/workflows/codex-executor.yml +++ b/.github/workflows/codex-executor.yml @@ -137,6 +137,11 @@ jobs: role-to-assume: ${{ inputs.bedrock_role_arn }} aws-region: ${{ steps.region.outputs.region }} + - name: Set up uv + uses: astral-sh/setup-uv@v6 + with: + enable-cache: true + # Inlined because actions/checkout above checks out the *consumer's* repo, not this one, # so a relative script path would resolve against the consumer (and not exist for # external consumers). Writing to /tmp avoids the cross-repo path dependency. @@ -181,8 +186,14 @@ jobs: run: | cat > /tmp/mantle_review.py <<'PY_EOF' #!/usr/bin/env python3 + # /// script + # requires-python = ">=3.9" + # dependencies = ["openai>=1.66", "aws-bedrock-token-generator>=1.0"] + # /// # Review a PR diff via the OpenAI Responses API on bedrock-mantle, using the OpenAI # SDK with a SHORT-TERM Bedrock bearer token minted from the assumed-role session. + # Dependencies are declared above as PEP 723 inline metadata; `uv run` provisions + # them (and Python) into an ephemeral, cached environment — no system pip install. import os, sys from aws_bedrock_token_generator import provide_token from openai import OpenAI @@ -334,12 +345,11 @@ jobs: MANTLE_REGION: ${{ steps.region.outputs.region }} run: | set -euo pipefail - # OpenAI SDK (Responses API client) + the AWS token generator (mints the short-term - # bearer; pulls botocore for SigV4 signing). AWS CLI v2 on the runner is a - # self-contained bundle that doesn't expose these to system python, so install them. 
- python3 -m pip install --quiet --disable-pip-version-check \ - 'openai>=1.66' 'aws-bedrock-token-generator>=1.0' - python3 /tmp/mantle_review.py + # Dependencies (openai SDK + aws-bedrock-token-generator) are declared inline in the + # script via PEP 723 metadata; `uv run` resolves them into an ephemeral, cached env + # and provisions Python as needed — no system pip install, no venv to manage. Job env + # vars and the OIDC-issued AWS credentials are inherited by the subprocess. + uv run /tmp/mantle_review.py echo "has_review=true" >> "$GITHUB_OUTPUT" - name: Update sticky comment with review From 2de45c329df4a9f65bb9790d0613eadb33c20ba2 Mon Sep 17 00:00:00 2001 From: Stephen Freudenthaler Date: Tue, 9 Jun 2026 20:33:15 -0400 Subject: [PATCH 5/5] docs(codex): explain why the review script is inlined (not a separate file) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document that inlining mantle_review.py to /tmp is deliberate: this is a cross-repo reusable workflow, so actions/checkout pulls the consumer's repo and a relative local action/script reference resolves against the consumer, not ai-workflows. Shipping it as a real file would need a fully-qualified composite action or a self-checkout at a pinned ref — avoided to keep the executor self-contained and version-locked. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/codex-executor.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/codex-executor.yml b/.github/workflows/codex-executor.yml index 3636a85..985eead 100644 --- a/.github/workflows/codex-executor.yml +++ b/.github/workflows/codex-executor.yml @@ -183,6 +183,12 @@ jobs: chmod +x /tmp/sticky-comment.sh - name: Set up mantle review helper (OpenAI SDK + short-term bearer) + # Inlined to /tmp on purpose (same reason as the sticky-comment helper above): this is + # a cross-repo REUSABLE workflow, so actions/checkout pulls the *consumer's* repo, and a + # relative local action/script reference (`./...`) resolves against the consumer, not + # ai-workflows. Shipping the script as a real file would require a fully-qualified + # composite action or a self-checkout of ai-workflows at a pinned ref — deliberately + # avoided here to keep this executor self-contained and version-locked. See PR #31. run: | cat > /tmp/mantle_review.py <<'PY_EOF' #!/usr/bin/env python3