From d2b26916a1aaea545cd8db160bc85607218723b2 Mon Sep 17 00:00:00 2001 From: Patryk Matuszak Date: Mon, 15 Jun 2026 10:27:11 +0200 Subject: [PATCH 1/4] microshift-ci-doctor: add openshift-ci MCP and widen analysis budget The microshift-ci plugin's prow-job skill now performs evidence-cited root cause analysis (sosreport extraction, source correlation, causal chains) and consults job history through the openshift-ci MCP server: - Download a pinned, checksum-verified openshift-ci-mcp release binary and register it as the "openshift-ci" stdio MCP (read-only Sippy/ Release-Controller/Search.CI access, no credentials). Failures are non-fatal: the skills record the absence in their analysis gaps. - Allow mcp__openshift-ci__* in the permission settings. - Doctor session budget: 45m/100 turns -> 60m/150 turns; step timeout 1h30m -> 2h15m. The deeper per-job analysis needs more wall clock than the old surface-level scan. --- ...e-tooling-microshift-ci-doctor-commands.sh | 39 ++++++++++++++++--- ...edge-tooling-microshift-ci-doctor-ref.yaml | 2 +- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh index f31f09af255f1..7426c42b4e51a 100644 --- a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh +++ b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh @@ -196,7 +196,8 @@ configure_claude() { "Write(//tmp/**)", "Bash(bash plugins/microshift-ci/scripts/*)", "Bash(python3 plugins/microshift-ci/scripts/*)", - "Skill(microshift-ci:*)" + "Skill(microshift-ci:*)", + "mcp__openshift-ci__*" ] } } @@ -223,6 +224,31 @@ EOF else echo "WARNING: Jira API token or username not available. Jira MCP will not be available." fi + + # Configure the OpenShift CI MCP — read-only Sippy/Release-Controller/ + # Search.CI access used by the analysis skills for job history and + # known-regression context. No credentials involved. Failures are + # non-fatal: skills record the absence in their analysis gaps. + echo "Configuring OpenShift CI MCP..." + local -r ocimcp_version="v0.5.0" + local -r ocimcp_sha256="a9221011c8aded3108a89a9ee8fa19bcd86daed0582d997ff51913445d5eb53e" + local -r ocimcp_bin="${WORKDIR}/bin/openshift-ci-mcp" + mkdir -p "${WORKDIR}/bin" + if curl -sL --retry 3 -o "${ocimcp_bin}" \ + "https://github.com/openshift-eng/openshift-ci-mcp/releases/download/${ocimcp_version}/openshift-ci-mcp-linux-amd64" && + echo "${ocimcp_sha256} ${ocimcp_bin}" | sha256sum --check --quiet; then + chmod +x "${ocimcp_bin}" + claude mcp add --scope user --transport stdio openshift-ci -- "${ocimcp_bin}" --tools core,jobs,tests,prs,search + echo "Waiting for OpenShift CI MCP to become available..." + if wait_for_mcp_status "openshift-ci" "Connected"; then + echo "OpenShift CI MCP is available." + else + echo "WARNING: OpenShift CI MCP did not connect. Job history will not be available." + fi + else + echo "WARNING: Failed to download or verify openshift-ci-mcp ${ocimcp_version}. Job history will not be available." + rm -f "${ocimcp_bin}" + fi } # @@ -260,17 +286,20 @@ echo "Running automatic closing of duplicate rebase PRs..." --filter 'NO-ISSUE: rebase-release' echo "Automatic closing of duplicate rebase PRs completed" -# Run analysis on all releases and open rebase PRs (45m and 100 turns). +# Run analysis on all releases and open rebase PRs (60m and 150 turns). +# The deeper per-job root cause analysis (sosreport extraction, source +# correlation, causal chains) needs more wall clock than the old +# surface-level scan did. echo "Running Claude to analyze MicroShift CI jobs and pull requests..." CLAUDE_RC=0 -timeout 2700 claude \ +timeout 3600 claude \ --model "${CLAUDE_MODEL}" \ - --max-turns 100 \ + --max-turns 150 \ --output-format stream-json \ --plugin-dir "${PLUGIN_DIR}" \ -p "/microshift-ci:doctor ${RELEASE_VERSIONS}" \ --verbose 2>&1 | tee "${CLAUDE_DOCTOR_LOG}" || CLAUDE_RC=$? -check_claude_rc "${CLAUDE_RC}" "doctor" 45 +check_claude_rc "${CLAUDE_RC}" "doctor" 60 # Run bug creation for failed jobs (15m and 50 turns). echo "Running Claude to create bugs for failed jobs..." diff --git a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-ref.yaml b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-ref.yaml index 2764a288b6301..dff0606827b15 100644 --- a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-ref.yaml +++ b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-ref.yaml @@ -42,7 +42,7 @@ ref: requests: cpu: 2000m memory: 4Gi - timeout: 1h30m0s + timeout: 2h15m0s grace_period: 10m0s documentation: |- Analyzes MicroShift periodic jobs and pull requests using Claude AI. From 990920234c98a3781cb5424c262362cb534831a1 Mon Sep 17 00:00:00 2001 From: Patryk Matuszak Date: Mon, 15 Jun 2026 10:37:07 +0200 Subject: [PATCH 2/4] debug: test branch --- ...penshift-edge-tooling-microshift-ci-doctor-commands.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh index 7426c42b4e51a..9d03faceee969 100644 --- a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh +++ b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh @@ -269,10 +269,16 @@ load_secrets configure_claude # Use the edge-tooling source pre-installed in the image -SRC_DIR="${EDGE_TOOLING_DIR}" +#SRC_DIR="${EDGE_TOOLING_DIR}" +#PLUGIN_DIR="${SRC_DIR}/plugins/microshift-ci" + +SRC_DIR=/tmp/edge-tooling PLUGIN_DIR="${SRC_DIR}/plugins/microshift-ci" +git clone https://github.com/pmtk/edge-tooling.git -b ci-doctor-rca "${SRC_DIR}" + cd "${SRC_DIR}" + # Configure the GitHub token for MicroShift repo operations { set +x; export GITHUB_TOKEN="${GITHUB_TOKEN_USHIFT}"; set -x; } From ee5838453d83e282fbec847312ad02c67c63ac59 Mon Sep 17 00:00:00 2001 From: Patryk Matuszak Date: Mon, 15 Jun 2026 20:21:11 +0200 Subject: [PATCH 3/4] use claude-latest --- ...edge-tooling-microshift-ci-doctor-commands.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh index 9d03faceee969..dec13c08a62d9 100644 --- a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh +++ b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh @@ -163,7 +163,7 @@ wait_for_mcp_status() { local -r attempts=$((timeout / interval)) for ((i=0; i Date: Mon, 15 Jun 2026 20:55:17 +0200 Subject: [PATCH 4/4] debug: run selected flows --- ...e-tooling-microshift-ci-doctor-commands.sh | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh index dec13c08a62d9..fbb833a875827 100644 --- a/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh +++ b/ci-operator/step-registry/openshift/edge-tooling/microshift-ci/doctor/openshift-edge-tooling-microshift-ci-doctor-commands.sh @@ -284,13 +284,13 @@ cd "${SRC_DIR}" # Close duplicate rebase PRs before running the analysis to prevent them # from being included in the analysis and bug creation. -echo "Running automatic closing of duplicate rebase PRs..." -"${PLUGIN_DIR}/scripts/prow-jobs-for-pull-requests.sh" \ - --mode close-duplicates \ - --execute \ - --author 'microshift-rebase-script[bot]' \ - --filter 'NO-ISSUE: rebase-release' -echo "Automatic closing of duplicate rebase PRs completed" +#echo "Running automatic closing of duplicate rebase PRs..." +#"${PLUGIN_DIR}/scripts/prow-jobs-for-pull-requests.sh" \ +# --mode close-duplicates \ +# --execute \ +# --author 'microshift-rebase-script[bot]' \ +# --filter 'NO-ISSUE: rebase-release' +#echo "Automatic closing of duplicate rebase PRs completed" # Run analysis on all releases and open rebase PRs (60m and 150 turns). # The deeper per-job root cause analysis (sosreport extraction, source @@ -308,28 +308,28 @@ timeout 3600 claude-latest \ check_claude_rc "${CLAUDE_RC}" "doctor" 60 # Run bug creation for failed jobs (15m and 50 turns). -echo "Running Claude to create bugs for failed jobs..." -CLAUDE_RC=0 -timeout 900 claude-latest \ - --model "${CLAUDE_MODEL}" \ - --max-turns 50 \ - --output-format stream-json \ - --plugin-dir "${PLUGIN_DIR}" \ - -p "/microshift-ci:create-bugs ${RELEASE_VERSIONS} --create" \ - --verbose 2>&1 | tee "${CLAUDE_CREATE_BUGS_LOG}" || CLAUDE_RC=$? -check_claude_rc "${CLAUDE_RC}" "create-bugs" 15 +#echo "Running Claude to create bugs for failed jobs..." +#CLAUDE_RC=0 +#timeout 900 claude-latest \ +# --model "${CLAUDE_MODEL}" \ +# --max-turns 50 \ +# --output-format stream-json \ +# --plugin-dir "${PLUGIN_DIR}" \ +# -p "/microshift-ci:create-bugs ${RELEASE_VERSIONS} --create" \ +# --verbose 2>&1 | tee "${CLAUDE_CREATE_BUGS_LOG}" || CLAUDE_RC=$? +#check_claude_rc "${CLAUDE_RC}" "create-bugs" 15 # Close stale bugs that are no longer linked to current failures (10m and 20 turns). -echo "Running Claude to close stale bugs..." -CLAUDE_RC=0 -timeout 600 claude-latest \ - --model "${CLAUDE_MODEL}" \ - --max-turns 20 \ - --output-format stream-json \ - --plugin-dir "${PLUGIN_DIR}" \ - -p "/microshift-ci:close-stale-bugs --close" \ - --verbose 2>&1 | tee "${CLAUDE_CLOSE_STALE_BUGS_LOG}" || CLAUDE_RC=$? -check_claude_rc "${CLAUDE_RC}" "close-stale-bugs" 10 +#echo "Running Claude to close stale bugs..." +#CLAUDE_RC=0 +#timeout 600 claude-latest \ +# --model "${CLAUDE_MODEL}" \ +# --max-turns 20 \ +# --output-format stream-json \ +# --plugin-dir "${PLUGIN_DIR}" \ +# -p "/microshift-ci:close-stale-bugs --close" \ +# --verbose 2>&1 | tee "${CLAUDE_CLOSE_STALE_BUGS_LOG}" || CLAUDE_RC=$? +#check_claude_rc "${CLAUDE_RC}" "close-stale-bugs" 10 # Run bug fix for test bugs (15m and 50 turns). # Dry-run mode only. @@ -358,9 +358,9 @@ check_claude_rc "${CLAUDE_RC}" "doctor-refresh" 10 # Now attempt to restart failed rebase PRs tests. If the restarted tests # complete successfully, the PR will be automatically merged. -echo "Running automatic restart of failed rebase PRs tests..." -"${PLUGIN_DIR}/scripts/prow-jobs-for-pull-requests.sh" \ - --mode restart \ - --execute \ - --author 'microshift-rebase-script[bot]' -echo "Automatic restart of failed rebase PRs tests completed" +#echo "Running automatic restart of failed rebase PRs tests..." +#"${PLUGIN_DIR}/scripts/prow-jobs-for-pull-requests.sh" \ +# --mode restart \ +# --execute \ +# --author 'microshift-rebase-script[bot]' +#echo "Automatic restart of failed rebase PRs tests completed"