CSPL-3763 add flaky test analysis tool #1744 (Open)

kubabuczak wants to merge 10 commits into develop from CSPL-3763-add-flay-test-analysis-tool
Commits (10, all by kubabuczak):

- 84c33f3 CSPL-3763 Add Flaky Test Analysis Workflow and Script
- 813d563 Update flaky test analysis workflow to include push trigger for speci…
- 48bd3ed Remove push trigger for specific branch in flaky test analysis workflow
- 452ac57 Enhance flaky test analysis workflow to display heatmap images in sum…
- a2f15f6 Remove unused image processing code from flaky test analysis workflow
- 8411896 Add test failure statistics generation to flaky test analysis workflow
- 266327f Add custom Matplotlib configurations for flaky test analysis
- 2d63310 Enhance flaky test analysis workflow with output variables and naming…
- 8d5208b Add branch filtering option to flaky test analysis workflow and script
- c5c2e7d Fix typo in flaky test analysis script for date filtering
New file (+107 lines) — the GitHub Actions workflow:

```yaml
name: Flaky Test Analysis
permissions:
  contents: read
  actions: read
on:
  push:
    branches:
      - CSPL-3763-add-flay-test-analysis-tool
  workflow_dispatch:
    inputs:
      start_date:
        description: 'Start date (YYYY-MM-DD). Defaults to 7 days before yesterday.'
        required: false
      end_date:
        description: 'End date (YYYY-MM-DD). Defaults to yesterday.'
        required: false
      top_n:
        description: 'Number of flakiest tests to report'
        required: false
        default: '20'
      window_size:
        description: 'Window size in days for flip rate calculation'
        required: false
        default: '1'
      branch:
        description: 'Only include runs from this branch (e.g. develop). All branches if empty.'
        required: false
jobs:
  analyze:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: pip install flaky-tests-detection

      - name: Run flaky test analysis
        id: run
        env:
          GH_TOKEN: ${{ github.token }}
          START_DATE: ${{ inputs.start_date }}
          END_DATE: ${{ inputs.end_date }}
          TOP_N: ${{ inputs.top_n }}
          WINDOW_SIZE: ${{ inputs.window_size }}
          BRANCH: ${{ inputs.branch }}
          RESULTS_FILE: flaky-results.txt
        run: |
          ./tools/flaky-test-analysis.sh
          echo "start=${START_DATE:-$(date -u -d '7 days ago' +%Y-%m-%d)}" >> "$GITHUB_OUTPUT"
          echo "end=${END_DATE:-$(date -u -d 'yesterday' +%Y-%m-%d)}" >> "$GITHUB_OUTPUT"

      - name: Generate job summary
        if: always()
        run: |
          {
            echo "## Flaky Test Analysis"
            echo ""
            echo "**Date range:** \`${{ steps.run.outputs.start }}\` .. \`${{ steps.run.outputs.end }}\`"
            BRANCH_INFO="${{ inputs.branch }}"
            if [[ -n "$BRANCH_INFO" ]]; then
              echo "**Branch:** \`${BRANCH_INFO}\` | **Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)"
            else
              echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)"
            fi
            echo ""

            if [[ ! -f flaky-results.txt ]]; then
              echo "> No results produced. Check the workflow logs."
              exit 0
            fi

            # Extract the score lines (format: suite::[It] description --- score: N)
            flaky_lines=$(grep -E ' --- score: ' flaky-results.txt || true)

            if [[ -z "$flaky_lines" ]]; then
              echo "> No flaky tests detected in this period."
            else
              echo "| Score | Suite | Test |"
              echo "|------:|-------|------|"
              echo "$flaky_lines" | while IFS= read -r line; do
                score=$(echo "$line" | sed -E 's/.* --- score: (.+)/\1/')
                suite=$(echo "$line" | sed -E 's/^([^:]+)::.*/\1/')
                test_name=$(echo "$line" | sed -E 's/^[^:]+::\[It\] (.*) --- score:.*/\1/')
                echo "| ${score} | ${suite} | ${test_name} |"
              done
            fi

          } >> "$GITHUB_STEP_SUMMARY"

      - name: Generate failure stats summary
        if: always()
        run: python3 tools/test-failure-stats.py ./junit-reports >> "$GITHUB_STEP_SUMMARY"

      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: flaky-test-results-${{ steps.run.outputs.start }}-to-${{ steps.run.outputs.end }}
          path: |
            flaky-results.txt
            *_flip_rate_*.png
          if-no-files-found: ignore
          retention-days: 30
```
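The summary step's inline comment documents the result-line format (`suite::[It] description --- score: N`) and then splits each line with three separate sed expressions. A minimal Python sketch of the same parsing, using one regex instead (the sample line is illustrative, not taken from real output):

```python
import re

# Format taken from the workflow's inline comment:
#   suite::[It] description --- score: N
LINE_RE = re.compile(r"^(?P<suite>[^:]+)::\[It\] (?P<test>.*) --- score: (?P<score>.+)$")

def parse_result_line(line):
    """Split one flaky-results line into (score, suite, test); None if it doesn't match."""
    m = LINE_RE.match(line)
    if not m:
        return None
    return m.group("score"), m.group("suite"), m.group("test")

print(parse_result_line("smoke::[It] can deploy standalone --- score: 0.42"))
# -> ('0.42', 'smoke', 'can deploy standalone')
```

A single anchored regex also makes the failure mode explicit: a line that grep matched but that doesn't fit the expected shape yields `None` rather than three partially-mangled fields.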
New file (+43 lines) — the custom Matplotlib configuration, loaded by the analysis script via PYTHONPATH:

```python
"""Matplotlib customizations for flaky-test-analysis heatmaps.

Monkey-patches matplotlib to:
- Increase font sizes for readability
- Wrap long y-axis tick labels
- Override the library's hardcoded title fontsize
"""
import textwrap
import matplotlib as mpl
import matplotlib.pyplot as plt

WRAP_WIDTH = 60
TITLE_FONTSIZE = 64

mpl.rcParams.update({
    "font.size": 50,
    "xtick.labelsize": 40,
    "ytick.labelsize": 40,
    "axes.labelsize": 60,
})

_original_savefig = plt.savefig
_original_title = plt.title


def _title_with_fontsize(*args, **kwargs):
    kwargs["fontsize"] = TITLE_FONTSIZE
    return _original_title(*args, **kwargs)


def _savefig_with_wrapped_labels(*args, **kwargs):
    fig = plt.gcf()
    for ax in fig.axes:
        labels = ax.get_yticklabels()
        if labels:
            ticks = ax.get_yticks()
            ax.set_yticks(ticks)
            ax.set_yticklabels([textwrap.fill(l.get_text(), WRAP_WIDTH) for l in labels])
    _original_savefig(*args, **kwargs)


plt.title = _title_with_fontsize
plt.savefig = _savefig_with_wrapped_labels
```
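The `textwrap.fill` call is what keeps long Ginkgo test names from overflowing the heatmap's y-axis. A quick standalone illustration of the wrapping at the module's `WRAP_WIDTH` (the label text is a made-up example):

```python
import textwrap

WRAP_WIDTH = 60  # same width the config module uses

# Hypothetical long test name, for illustration only
label = ("can deploy a standalone instance with a custom license manager "
         "and verify that all pods reach the Ready state")
wrapped = textwrap.fill(label, WRAP_WIDTH)
print(wrapped)  # prints the label broken into lines of <= 60 characters
```

`textwrap.fill` breaks only at word boundaries, so no word is split mid-token; the trade-off is that a wrapped label occupies several tick-label lines, which is why the config also enlarges fonts and figure proportions.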
New file (+218 lines) — tools/flaky-test-analysis.sh:

```bash
#!/usr/bin/env bash
#
# Download JUnit test report artifacts from GitHub Actions and run flaky test detection.
#
# Usage:
#   ./tools/flaky-test-analysis.sh [start-date] [end-date] [--branch <name>] [--dry-run] [--skip-analysis]
#
# Dates default to the last 7 full days (excluding today) if not provided.
#
# Examples:
#   ./tools/flaky-test-analysis.sh                       # last 7 full days
#   ./tools/flaky-test-analysis.sh 2026-02-01 2026-02-26
#   ./tools/flaky-test-analysis.sh --branch develop      # only runs on develop
#   ./tools/flaky-test-analysis.sh --dry-run             # preview only
#   ./tools/flaky-test-analysis.sh --skip-analysis       # download only, no flaky detection
#
# Requires: gh (GitHub CLI), authenticated via 'gh auth login' or GH_TOKEN.
# Optional: flaky-tests-detection (pip install flaky-tests-detection)
#
set -euo pipefail

REPO="${REPO:-splunk/splunk-operator}"
OUTPUT_DIR="${OUTPUT_DIR:-./junit-reports}"
ARTIFACT_PATTERN="${ARTIFACT_PATTERN:-^test-report-.*}"
TOP_N="${TOP_N:-20}"
WINDOW_SIZE="${WINDOW_SIZE:-1}"

if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  sed -n '2,17p' "$0" | sed 's/^# \?//'
  exit 0
fi

DRY_RUN="${DRY_RUN:-false}"
SKIP_ANALYSIS="${SKIP_ANALYSIS:-false}"
BRANCH="${BRANCH:-}"
START_DATE="${START_DATE:-}"
END_DATE="${END_DATE:-}"

_next_is_branch=false
for arg in "$@"; do
  if [[ "$_next_is_branch" == "true" ]]; then
    BRANCH="$arg"
    _next_is_branch=false
  elif [[ "$arg" == "--branch" ]]; then
    _next_is_branch=true
  elif [[ "$arg" == --branch=* ]]; then
    BRANCH="${arg#--branch=}"
  elif [[ "$arg" == "--dry-run" ]]; then
    DRY_RUN=true
  elif [[ "$arg" == "--skip-analysis" ]]; then
    SKIP_ANALYSIS=true
  elif [[ -z "$START_DATE" ]]; then
    START_DATE="$arg"
  elif [[ -z "$END_DATE" ]]; then
    END_DATE="$arg"
  fi
done

END_DATE="${END_DATE:-$(date -u -v-1d +%Y-%m-%d 2>/dev/null || date -u -d 'yesterday' +%Y-%m-%d)}"
START_DATE="${START_DATE:-$(date -u -v-7d +%Y-%m-%d 2>/dev/null || date -u -d '7 days ago' +%Y-%m-%d)}"

# Derive WINDOW_COUNT from the date range (one window per WINDOW_SIZE days)
if date -v+0d +%s &>/dev/null; then
  _start_epoch=$(date -jf "%Y-%m-%d" "$START_DATE" +%s)
  _end_epoch=$(date -jf "%Y-%m-%d" "$END_DATE" +%s)
else
  _start_epoch=$(date -d "$START_DATE" +%s)
  _end_epoch=$(date -d "$END_DATE" +%s)
fi
WINDOW_COUNT=$(( ((_end_epoch - _start_epoch) / 86400 + WINDOW_SIZE) / WINDOW_SIZE ))

if ! command -v gh &>/dev/null; then
  echo "ERROR: 'gh' (GitHub CLI) is required. Install from https://cli.github.com/" >&2
  exit 1
fi

echo "Repository: $REPO"
echo "Date range: $START_DATE .. $END_DATE"
if [[ -n "$BRANCH" ]]; then
  echo "Branch: $BRANCH"
fi
echo "Output: $OUTPUT_DIR"
echo ""

BRANCH_FILTER=""
if [[ -n "$BRANCH" ]]; then
  BRANCH_FILTER="| select(.workflow_run.head_branch == \"${BRANCH}\")"
fi
ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false) ${BRANCH_FILTER}"
DATE_FILTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")"

echo "Fetching artifact list..."

artifacts_json="[]"
page=1
while true; do
  response=$(gh api "repos/${REPO}/actions/artifacts?per_page=100&page=${page}" 2>/dev/null)

  page_artifacts=$(echo "$response" | \
    jq "[${ART_FILTER} | ${DATE_FILTER} | {id, name, created_at, workflow_run_id: .workflow_run.id}]")
  artifacts_json=$(echo "$artifacts_json" "$page_artifacts" | jq -s 'add')

  oldest=$(echo "$response" | jq -r '.artifacts[-1].created_at // empty')
  if [[ -z "$oldest" || "$oldest" < "${START_DATE}T00:00:00Z" ]]; then
    break
  fi

  count=$(echo "$response" | jq '.artifacts | length')
  if [[ "$count" -lt 100 ]]; then
    break
  fi

  page=$((page + 1))
done

artifact_count=$(echo "$artifacts_json" | jq 'length')
echo "Found $artifact_count matching artifacts."
echo ""

if [[ "$artifact_count" -eq 0 ]]; then
  echo "No artifacts matched. Check your date range and artifact retention settings."
  exit 0
fi

echo "--------------------------------------------------------------"
printf "%-12s %-50s %s\n" "RUN ID" "ARTIFACT NAME" "CREATED"
echo "--------------------------------------------------------------"
echo "$artifacts_json" | jq -r '.[] | "\(.workflow_run_id)\t\(.name)\t\(.created_at)"' | \
  while IFS=$'\t' read -r run_id name created; do
    printf "%-12s %-50s %s\n" "$run_id" "$name" "${created%%T*}"
  done
echo "--------------------------------------------------------------"
echo ""

if [[ "$DRY_RUN" == "true" ]]; then
  echo "(dry run - skipping downloads)"
  exit 0
fi

mkdir -p "$OUTPUT_DIR"

tmpdir=$(mktemp -d)
trap 'rm -rf "$tmpdir"' EXIT

dl_current=0
echo "$artifacts_json" | jq -r '.[] | "\(.id)\t\(.name)\t\(.workflow_run_id)"' | \
  while IFS=$'\t' read -r art_id art_name run_id; do
    dl_current=$((dl_current + 1))
    echo " [${dl_current}/${artifact_count}] ${art_name} (run ${run_id})..."
    zipfile="${tmpdir}/${art_id}.zip"
    if gh api "repos/${REPO}/actions/artifacts/${art_id}/zip" > "$zipfile" 2>/dev/null; then
      unzip -qo "$zipfile" -d "$tmpdir/extract" 2>/dev/null
      for f in "$tmpdir/extract"/*.xml; do
        [[ -f "$f" ]] || continue
        base=$(basename "$f" .xml)
        mv "$f" "${OUTPUT_DIR}/${run_id}-${base}.xml"
      done
      rm -rf "$tmpdir/extract" "$zipfile"
    else
      echo " FAILED to download artifact ${art_id}"
      rm -f "$zipfile"
    fi
  done

total_files=$(find "$OUTPUT_DIR" -name '*.xml' 2>/dev/null | wc -l | tr -d ' ')
echo ""
echo "Done. ${total_files} XML files saved to ${OUTPUT_DIR}/"

echo ""
echo "Normalizing classnames (stripping Ginkgo random suffixes)..."
for f in "${OUTPUT_DIR}"/*.xml; do
  [[ -f "$f" ]] || continue
  sed -E 's/classname="Running (.+)-[a-z0-9]{3}"/classname="\1"/g' "$f" > "${f}.tmp" && mv "${f}.tmp" "$f"
done
echo "Done."

if [[ "$SKIP_ANALYSIS" == "true" ]]; then
  exit 0
fi

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PYTHON=""
if [[ -x "${SCRIPT_DIR}/.venv/bin/python" ]]; then
  PYTHON="${SCRIPT_DIR}/.venv/bin/python"
elif command -v python3 &>/dev/null; then
  PYTHON="python3"
else
  echo ""
  echo "Python not found. Install with: cd tools && poetry install"
  exit 0
fi

if ! "$PYTHON" -c "from flaky_tests_detection.check_flakes import main" &>/dev/null; then
  echo ""
  echo "flaky-tests-detection not installed. Install with: cd tools && poetry install"
  exit 0
fi

echo ""
echo "================================================================"
echo "Running flaky test detection..."
echo " Window size: ${WINDOW_SIZE} days"
echo " Window count: ${WINDOW_COUNT}"
echo " Top N: ${TOP_N}"
echo "================================================================"
echo ""

RESULTS_FILE="${RESULTS_FILE:-flaky-results.txt}"

PYTHONPATH="${SCRIPT_DIR}:${PYTHONPATH:-}" "$PYTHON" -c \
  "import importlib; importlib.import_module('flaky-test-analysis-mpl-config'); from flaky_tests_detection.check_flakes import main; main()" \
  --junit-files="${OUTPUT_DIR}" \
  --grouping-option=days \
  --window-size="${WINDOW_SIZE}" \
  --window-count="${WINDOW_COUNT}" \
  --top-n="${TOP_N}" \
  --heatmap \
  2>&1 | tee "$RESULTS_FILE"
```
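The script derives `WINDOW_COUNT` with integer arithmetic: `(( (end_epoch - start_epoch) / 86400 + WINDOW_SIZE ) / WINDOW_SIZE )`. A small Python sketch of the same computation, which makes it easy to sanity-check the edge cases (the dates here are arbitrary examples):

```python
from datetime import date

def window_count(start, end, window_size):
    """Mirror the script's shell arithmetic:
    ((days between start and end) + window_size) // window_size
    """
    days = (end - start).days
    return (days + window_size) // window_size

# A 7-day span with 1-day windows covers 8 calendar days inclusive -> 8 windows
print(window_count(date(2026, 2, 1), date(2026, 2, 8), 1))  # 8
print(window_count(date(2026, 2, 1), date(2026, 2, 8), 3))  # 3
print(window_count(date(2026, 2, 1), date(2026, 2, 1), 1))  # 1
```

Note the formula always yields at least one window, even when start and end are the same day, so the downstream `--window-count` argument can never be zero.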
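The normalization pass at the end of the download phase strips the `Running ` prefix and the random three-character suffix that Ginkgo appends to suite classnames, so that the same suite aggregates across runs. A Python equivalent of the script's sed expression, applied to a made-up JUnit line:

```python
import re

# Mirrors the script's sed expression:
#   s/classname="Running (.+)-[a-z0-9]{3}"/classname="\1"/g
# Greedy (.+) matches sed's behavior: only the final -xxx suffix is dropped.
SUFFIX_RE = re.compile(r'classname="Running (.+)-[a-z0-9]{3}"')

def normalize(xml_line):
    """Strip the 'Running ' prefix and the trailing random 3-char suffix."""
    return SUFFIX_RE.sub(r'classname="\1"', xml_line)

print(normalize('<testcase classname="Running smoke-a1b" name="deploys standalone"/>'))
# -> <testcase classname="smoke" name="deploys standalone"/>
```

Without this step every CI run would produce a distinct classname, and the detector would never see the same test fail twice.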
Review comment: "Please remember to delete this before merge"