From 84c33f342a31040d8243f3da3ebd894a501f8bd3 Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Mon, 2 Mar 2026 13:59:04 +0100
Subject: [PATCH 01/10] CSPL-3763 Add Flaky Test Analysis Workflow and Script

- Introduced a new GitHub Actions workflow for flaky test analysis, allowing users to specify date ranges and parameters for reporting the flakiest tests.
- Added a Bash script to download JUnit test report artifacts and perform flaky test detection, enhancing the testing process and providing detailed results.
- The workflow includes steps for dependency installation, running the analysis, generating summaries, and uploading results as artifacts.
---
 .../flaky-test-analysis-workflow.yml          |  93 ++++++++
 tools/flaky-test-analysis.sh                  | 199 ++++++++++++++++++
 2 files changed, 292 insertions(+)
 create mode 100644 .github/workflows/flaky-test-analysis-workflow.yml
 create mode 100755 tools/flaky-test-analysis.sh

diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml
new file mode 100644
index 000000000..3ab09ca51
--- /dev/null
+++ b/.github/workflows/flaky-test-analysis-workflow.yml
@@ -0,0 +1,93 @@
+name: Flaky Test Analysis
+permissions:
+  contents: read
+  actions: read
+on:
+  workflow_dispatch:
+    inputs:
+      start_date:
+        description: 'Start date (YYYY-MM-DD). Defaults to 7 days ago, so the default range is the last 7 full days.'
+        required: false
+      end_date:
+        description: 'End date (YYYY-MM-DD). Defaults to yesterday.'
+        required: false
+      top_n:
+        description: 'Number of flakiest tests to report'
+        required: false
+        default: '20'
+      window_size:
+        description: 'Window size in days for flip rate calculation'
+        required: false
+        default: '1'
+jobs:
+  analyze:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: pip install flaky-tests-detection
+
+      - name: Run flaky test analysis
+        env:
+          GH_TOKEN: ${{ github.token }}
+          START_DATE: ${{ inputs.start_date }}
+          END_DATE: ${{ inputs.end_date }}
+          TOP_N: ${{ inputs.top_n }}
+          WINDOW_SIZE: ${{ inputs.window_size }}
+          RESULTS_FILE: flaky-results.txt
+        run: ./tools/flaky-test-analysis.sh
+
+      - name: Generate job summary
+        if: always()
+        run: |
+          {
+            echo "## Flaky Test Analysis"
+            echo ""
+            echo "**Date range:** \`${START_DATE:-last 7 days}\` .. \`${END_DATE:-yesterday}\`"
+            echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)"
+            echo ""
+
+            if [[ ! -f flaky-results.txt ]]; then
+              echo "> No results produced. Check the workflow logs."
+              exit 0
+            fi
+
+            # Extract the score lines (format: suite::[It] description --- score: N)
+            flaky_lines=$(grep -E ' --- score: ' flaky-results.txt || true)
+
+            if [[ -z "$flaky_lines" ]]; then
+              echo "> No flaky tests detected in this period."
+            else
+              echo "| Score | Suite | Test |"
+              echo "|------:|-------|------|"
+              echo "$flaky_lines" | while IFS= read -r line; do
+                score=$(echo "$line" | sed -E 's/.* --- score: (.+)/\1/')
+                suite=$(echo "$line" | sed -E 's/^([^:]+)::.*/\1/')
+                test_name=$(echo "$line" | sed -E 's/^[^:]+::\[It\] (.*) --- score:.*/\1/')
+                echo "| ${score} | ${suite} | ${test_name} |"
+              done
+            fi
+
+            echo ""
+
+            if ls *_flip_rate_*.png 1>/dev/null 2>&1; then
+              echo "---"
+              echo "Heatmap uploaded as workflow artifact."
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: flaky-test-results
+          path: |
+            flaky-results.txt
+            *_flip_rate_*.png
+          if-no-files-found: ignore
+          retention-days: 30
diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh
new file mode 100755
index 000000000..086cd6bc6
--- /dev/null
+++ b/tools/flaky-test-analysis.sh
@@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+#
+# Download JUnit test report artifacts from GitHub Actions and run flaky test detection.
+#
+# Usage:
+#   ./tools/flaky-test-analysis.sh [start-date] [end-date] [--dry-run] [--skip-analysis]
+#
+# Dates default to the last 7 full days (excluding today) if not provided.
+#
+# Examples:
+#   ./tools/flaky-test-analysis.sh                          # last 7 full days
+#   ./tools/flaky-test-analysis.sh 2026-02-01 2026-02-26
+#   ./tools/flaky-test-analysis.sh --dry-run                # preview only
+#   ./tools/flaky-test-analysis.sh --skip-analysis          # download only, no flaky detection
+#
+# Requires: gh (GitHub CLI, authenticated via 'gh auth login' or GH_TOKEN), jq, unzip.
+# Optional: flaky-tests-detection (pip install flaky-tests-detection)
+#
+set -euo pipefail
+
+REPO="${REPO:-splunk/splunk-operator}"
+OUTPUT_DIR="${OUTPUT_DIR:-./junit-reports}"
+ARTIFACT_PATTERN="${ARTIFACT_PATTERN:-^test-report-.*}"
+TOP_N="${TOP_N:-20}"
+WINDOW_SIZE="${WINDOW_SIZE:-1}"
+
+if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+  sed -n '2,17p' "$0" | sed 's/^# \?//'
+  exit 0
+fi
+
+DRY_RUN="${DRY_RUN:-false}"
+SKIP_ANALYSIS="${SKIP_ANALYSIS:-false}"
+START_DATE="${START_DATE:-}"
+END_DATE="${END_DATE:-}"
+
+for arg in "$@"; do
+  if [[ "$arg" == "--dry-run" ]]; then
+    DRY_RUN=true
+  elif [[ "$arg" == "--skip-analysis" ]]; then
+    SKIP_ANALYSIS=true
+  elif [[ -z "$START_DATE" ]]; then
+    START_DATE="$arg"
+  elif [[ -z "$END_DATE" ]]; then
+    END_DATE="$arg"
+  fi
+done
+
+END_DATE="${END_DATE:-$(date -u -v-1d +%Y-%m-%d 2>/dev/null || date -u -d 'yesterday' +%Y-%m-%d)}"
+START_DATE="${START_DATE:-$(date -u -v-7d +%Y-%m-%d 2>/dev/null || date -u -d '7 days ago' +%Y-%m-%d)}"
+
+# Derive WINDOW_COUNT from the date range (one window per WINDOW_SIZE days)
+if date -v+0d +%s &>/dev/null; then
+  _start_epoch=$(date -jf "%Y-%m-%d" "$START_DATE" +%s)
+  _end_epoch=$(date -jf "%Y-%m-%d" "$END_DATE" +%s)
+else
+  _start_epoch=$(date -d "$START_DATE" +%s)
+  _end_epoch=$(date -d "$END_DATE" +%s)
+fi
+WINDOW_COUNT=$(( ((_end_epoch - _start_epoch) / 86400 + WINDOW_SIZE) / WINDOW_SIZE ))
+
+if ! command -v gh &>/dev/null; then
+  echo "ERROR: 'gh' (GitHub CLI) is required. Install from https://cli.github.com/" >&2
+  exit 1
+fi
+
+echo "Repository:  $REPO"
+echo "Date range:  $START_DATE .. $END_DATE"
+echo "Output:      $OUTPUT_DIR"
+echo ""
+
+ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false)"
+DATE_FILTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")"
+
+echo "Fetching artifact list..."
+
+artifacts_json="[]"
+page=1
+while true; do
+  response=$(gh api "repos/${REPO}/actions/artifacts?per_page=100&page=${page}" 2>/dev/null)
+
+  page_artifacts=$(echo "$response" | \
+    jq "[${ART_FILTER} | ${DATE_FILTER} | {id, name, created_at, workflow_run_id: .workflow_run.id}]")
+  artifacts_json=$(echo "$artifacts_json" "$page_artifacts" | jq -s 'add')
+
+  oldest=$(echo "$response" | jq -r '.artifacts[-1].created_at // empty')
+  if [[ -z "$oldest" || "$oldest" < "${START_DATE}T00:00:00Z" ]]; then
+    break
+  fi
+
+  count=$(echo "$response" | jq '.artifacts | length')
+  if [[ "$count" -lt 100 ]]; then
+    break
+  fi
+
+  page=$((page + 1))
+done
+
+artifact_count=$(echo "$artifacts_json" | jq 'length')
+echo "Found $artifact_count matching artifacts."
+echo ""
+
+if [[ "$artifact_count" -eq 0 ]]; then
+  echo "No artifacts matched. Check your date range and artifact retention settings."
+  exit 0
+fi
+
+echo "--------------------------------------------------------------"
+printf "%-12s %-50s %s\n" "RUN ID" "ARTIFACT NAME" "CREATED"
+echo "--------------------------------------------------------------"
+echo "$artifacts_json" | jq -r '.[] | "\(.workflow_run_id)\t\(.name)\t\(.created_at)"' | \
+  while IFS=$'\t' read -r run_id name created; do
+    printf "%-12s %-50s %s\n" "$run_id" "$name" "${created%%T*}"
+  done
+echo "--------------------------------------------------------------"
+echo ""
+
+if [[ "$DRY_RUN" == "true" ]]; then
+  echo "(dry run - skipping downloads)"
+  exit 0
+fi
+
+mkdir -p "$OUTPUT_DIR"
+
+tmpdir=$(mktemp -d)
+trap 'rm -rf "$tmpdir"' EXIT
+
+echo "$artifacts_json" | jq -r '.[] | "\(.id)\t\(.name)\t\(.workflow_run_id)"' | \
+  while IFS=$'\t' read -r art_id art_name run_id; do
+    echo "  Downloading: ${art_name} (run ${run_id})..."
+    zipfile="${tmpdir}/${art_id}.zip"
+    if gh api "repos/${REPO}/actions/artifacts/${art_id}/zip" > "$zipfile" 2>/dev/null; then
+      unzip -qo "$zipfile" -d "$tmpdir/extract" 2>/dev/null
+      for f in "$tmpdir/extract"/*.xml; do
+        [[ -f "$f" ]] || continue
+        base=$(basename "$f" .xml)
+        mv "$f" "${OUTPUT_DIR}/${run_id}-${base}.xml"
+      done
+      rm -rf "$tmpdir/extract" "$zipfile"
+    else
+      echo "    FAILED to download artifact ${art_id}"
+      rm -f "$zipfile"
+    fi
+  done
+
+total_files=$(find "$OUTPUT_DIR" -name '*.xml' 2>/dev/null | wc -l | tr -d ' ')
+echo ""
+echo "Done. ${total_files} XML files saved to ${OUTPUT_DIR}/"
+
+echo ""
+echo "Normalizing classnames (stripping Ginkgo random suffixes)..."
+for f in "${OUTPUT_DIR}"/*.xml; do
+  [[ -f "$f" ]] || continue
+  sed -i.bak -E 's/classname="Running (.+)-[a-z0-9]{3}"/classname="\1"/g' "$f"
+  rm -f "${f}.bak"
+done
+echo "Done."
+
+if [[ "$SKIP_ANALYSIS" == "true" ]]; then
+  exit 0
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PYTHON=""
+if [[ -x "${SCRIPT_DIR}/.venv/bin/python" ]]; then
+  PYTHON="${SCRIPT_DIR}/.venv/bin/python"
+elif command -v python3 &>/dev/null; then
+  PYTHON="python3"
+else
+  echo ""
+  echo "Python not found. Install with: cd tools && poetry install"
+  exit 0
+fi
+
+if ! "$PYTHON" -c "from flaky_tests_detection.check_flakes import main" &>/dev/null; then
+  echo ""
+  echo "flaky-tests-detection not installed. Install with: cd tools && poetry install"
+  exit 0
+fi
+
+echo ""
+echo "================================================================"
+echo "Running flaky test detection..."
+echo "  Window size:  ${WINDOW_SIZE} days"
+echo "  Window count: ${WINDOW_COUNT}"
+echo "  Top N:        ${TOP_N}"
+echo "================================================================"
+echo ""
+
+RESULTS_FILE="${RESULTS_FILE:-flaky-results.txt}"
+
+"$PYTHON" -m flaky_tests_detection.check_flakes \
+  --junit-files="${OUTPUT_DIR}" \
+  --grouping-option=days \
+  --window-size="${WINDOW_SIZE}" \
+  --window-count="${WINDOW_COUNT}" \
+  --top-n="${TOP_N}" \
+  --heatmap \
+  2>&1 | tee "$RESULTS_FILE"

From 813d563ffa216d3fe026049b87b2db594ac0bec2 Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Mon, 2 Mar 2026 15:17:55 +0100
Subject: [PATCH 02/10] Update flaky test analysis workflow to include push
 trigger for specific branch

- Added a push trigger for the 'CSPL-3763-add-flay-test-analysis-tool' branch to the flaky test analysis workflow, allowing for automated execution upon code changes.
- This change is intended for testing purposes and will be removed before merging.
---
 .github/workflows/flaky-test-analysis-workflow.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml
index 3ab09ca51..c5bbf5f37 100644
--- a/.github/workflows/flaky-test-analysis-workflow.yml
+++ b/.github/workflows/flaky-test-analysis-workflow.yml
@@ -3,6 +3,8 @@ permissions:
   contents: read
   actions: read
 on:
+  push:
+    branches: [CSPL-3763-add-flay-test-analysis-tool] # TODO: remove before merging
   workflow_dispatch:
     inputs:
       start_date:

From 48bd3ed06a567ceacf390f9537d4fd919beb9250 Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Mon, 2 Mar 2026 15:21:53 +0100
Subject: [PATCH 03/10] Remove push trigger for specific branch in flaky test
 analysis workflow

- Eliminated the push trigger for the 'CSPL-3763-add-flay-test-analysis-tool' branch from the flaky test analysis workflow, streamlining the workflow configuration and preparing for final merge.
---
 .github/workflows/flaky-test-analysis-workflow.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml
index c5bbf5f37..3ab09ca51 100644
--- a/.github/workflows/flaky-test-analysis-workflow.yml
+++ b/.github/workflows/flaky-test-analysis-workflow.yml
@@ -3,8 +3,6 @@ permissions:
   contents: read
   actions: read
 on:
-  push:
-    branches: [CSPL-3763-add-flay-test-analysis-tool] # TODO: remove before merging
   workflow_dispatch:
     inputs:
       start_date:

From 452ac57bf8269a1fadf495e14a58d4ff371656d1 Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Mon, 2 Mar 2026 15:38:28 +0100
Subject: [PATCH 04/10] Enhance flaky test analysis workflow to display heatmap
 images in summary

- Updated the workflow to iterate over heatmap images and include them in the GitHub step summary as base64-encoded images, improving visibility of test results.
- Re-added the push trigger for the 'CSPL-3763-add-flay-test-analysis-tool' branch (dropped in the previous commit) for testing purposes; it is to be removed before merging.
---
 .github/workflows/flaky-test-analysis-workflow.yml | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml
index 3ab09ca51..f725bd658 100644
--- a/.github/workflows/flaky-test-analysis-workflow.yml
+++ b/.github/workflows/flaky-test-analysis-workflow.yml
@@ -3,6 +3,8 @@ permissions:
   contents: read
   actions: read
 on:
+  push:
+    branches: [CSPL-3763-add-flay-test-analysis-tool] # TODO: remove before merging
   workflow_dispatch:
     inputs:
       start_date:
@@ -75,10 +77,12 @@ jobs:
 
             echo ""
 
-            if ls *_flip_rate_*.png 1>/dev/null 2>&1; then
-              echo "---"
-              echo "Heatmap uploaded as workflow artifact."
-            fi
+            for img in *_flip_rate_*.png; do
+              [[ -f "$img" ]] || continue
+              echo "### ${img%.png}"
+              echo "<img src=\"data:image/png;base64,$(base64 -w0 "$img")\" alt=\"${img}\" />"
+              echo ""
+            done
           } >> "$GITHUB_STEP_SUMMARY"
 
       - name: Upload results

From a2f15f6fe2695758c2f310d1fc7ae89062a3dda2 Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Mon, 2 Mar 2026 19:30:31 +0100
Subject: [PATCH 05/10] Remove unused image processing code from flaky test
 analysis workflow

- Eliminated the code block that processed and displayed heatmap images in the GitHub step summary, streamlining the workflow.
- Removed the push trigger for the 'CSPL-3763-add-flay-test-analysis-tool' branch, finalizing the workflow configuration for merging.
---
 .github/workflows/flaky-test-analysis-workflow.yml | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml
index f725bd658..c0151ff8e 100644
--- a/.github/workflows/flaky-test-analysis-workflow.yml
+++ b/.github/workflows/flaky-test-analysis-workflow.yml
@@ -3,8 +3,6 @@ permissions:
   contents: read
   actions: read
 on:
-  push:
-    branches: [CSPL-3763-add-flay-test-analysis-tool] # TODO: remove before merging
   workflow_dispatch:
     inputs:
       start_date:
@@ -75,14 +73,6 @@ jobs:
               done
             fi
 
-            echo ""
-
-            for img in *_flip_rate_*.png; do
-              [[ -f "$img" ]] || continue
-              echo "### ${img%.png}"
-              echo "<img src=\"data:image/png;base64,$(base64 -w0 "$img")\" alt=\"${img}\" />"
-              echo ""
-            done
           } >> "$GITHUB_STEP_SUMMARY"
 
       - name: Upload results

From 84118968196c14637e0d12d571e7736a2e365c67 Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Tue, 3 Mar 2026 10:06:35 +0100
Subject: [PATCH 06/10] Add test failure statistics generation to flaky test
 analysis workflow

- Introduced a new Python script to analyze JUnit XML reports and generate a Markdown summary of test failure statistics, including failure counts and rates.
- Updated the flaky test analysis workflow to include a step for generating failure statistics, ensuring comprehensive reporting of test results.
- The workflow now triggers on pushes to the 'CSPL-3763-add-flay-test-analysis-tool' branch, facilitating automated analysis during development.
---
 .../flaky-test-analysis-workflow.yml          |   7 +
 tools/test-failure-stats.py                   | 153 ++++++++++++++++++
 2 files changed, 160 insertions(+)
 create mode 100755 tools/test-failure-stats.py

diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml
index c0151ff8e..e8a5dc240 100644
--- a/.github/workflows/flaky-test-analysis-workflow.yml
+++ b/.github/workflows/flaky-test-analysis-workflow.yml
@@ -3,6 +3,9 @@ permissions:
   contents: read
   actions: read
 on:
+  push:
+    branches:
+      - CSPL-3763-add-flay-test-analysis-tool
   workflow_dispatch:
     inputs:
       start_date:
@@ -75,6 +78,10 @@ jobs:
 
           } >> "$GITHUB_STEP_SUMMARY"
 
+      - name: Generate failure stats summary
+        if: always()
+        run: python3 tools/test-failure-stats.py ./junit-reports >> "$GITHUB_STEP_SUMMARY"
+
       - name: Upload results
         if: always()
         uses: actions/upload-artifact@v4
diff --git a/tools/test-failure-stats.py b/tools/test-failure-stats.py
new file mode 100755
index 000000000..255ab1c1b
--- /dev/null
+++ b/tools/test-failure-stats.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+"""
+Analyze JUnit XML reports and produce a Markdown report of test failure statistics.
+
+Parses all JUnit XML files (downloaded by flaky-test-analysis.sh) and writes
+a Markdown file with per-test-case failure counts and rates.
+
+Usage:
+    ./tools/test-failure-stats.py [junit-dir]
+    ./tools/test-failure-stats.py > report.md
+
+    junit-dir   Directory with JUnit XML files (default: ./junit-reports)
+
+Requires: Python 3.9+ (stdlib only; uses built-in generic annotations such as dict[str, ...]).
+"""
+
+import re
+import sys
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+from dataclasses import dataclass, field
+from pathlib import Path
+
+
+FILENAME_RE = re.compile(
+    r"^(\d+)-(?:report-junit|unit_test)-(\d{8})-(\d{6})-\d+(?:-(.+))?\.xml$"
+)
+
+INFRA_NAMES = {
+    "[BeforeSuite]", "[AfterSuite]", "[ReportAfterSuite]",
+    "[SynchronizedBeforeSuite]", "[SynchronizedAfterSuite]",
+}
+
+
+@dataclass
+class TestRecord:
+    runs: int = 0  # executions counted for this test (skipped and suite-level nodes excluded)
+    failures: int = 0  # executions whose <testcase> contained a <failure> child
+    timeouts: int = 0  # subset of failures whose status attribute was "timedout"
+    passes: int = 0  # executions without a <failure> child
+    failure_dates: list = field(default_factory=list)  # YYYY-MM-DD (from the file name) per failure
+
+
+def parse_filename(fname: str):
+    m = FILENAME_RE.match(fname)
+    if not m:
+        return None, None
+    date_str = m.group(2)
+    return f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}", m.group(4) or "unit_test"
+
+
+def parse_junit_file(filepath: Path):
+    try:
+        tree = ET.parse(str(filepath))
+    except ET.ParseError:
+        print(f"  WARNING: Could not parse {filepath.name}, skipping", file=sys.stderr)
+        return
+
+    for tc in tree.iter("testcase"):
+        name = tc.get("name", "")
+        if name in INFRA_NAMES:
+            continue
+        if tc.get("status") == "skipped" or tc.find("skipped") is not None:
+            continue
+
+        classname = tc.get("classname", "")
+        status = tc.get("status", "")
+        has_failure = tc.find("failure") is not None
+        yield classname, name, status, has_failure
+
+
+def build_stats(junit_dir: Path):
+    stats: dict[str, TestRecord] = defaultdict(TestRecord)
+    files_parsed = 0
+
+    for fpath in sorted(junit_dir.glob("*.xml")):
+        date_str, _ = parse_filename(fpath.name)
+        if date_str is None:
+            continue
+
+        files_parsed += 1
+        for classname, name, status, has_failure in parse_junit_file(fpath):
+            rec = stats[f"{classname}::{name}"]
+            rec.runs += 1
+            if has_failure:
+                rec.failures += 1
+                if status == "timedout":
+                    rec.timeouts += 1
+                rec.failure_dates.append(date_str)
+            else:
+                rec.passes += 1
+
+    return stats, files_parsed
+
+
+def short_name(full_name: str, max_len: int = 120) -> str:
+    name = re.sub(r"^\[It\]\s*", "", full_name)
+    if len(name) > max_len:
+        return name[: max_len - 3] + "..."
+    return name
+
+
+def write_markdown(stats: dict[str, TestRecord], files_parsed: int):
+    failing = {k: v for k, v in stats.items() if v.failures > 0}
+    total_runs = sum(r.runs for r in stats.values())
+
+    print("# Test Failure Statistics")
+    print()
+    print("| Metric | Value |")
+    print("|--------|-------|")
+    print(f"| Files parsed | {files_parsed} |")
+    print(f"| Unique tests | {len(stats)} |")
+    print(f"| Tests with failures | {len(failing)} |")
+    print(f"| Total test runs (non-skipped) | {total_runs} |")
+    print(f"| Total failure occurrences | {sum(r.failures for r in failing.values())} |")
+    print()
+
+    if not failing:
+        print("**No test failures found.**")
+        return
+
+    ranked = sorted(failing.items(),
+                    key=lambda x: (-x[1].failures, -x[1].failures / max(x[1].runs, 1)))
+
+    print("## Failing Tests")
+    print()
+    print("| # | Fail | Runs | Rate | Timeouts | Last Failure | Suite | Test |")
+    print("|--:|-----:|-----:|-----:|---------:|:------------:|:------|:-----|")
+
+    for i, (key, rec) in enumerate(ranked, 1):
+        classname, name = key.split("::", 1)
+        rate = rec.failures / rec.runs * 100 if rec.runs > 0 else 0
+        last_fail = max(rec.failure_dates) if rec.failure_dates else "n/a"
+        display = short_name(name).replace("|", "\\|")
+        print(
+            f"| {i} | {rec.failures} | {rec.runs} | {rate:.1f}% "
+            f"| {rec.timeouts} | {last_fail} | `{classname}` | {display} |"
+        )
+
+
+def main():
+    junit_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./junit-reports")
+
+    if not junit_dir.is_dir():
+        print(f"ERROR: {junit_dir} is not a directory", file=sys.stderr)
+        sys.exit(1)
+
+    stats, files_parsed = build_stats(junit_dir)
+    write_markdown(stats, files_parsed)
+
+
+if __name__ == "__main__":
+    main()

From 266327fffed7b081b07d5615e91f2f52a94f5483 Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Tue, 3 Mar 2026 10:53:57 +0100
Subject: [PATCH 07/10] Add custom Matplotlib configurations for flaky test
 analysis

- Introduced a new Python script to customize Matplotlib for generating heatmaps in flaky test analysis, enhancing readability with increased font sizes and wrapped y-axis labels.
- Updated the flaky test analysis script to incorporate the new Matplotlib configurations, improving the presentation of test results.
- Added a new entry to .gitignore to exclude generated JUnit report directories from version control.
---
 .gitignore                              |  1 +
 tools/flaky-test-analysis-mpl-config.py | 43 +++++++++++++++++++++++++
 tools/flaky-test-analysis.sh            | 10 +++---
 3 files changed, 50 insertions(+), 4 deletions(-)
 create mode 100644 tools/flaky-test-analysis-mpl-config.py

diff --git a/.gitignore b/.gitignore
index 5de8f6d85..384bdc255 100644
--- a/.gitignore
+++ b/.gitignore
@@ -93,6 +93,7 @@ release-*
 deploy/olm-certified
 report-junit*.xml
 unit_test*.xml
+junit-reports/
 # Bias Language Linter
 .biaslanguage/
 bin/
diff --git a/tools/flaky-test-analysis-mpl-config.py b/tools/flaky-test-analysis-mpl-config.py
new file mode 100644
index 000000000..870af4a0c
--- /dev/null
+++ b/tools/flaky-test-analysis-mpl-config.py
@@ -0,0 +1,43 @@
+"""Matplotlib customizations for flaky-test-analysis heatmaps.
+
+Monkey-patches matplotlib to:
+- Increase font sizes for readability
+- Wrap long y-axis tick labels
+- Override the library's hardcoded title fontsize
+"""
+import textwrap
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+
+WRAP_WIDTH = 60
+TITLE_FONTSIZE = 64
+
+mpl.rcParams.update({
+    "font.size": 50,
+    "xtick.labelsize": 40,
+    "ytick.labelsize": 40,
+    "axes.labelsize": 60,
+})
+
+_original_savefig = plt.savefig
+_original_title = plt.title
+
+
+def _title_with_fontsize(*args, **kwargs):
+    kwargs["fontsize"] = TITLE_FONTSIZE
+    return _original_title(*args, **kwargs)
+
+
+def _savefig_with_wrapped_labels(*args, **kwargs):
+    fig = plt.gcf()
+    for ax in fig.axes:
+        labels = ax.get_yticklabels()
+        if labels:
+            ticks = ax.get_yticks()
+            ax.set_yticks(ticks)
+            ax.set_yticklabels([textwrap.fill(l.get_text(), WRAP_WIDTH) for l in labels])
+    _original_savefig(*args, **kwargs)
+
+
+plt.title = _title_with_fontsize
+plt.savefig = _savefig_with_wrapped_labels
diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh
index 086cd6bc6..ff052076b 100755
--- a/tools/flaky-test-analysis.sh
+++ b/tools/flaky-test-analysis.sh
@@ -125,9 +125,11 @@ mkdir -p "$OUTPUT_DIR"
 tmpdir=$(mktemp -d)
 trap 'rm -rf "$tmpdir"' EXIT
 
+dl_current=0
 echo "$artifacts_json" | jq -r '.[] | "\(.id)\t\(.name)\t\(.workflow_run_id)"' | \
   while IFS=$'\t' read -r art_id art_name run_id; do
-    echo "  Downloading: ${art_name} (run ${run_id})..."
+    dl_current=$((dl_current + 1))
+    echo "  [${dl_current}/${artifact_count}] ${art_name} (run ${run_id})..."
     zipfile="${tmpdir}/${art_id}.zip"
     if gh api "repos/${REPO}/actions/artifacts/${art_id}/zip" > "$zipfile" 2>/dev/null; then
       unzip -qo "$zipfile" -d "$tmpdir/extract" 2>/dev/null
@@ -151,8 +153,7 @@ echo ""
 echo "Normalizing classnames (stripping Ginkgo random suffixes)..."
 for f in "${OUTPUT_DIR}"/*.xml; do
   [[ -f "$f" ]] || continue
-  sed -i.bak -E 's/classname="Running (.+)-[a-z0-9]{3}"/classname="\1"/g' "$f"
-  rm -f "${f}.bak"
+  sed -E 's/classname="Running ([^"]+)-[a-z0-9]{3}"/classname="\1"/g' "$f" > "${f}.tmp" && mv "${f}.tmp" "$f"  # [^"]+ keeps the match inside the classname attribute
 done
 echo "Done."
 
@@ -189,7 +190,8 @@ echo ""
 
 RESULTS_FILE="${RESULTS_FILE:-flaky-results.txt}"
 
-"$PYTHON" -m flaky_tests_detection.check_flakes \
+PYTHONPATH="${SCRIPT_DIR}:${PYTHONPATH:-}" "$PYTHON" -c \
+  "import importlib; importlib.import_module('flaky-test-analysis-mpl-config'); from flaky_tests_detection.check_flakes import main; main()" \
   --junit-files="${OUTPUT_DIR}" \
   --grouping-option=days \
   --window-size="${WINDOW_SIZE}" \

From 2d6331049cfe78932c04bdbf3ac486290b446bab Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Tue, 3 Mar 2026 11:10:35 +0100
Subject: [PATCH 08/10] Enhance flaky test analysis workflow with output
 variables and naming improvements

- Added output variables for start and end dates in the flaky test analysis workflow, improving the clarity of date ranges in job summaries.
- Replaced short_name() with clean_name() in the test failure statistics script: the leading "[It]" marker is still stripped, but test names are no longer truncated to 120 characters.
- Updated the artifact upload step to include dynamic naming based on the date range, enhancing the organization of test results.
---
 .github/workflows/flaky-test-analysis-workflow.yml | 10 +++++++---
 tools/test-failure-stats.py                        |  9 +++------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml
index e8a5dc240..64f898a2c 100644
--- a/.github/workflows/flaky-test-analysis-workflow.yml
+++ b/.github/workflows/flaky-test-analysis-workflow.yml
@@ -36,6 +36,7 @@ jobs:
         run: pip install flaky-tests-detection
 
       - name: Run flaky test analysis
+        id: run
         env:
           GH_TOKEN: ${{ github.token }}
           START_DATE: ${{ inputs.start_date }}
@@ -43,7 +44,10 @@ jobs:
           TOP_N: ${{ inputs.top_n }}
           WINDOW_SIZE: ${{ inputs.window_size }}
           RESULTS_FILE: flaky-results.txt
-        run: ./tools/flaky-test-analysis.sh
+        run: |
+          ./tools/flaky-test-analysis.sh
+          echo "start=${START_DATE:-$(date -u -d '7 days ago' +%Y-%m-%d)}" >> "$GITHUB_OUTPUT"
+          echo "end=${END_DATE:-$(date -u -d 'yesterday' +%Y-%m-%d)}" >> "$GITHUB_OUTPUT"
 
       - name: Generate job summary
         if: always()
@@ -51,7 +55,7 @@ jobs:
           {
             echo "## Flaky Test Analysis"
             echo ""
-            echo "**Date range:** \`${START_DATE:-last 7 days}\` .. \`${END_DATE:-yesterday}\`"
+            echo "**Date range:** \`${{ steps.run.outputs.start }}\` .. \`${{ steps.run.outputs.end }}\`"
             echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)"
             echo ""
 
@@ -86,7 +90,7 @@ jobs:
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: flaky-test-results
+          name: flaky-test-results-${{ steps.run.outputs.start }}-to-${{ steps.run.outputs.end }}
           path: |
             flaky-results.txt
             *_flip_rate_*.png
diff --git a/tools/test-failure-stats.py b/tools/test-failure-stats.py
index 255ab1c1b..8affba488 100755
--- a/tools/test-failure-stats.py
+++ b/tools/test-failure-stats.py
@@ -93,11 +93,8 @@ def build_stats(junit_dir: Path):
     return stats, files_parsed
 
 
-def short_name(full_name: str, max_len: int = 120) -> str:
-    name = re.sub(r"^\[It\]\s*", "", full_name)
-    if len(name) > max_len:
-        return name[: max_len - 3] + "..."
-    return name
+def clean_name(full_name: str) -> str:
+    return re.sub(r"^\[It\]\s*", "", full_name)
 
 
 def write_markdown(stats: dict[str, TestRecord], files_parsed: int):
@@ -131,7 +128,7 @@ def write_markdown(stats: dict[str, TestRecord], files_parsed: int):
         classname, name = key.split("::", 1)
         rate = rec.failures / rec.runs * 100 if rec.runs > 0 else 0
         last_fail = max(rec.failure_dates) if rec.failure_dates else "n/a"
-        display = short_name(name).replace("|", "\\|")
+        display = clean_name(name).replace("|", "\\|")
         print(
             f"| {i} | {rec.failures} | {rec.runs} | {rate:.1f}% "
             f"| {rec.timeouts} | {last_fail} | `{classname}` | {display} |"

From 8d5208b66220b0a55e73f3eb51ab1eca9db5841e Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Fri, 6 Mar 2026 11:36:22 +0100
Subject: [PATCH 09/10] Add branch filtering option to flaky test analysis
 workflow and script

- Introduced a new input parameter for the flaky test analysis workflow to allow users to specify a branch for analysis, enhancing flexibility in test result reporting.
- Updated the flaky test analysis script to accept a branch argument, enabling focused analysis on specific branches.
- Enhanced output messages to include branch information when specified, improving clarity in the analysis results.
---
 .../flaky-test-analysis-workflow.yml          | 11 +++++++-
 tools/flaky-test-analysis.sh                  | 25 ++++++++++++++++---
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml
index 64f898a2c..6975d1636 100644
--- a/.github/workflows/flaky-test-analysis-workflow.yml
+++ b/.github/workflows/flaky-test-analysis-workflow.yml
@@ -22,6 +22,9 @@ on:
         description: 'Window size in days for flip rate calculation'
         required: false
         default: '1'
+      branch:
+        description: 'Only include runs from this branch (e.g. develop). All branches if empty.'
+        required: false
 jobs:
   analyze:
     runs-on: ubuntu-latest
@@ -43,6 +46,7 @@ jobs:
           END_DATE: ${{ inputs.end_date }}
           TOP_N: ${{ inputs.top_n }}
           WINDOW_SIZE: ${{ inputs.window_size }}
+          BRANCH: ${{ inputs.branch }}
           RESULTS_FILE: flaky-results.txt
         run: |
           ./tools/flaky-test-analysis.sh
@@ -56,7 +60,12 @@ jobs:
             echo "## Flaky Test Analysis"
             echo ""
             echo "**Date range:** \`${{ steps.run.outputs.start }}\` .. \`${{ steps.run.outputs.end }}\`"
-            echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)"
+            BRANCH_INFO="${{ inputs.branch }}"
+            if [[ -n "$BRANCH_INFO" ]]; then
+              echo "**Branch:** \`${BRANCH_INFO}\` | **Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)"
+            else
+              echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)"
+            fi
             echo ""
 
             if [[ ! -f flaky-results.txt ]]; then
diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh
index ff052076b..11cc57def 100755
--- a/tools/flaky-test-analysis.sh
+++ b/tools/flaky-test-analysis.sh
@@ -3,13 +3,14 @@
 # Download JUnit test report artifacts from GitHub Actions and run flaky test detection.
 #
 # Usage:
-#   ./tools/flaky-test-analysis.sh [start-date] [end-date] [--dry-run] [--skip-analysis]
+#   ./tools/flaky-test-analysis.sh [start-date] [end-date] [--branch <name> | --branch=<name>] [--dry-run] [--skip-analysis]
 #
 # Dates default to the last 7 full days (excluding today) if not provided.
 #
 # Examples:
 #   ./tools/flaky-test-analysis.sh                          # last 7 full days
 #   ./tools/flaky-test-analysis.sh 2026-02-01 2026-02-26
+#   ./tools/flaky-test-analysis.sh --branch develop         # only runs on develop
 #   ./tools/flaky-test-analysis.sh --dry-run                # preview only
 #   ./tools/flaky-test-analysis.sh --skip-analysis          # download only, no flaky detection
 #
@@ -31,11 +32,20 @@ fi
 
 DRY_RUN="${DRY_RUN:-false}"
 SKIP_ANALYSIS="${SKIP_ANALYSIS:-false}"
+BRANCH="${BRANCH:-}"
 START_DATE="${START_DATE:-}"
 END_DATE="${END_DATE:-}"
 
+_next_is_branch=false
 for arg in "$@"; do
-  if [[ "$arg" == "--dry-run" ]]; then
+  if [[ "$_next_is_branch" == "true" ]]; then
+    BRANCH="$arg"
+    _next_is_branch=false
+  elif [[ "$arg" == "--branch" ]]; then
+    _next_is_branch=true
+  elif [[ "$arg" == --branch=* ]]; then
+    BRANCH="${arg#--branch=}"
+  elif [[ "$arg" == "--dry-run" ]]; then
     DRY_RUN=true
   elif [[ "$arg" == "--skip-analysis" ]]; then
     SKIP_ANALYSIS=true
@@ -66,11 +76,18 @@ fi
 
 echo "Repository:  $REPO"
 echo "Date range:  $START_DATE .. $END_DATE"
+if [[ -n "$BRANCH" ]]; then
+  echo "Branch:      $BRANCH"
+fi
 echo "Output:      $OUTPUT_DIR"
 echo ""
 
-ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false)"
-DATE_FILTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")"
+BRANCH_FILTER=""
+if [[ -n "$BRANCH" ]]; then
+  BRANCH_FILTER="| select(.workflow_run.head_branch == \"${BRANCH}\")"
+fi
+ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false) ${BRANCH_FILTER}"
+DATE_FI LTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")"
 
 echo "Fetching artifact list..."
 

From c5c2e7df2986022a65e26f71bf2787d7ff350237 Mon Sep 17 00:00:00 2001
From: Jakub Buczak <jbuczak@splunk.com>
Date: Fri, 6 Mar 2026 11:42:30 +0100
Subject: [PATCH 10/10] Fix typo in flaky test analysis script for date
 filtering

- Corrected a typo in the variable name (DATE_FI LTER -> DATE_FILTER). The stray space meant the line was no longer parsed as a variable assignment, so DATE_FILTER was not set there and artifacts could not be filtered by creation date.
---
 tools/flaky-test-analysis.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh
index 11cc57def..731bfa179 100755
--- a/tools/flaky-test-analysis.sh
+++ b/tools/flaky-test-analysis.sh
@@ -87,7 +87,7 @@ if [[ -n "$BRANCH" ]]; then
   BRANCH_FILTER="| select(.workflow_run.head_branch == \"${BRANCH}\")"
 fi
 ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false) ${BRANCH_FILTER}"
-DATE_FI LTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")"
+DATE_FILTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")"
 
 echo "Fetching artifact list..."