From 84c33f342a31040d8243f3da3ebd894a501f8bd3 Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Mon, 2 Mar 2026 13:59:04 +0100 Subject: [PATCH 01/10] CSPL-3763 Add Flaky Test Analysis Workflow and Script - Introduced a new GitHub Actions workflow for flaky test analysis, allowing users to specify date ranges and parameters for reporting the flakiest tests. - Added a Bash script to download JUnit test report artifacts and perform flaky test detection, enhancing the testing process and providing detailed results. - The workflow includes steps for dependency installation, running the analysis, generating summaries, and uploading results as artifacts. --- .../flaky-test-analysis-workflow.yml | 93 ++++++++ tools/flaky-test-analysis.sh | 199 ++++++++++++++++++ 2 files changed, 292 insertions(+) create mode 100644 .github/workflows/flaky-test-analysis-workflow.yml create mode 100755 tools/flaky-test-analysis.sh diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml new file mode 100644 index 000000000..3ab09ca51 --- /dev/null +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -0,0 +1,93 @@ +name: Flaky Test Analysis +permissions: + contents: read + actions: read +on: + workflow_dispatch: + inputs: + start_date: + description: 'Start date (YYYY-MM-DD). Defaults to 7 days before yesterday.' + required: false + end_date: + description: 'End date (YYYY-MM-DD). Defaults to yesterday.' 
+ required: false + top_n: + description: 'Number of flakiest tests to report' + required: false + default: '20' + window_size: + description: 'Window size in days for flip rate calculation' + required: false + default: '1' +jobs: + analyze: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: pip install flaky-tests-detection + + - name: Run flaky test analysis + env: + GH_TOKEN: ${{ github.token }} + START_DATE: ${{ inputs.start_date }} + END_DATE: ${{ inputs.end_date }} + TOP_N: ${{ inputs.top_n }} + WINDOW_SIZE: ${{ inputs.window_size }} + RESULTS_FILE: flaky-results.txt + run: ./tools/flaky-test-analysis.sh + + - name: Generate job summary + if: always() + run: | + { + echo "## Flaky Test Analysis" + echo "" + echo "**Date range:** \`${START_DATE:-last 7 days}\` .. \`${END_DATE:-yesterday}\`" + echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)" + echo "" + + if [[ ! -f flaky-results.txt ]]; then + echo "> No results produced. Check the workflow logs." + exit 0 + fi + + # Extract the score lines (format: suite::[It] description --- score: N) + flaky_lines=$(grep -E ' --- score: ' flaky-results.txt || true) + + if [[ -z "$flaky_lines" ]]; then + echo "> No flaky tests detected in this period." + else + echo "| Score | Suite | Test |" + echo "|------:|-------|------|" + echo "$flaky_lines" | while IFS= read -r line; do + score=$(echo "$line" | sed -E 's/.* --- score: (.+)/\1/') + suite=$(echo "$line" | sed -E 's/^([^:]+)::.*/\1/') + test_name=$(echo "$line" | sed -E 's/^[^:]+::\[It\] (.*) --- score:.*/\1/') + echo "| ${score} | ${suite} | ${test_name} |" + done + fi + + echo "" + + if ls *_flip_rate_*.png 1>/dev/null 2>&1; then + echo "---" + echo "Heatmap uploaded as workflow artifact." 
+ fi + } >> "$GITHUB_STEP_SUMMARY" + + - name: Upload results + if: always() + uses: actions/upload-artifact@v4 + with: + name: flaky-test-results + path: | + flaky-results.txt + *_flip_rate_*.png + if-no-files-found: ignore + retention-days: 30 diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh new file mode 100755 index 000000000..086cd6bc6 --- /dev/null +++ b/tools/flaky-test-analysis.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash +# +# Download JUnit test report artifacts from GitHub Actions and run flaky test detection. +# +# Usage: +# ./tools/flaky-test-analysis.sh [start-date] [end-date] [--dry-run] [--skip-analysis] +# +# Dates default to the last 7 full days (excluding today) if not provided. +# +# Examples: +# ./tools/flaky-test-analysis.sh # last 7 full days +# ./tools/flaky-test-analysis.sh 2026-02-01 2026-02-26 +# ./tools/flaky-test-analysis.sh --dry-run # preview only +# ./tools/flaky-test-analysis.sh --skip-analysis # download only, no flaky detection +# +# Requires: gh (GitHub CLI), authenticated via 'gh auth login' or GH_TOKEN. 
+# Optional: flaky-tests-detection (pip install flaky-tests-detection) +# +set -euo pipefail + +REPO="${REPO:-splunk/splunk-operator}" +OUTPUT_DIR="${OUTPUT_DIR:-./junit-reports}" +ARTIFACT_PATTERN="${ARTIFACT_PATTERN:-^test-report-.*}" +TOP_N="${TOP_N:-20}" +WINDOW_SIZE="${WINDOW_SIZE:-1}" + +if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + sed -n '2,17p' "$0" | sed 's/^# \?//' + exit 0 +fi + +DRY_RUN="${DRY_RUN:-false}" +SKIP_ANALYSIS="${SKIP_ANALYSIS:-false}" +START_DATE="${START_DATE:-}" +END_DATE="${END_DATE:-}" + +for arg in "$@"; do + if [[ "$arg" == "--dry-run" ]]; then + DRY_RUN=true + elif [[ "$arg" == "--skip-analysis" ]]; then + SKIP_ANALYSIS=true + elif [[ -z "$START_DATE" ]]; then + START_DATE="$arg" + elif [[ -z "$END_DATE" ]]; then + END_DATE="$arg" + fi +done + +END_DATE="${END_DATE:-$(date -u -v-1d +%Y-%m-%d 2>/dev/null || date -u -d 'yesterday' +%Y-%m-%d)}" +START_DATE="${START_DATE:-$(date -u -v-7d +%Y-%m-%d 2>/dev/null || date -u -d '7 days ago' +%Y-%m-%d)}" + +# Derive WINDOW_COUNT from the date range (one window per WINDOW_SIZE days) +if date -v+0d +%s &>/dev/null; then + _start_epoch=$(date -jf "%Y-%m-%d" "$START_DATE" +%s) + _end_epoch=$(date -jf "%Y-%m-%d" "$END_DATE" +%s) +else + _start_epoch=$(date -d "$START_DATE" +%s) + _end_epoch=$(date -d "$END_DATE" +%s) +fi +WINDOW_COUNT=$(( ((_end_epoch - _start_epoch) / 86400 + WINDOW_SIZE) / WINDOW_SIZE )) + +if ! command -v gh &>/dev/null; then + echo "ERROR: 'gh' (GitHub CLI) is required. Install from https://cli.github.com/" >&2 + exit 1 +fi + +echo "Repository: $REPO" +echo "Date range: $START_DATE .. $END_DATE" +echo "Output: $OUTPUT_DIR" +echo "" + +ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false)" +DATE_FILTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")" + +echo "Fetching artifact list..." 
+ +artifacts_json="[]" +page=1 +while true; do + response=$(gh api "repos/${REPO}/actions/artifacts?per_page=100&page=${page}" 2>/dev/null) + + page_artifacts=$(echo "$response" | \ + jq "[${ART_FILTER} | ${DATE_FILTER} | {id, name, created_at, workflow_run_id: .workflow_run.id}]") + artifacts_json=$(echo "$artifacts_json" "$page_artifacts" | jq -s 'add') + + oldest=$(echo "$response" | jq -r '.artifacts[-1].created_at // empty') + if [[ -z "$oldest" || "$oldest" < "${START_DATE}T00:00:00Z" ]]; then + break + fi + + count=$(echo "$response" | jq '.artifacts | length') + if [[ "$count" -lt 100 ]]; then + break + fi + + page=$((page + 1)) +done + +artifact_count=$(echo "$artifacts_json" | jq 'length') +echo "Found $artifact_count matching artifacts." +echo "" + +if [[ "$artifact_count" -eq 0 ]]; then + echo "No artifacts matched. Check your date range and artifact retention settings." + exit 0 +fi + +echo "--------------------------------------------------------------" +printf "%-12s %-50s %s\n" "RUN ID" "ARTIFACT NAME" "CREATED" +echo "--------------------------------------------------------------" +echo "$artifacts_json" | jq -r '.[] | "\(.workflow_run_id)\t\(.name)\t\(.created_at)"' | \ + while IFS=$'\t' read -r run_id name created; do + printf "%-12s %-50s %s\n" "$run_id" "$name" "${created%%T*}" + done +echo "--------------------------------------------------------------" +echo "" + +if [[ "$DRY_RUN" == "true" ]]; then + echo "(dry run - skipping downloads)" + exit 0 +fi + +mkdir -p "$OUTPUT_DIR" + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +echo "$artifacts_json" | jq -r '.[] | "\(.id)\t\(.name)\t\(.workflow_run_id)"' | \ + while IFS=$'\t' read -r art_id art_name run_id; do + echo " Downloading: ${art_name} (run ${run_id})..." 
+ zipfile="${tmpdir}/${art_id}.zip" + if gh api "repos/${REPO}/actions/artifacts/${art_id}/zip" > "$zipfile" 2>/dev/null; then + unzip -qo "$zipfile" -d "$tmpdir/extract" 2>/dev/null + for f in "$tmpdir/extract"/*.xml; do + [[ -f "$f" ]] || continue + base=$(basename "$f" .xml) + mv "$f" "${OUTPUT_DIR}/${run_id}-${base}.xml" + done + rm -rf "$tmpdir/extract" "$zipfile" + else + echo " FAILED to download artifact ${art_id}" + rm -f "$zipfile" + fi + done + +total_files=$(find "$OUTPUT_DIR" -name '*.xml' 2>/dev/null | wc -l | tr -d ' ') +echo "" +echo "Done. ${total_files} XML files saved to ${OUTPUT_DIR}/" + +echo "" +echo "Normalizing classnames (stripping Ginkgo random suffixes)..." +for f in "${OUTPUT_DIR}"/*.xml; do + [[ -f "$f" ]] || continue + sed -i.bak -E 's/classname="Running (.+)-[a-z0-9]{3}"/classname="\1"/g' "$f" + rm -f "${f}.bak" +done +echo "Done." + +if [[ "$SKIP_ANALYSIS" == "true" ]]; then + exit 0 +fi + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PYTHON="" +if [[ -x "${SCRIPT_DIR}/.venv/bin/python" ]]; then + PYTHON="${SCRIPT_DIR}/.venv/bin/python" +elif command -v python3 &>/dev/null; then + PYTHON="python3" +else + echo "" + echo "Python not found. Install with: cd tools && poetry install" + exit 0 +fi + +if ! "$PYTHON" -c "from flaky_tests_detection.check_flakes import main" &>/dev/null; then + echo "" + echo "flaky-tests-detection not installed. Install with: cd tools && poetry install" + exit 0 +fi + +echo "" +echo "================================================================" +echo "Running flaky test detection..." 
+echo " Window size: ${WINDOW_SIZE} days" +echo " Window count: ${WINDOW_COUNT}" +echo " Top N: ${TOP_N}" +echo "================================================================" +echo "" + +RESULTS_FILE="${RESULTS_FILE:-flaky-results.txt}" + +"$PYTHON" -m flaky_tests_detection.check_flakes \ + --junit-files="${OUTPUT_DIR}" \ + --grouping-option=days \ + --window-size="${WINDOW_SIZE}" \ + --window-count="${WINDOW_COUNT}" \ + --top-n="${TOP_N}" \ + --heatmap \ + 2>&1 | tee "$RESULTS_FILE" From 813d563ffa216d3fe026049b87b2db594ac0bec2 Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Mon, 2 Mar 2026 15:17:55 +0100 Subject: [PATCH 02/10] Update flaky test analysis workflow to include push trigger for specific branch - Added a push trigger for the 'CSPL-3763-add-flay-test-analysis-tool' branch to the flaky test analysis workflow, allowing for automated execution upon code changes. - This change is intended for testing purposes and will be removed before merging. --- .github/workflows/flaky-test-analysis-workflow.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml index 3ab09ca51..c5bbf5f37 100644 --- a/.github/workflows/flaky-test-analysis-workflow.yml +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -3,6 +3,8 @@ permissions: contents: read actions: read on: + push: + branches: [CSPL-3763-add-flay-test-analysis-tool] # TODO: remove before merging workflow_dispatch: inputs: start_date: From 48bd3ed06a567ceacf390f9537d4fd919beb9250 Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Mon, 2 Mar 2026 15:21:53 +0100 Subject: [PATCH 03/10] Remove push trigger for specific branch in flaky test analysis workflow - Eliminated the push trigger for the 'CSPL-3763-add-flay-test-analysis-tool' branch from the flaky test analysis workflow, streamlining the workflow configuration and preparing for final merge. 
--- .github/workflows/flaky-test-analysis-workflow.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml index c5bbf5f37..3ab09ca51 100644 --- a/.github/workflows/flaky-test-analysis-workflow.yml +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -3,8 +3,6 @@ permissions: contents: read actions: read on: - push: - branches: [CSPL-3763-add-flay-test-analysis-tool] # TODO: remove before merging workflow_dispatch: inputs: start_date: From 452ac57bf8269a1fadf495e14a58d4ff371656d1 Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Mon, 2 Mar 2026 15:38:28 +0100 Subject: [PATCH 04/10] Enhance flaky test analysis workflow to display heatmap images in summary - Updated the workflow to iterate over heatmap images and include them in the GitHub step summary as base64-encoded images, improving visibility of test results. - Re-added the push trigger for the 'CSPL-3763-add-flay-test-analysis-tool' branch for testing purposes, with plans to remove it before merging. --- .github/workflows/flaky-test-analysis-workflow.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml index 3ab09ca51..f725bd658 100644 --- a/.github/workflows/flaky-test-analysis-workflow.yml +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -3,6 +3,8 @@ permissions: contents: read actions: read on: + push: + branches: [CSPL-3763-add-flay-test-analysis-tool] # TODO: remove before merging workflow_dispatch: inputs: start_date: @@ -75,10 +77,12 @@ jobs: echo "" - if ls *_flip_rate_*.png 1>/dev/null 2>&1; then - echo "---" - echo "Heatmap uploaded as workflow artifact."
- fi + for img in *_flip_rate_*.png; do + [[ -f "$img" ]] || continue + echo "### ${img%.png}" + echo "\"${img}\"" + echo "" + done } >> "$GITHUB_STEP_SUMMARY" - name: Upload results From a2f15f6fe2695758c2f310d1fc7ae89062a3dda2 Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Mon, 2 Mar 2026 19:30:31 +0100 Subject: [PATCH 05/10] Remove unused image processing code from flaky test analysis workflow - Eliminated the code block that processed and displayed heatmap images in the GitHub step summary, streamlining the workflow. - Removed the push trigger for the 'CSPL-3763-add-flay-test-analysis-tool' branch, finalizing the workflow configuration for merging. --- .github/workflows/flaky-test-analysis-workflow.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml index f725bd658..c0151ff8e 100644 --- a/.github/workflows/flaky-test-analysis-workflow.yml +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -3,8 +3,6 @@ permissions: contents: read actions: read on: - push: - branches: [CSPL-3763-add-flay-test-analysis-tool] # TODO: remove before merging workflow_dispatch: inputs: start_date: @@ -75,14 +73,6 @@ jobs: done fi - echo "" - - for img in *_flip_rate_*.png; do - [[ -f "$img" ]] || continue - echo "### ${img%.png}" - echo "\"${img}\"" - echo "" - done } >> "$GITHUB_STEP_SUMMARY" - name: Upload results From 84118968196c14637e0d12d571e7736a2e365c67 Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Tue, 3 Mar 2026 10:06:35 +0100 Subject: [PATCH 06/10] Add test failure statistics generation to flaky test analysis workflow - Introduced a new Python script to analyze JUnit XML reports and generate a Markdown summary of test failure statistics, including failure counts and rates. - Updated the flaky test analysis workflow to include a step for generating failure statistics, ensuring comprehensive reporting of test results. 
- The workflow now triggers on pushes to the 'CSPL-3763-add-flay-test-analysis-tool' branch, facilitating automated analysis during development. --- .../flaky-test-analysis-workflow.yml | 7 + tools/test-failure-stats.py | 153 ++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100755 tools/test-failure-stats.py diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml index c0151ff8e..e8a5dc240 100644 --- a/.github/workflows/flaky-test-analysis-workflow.yml +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -3,6 +3,9 @@ permissions: contents: read actions: read on: + push: + branches: + - CSPL-3763-add-flay-test-analysis-tool workflow_dispatch: inputs: start_date: @@ -75,6 +78,10 @@ jobs: } >> "$GITHUB_STEP_SUMMARY" + - name: Generate failure stats summary + if: always() + run: python3 tools/test-failure-stats.py ./junit-reports >> "$GITHUB_STEP_SUMMARY" + - name: Upload results if: always() uses: actions/upload-artifact@v4 diff --git a/tools/test-failure-stats.py b/tools/test-failure-stats.py new file mode 100755 index 000000000..255ab1c1b --- /dev/null +++ b/tools/test-failure-stats.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +""" +Analyze JUnit XML reports and produce a Markdown report of test failure statistics. + +Parses all JUnit XML files (downloaded by flaky-test-analysis.sh) and writes +a Markdown file with per-test-case failure counts and rates. + +Usage: + ./tools/test-failure-stats.py [junit-dir] + ./tools/test-failure-stats.py > report.md + + junit-dir Directory with JUnit XML files (default: ./junit-reports) + +Requires: Python 3.9+ (stdlib only).
+""" + +import re +import sys +import xml.etree.ElementTree as ET +from collections import defaultdict +from dataclasses import dataclass, field +from pathlib import Path + + +FILENAME_RE = re.compile( + r"^(\d+)-(?:report-junit|unit_test)-(\d{8})-(\d{6})-\d+(?:-(.+))?\.xml$" +) + +INFRA_NAMES = { + "[BeforeSuite]", "[AfterSuite]", "[ReportAfterSuite]", + "[SynchronizedBeforeSuite]", "[SynchronizedAfterSuite]", +} + + +@dataclass +class TestRecord: + runs: int = 0 + failures: int = 0 + timeouts: int = 0 + passes: int = 0 + failure_dates: list = field(default_factory=list) + + +def parse_filename(fname: str): + m = FILENAME_RE.match(fname) + if not m: + return None, None + date_str = m.group(2) + return f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}", m.group(4) or "unit_test" + + +def parse_junit_file(filepath: Path): + try: + tree = ET.parse(str(filepath)) + except ET.ParseError: + print(f" WARNING: Could not parse {filepath.name}, skipping", file=sys.stderr) + return + + for tc in tree.iter("testcase"): + name = tc.get("name", "") + if name in INFRA_NAMES: + continue + if tc.get("status") == "skipped" or tc.find("skipped") is not None: + continue + + classname = tc.get("classname", "") + status = tc.get("status", "") + has_failure = tc.find("failure") is not None + yield classname, name, status, has_failure + + +def build_stats(junit_dir: Path): + stats: dict[str, TestRecord] = defaultdict(TestRecord) + files_parsed = 0 + + for fpath in sorted(junit_dir.glob("*.xml")): + date_str, _ = parse_filename(fpath.name) + if date_str is None: + continue + + files_parsed += 1 + for classname, name, status, has_failure in parse_junit_file(fpath): + rec = stats[f"{classname}::{name}"] + rec.runs += 1 + if has_failure: + rec.failures += 1 + if status == "timedout": + rec.timeouts += 1 + rec.failure_dates.append(date_str) + else: + rec.passes += 1 + + return stats, files_parsed + + +def short_name(full_name: str, max_len: int = 120) -> str: + name = re.sub(r"^\[It\]\s*", "", 
full_name) + if len(name) > max_len: + return name[: max_len - 3] + "..." + return name + + +def write_markdown(stats: dict[str, TestRecord], files_parsed: int): + failing = {k: v for k, v in stats.items() if v.failures > 0} + total_runs = sum(r.runs for r in stats.values()) + + print("# Test Failure Statistics") + print() + print("| Metric | Value |") + print("|--------|-------|") + print(f"| Files parsed | {files_parsed} |") + print(f"| Unique tests | {len(stats)} |") + print(f"| Tests with failures | {len(failing)} |") + print(f"| Total test runs (non-skipped) | {total_runs} |") + print(f"| Total failure occurrences | {sum(r.failures for r in failing.values())} |") + print() + + if not failing: + print("**No test failures found.**") + return + + ranked = sorted(failing.items(), + key=lambda x: (-x[1].failures, -x[1].failures / max(x[1].runs, 1))) + + print("## Failing Tests") + print() + print("| # | Fail | Runs | Rate | Timeouts | Last Failure | Suite | Test |") + print("|--:|-----:|-----:|-----:|---------:|:------------:|:------|:-----|") + + for i, (key, rec) in enumerate(ranked, 1): + classname, name = key.split("::", 1) + rate = rec.failures / rec.runs * 100 if rec.runs > 0 else 0 + last_fail = max(rec.failure_dates) if rec.failure_dates else "n/a" + display = short_name(name).replace("|", "\\|") + print( + f"| {i} | {rec.failures} | {rec.runs} | {rate:.1f}% " + f"| {rec.timeouts} | {last_fail} | `{classname}` | {display} |" + ) + + +def main(): + junit_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./junit-reports") + + if not junit_dir.is_dir(): + print(f"ERROR: {junit_dir} is not a directory", file=sys.stderr) + sys.exit(1) + + stats, files_parsed = build_stats(junit_dir) + write_markdown(stats, files_parsed) + + +if __name__ == "__main__": + main() From 266327fffed7b081b07d5615e91f2f52a94f5483 Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Tue, 3 Mar 2026 10:53:57 +0100 Subject: [PATCH 07/10] Add custom Matplotlib configurations for flaky 
test analysis - Introduced a new Python script to customize Matplotlib for generating heatmaps in flaky test analysis, enhancing readability with increased font sizes and wrapped y-axis labels. - Updated the flaky test analysis script to incorporate the new Matplotlib configurations, improving the presentation of test results. - Added a new entry to .gitignore to exclude generated JUnit report directories from version control. --- .gitignore | 1 + tools/flaky-test-analysis-mpl-config.py | 43 +++++++++++++++++++++++++ tools/flaky-test-analysis.sh | 10 +++--- 3 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 tools/flaky-test-analysis-mpl-config.py diff --git a/.gitignore b/.gitignore index 5de8f6d85..384bdc255 100644 --- a/.gitignore +++ b/.gitignore @@ -93,6 +93,7 @@ release-* deploy/olm-certified report-junit*.xml unit_test*.xml +junit-reports/ # Bias Language Linter .biaslanguage/ bin/ diff --git a/tools/flaky-test-analysis-mpl-config.py b/tools/flaky-test-analysis-mpl-config.py new file mode 100644 index 000000000..870af4a0c --- /dev/null +++ b/tools/flaky-test-analysis-mpl-config.py @@ -0,0 +1,43 @@ +"""Matplotlib customizations for flaky-test-analysis heatmaps. 
+ +Monkey-patches matplotlib to: +- Increase font sizes for readability +- Wrap long y-axis tick labels +- Override the library's hardcoded title fontsize +""" +import textwrap +import matplotlib as mpl +import matplotlib.pyplot as plt + +WRAP_WIDTH = 60 +TITLE_FONTSIZE = 64 + +mpl.rcParams.update({ + "font.size": 50, + "xtick.labelsize": 40, + "ytick.labelsize": 40, + "axes.labelsize": 60, +}) + +_original_savefig = plt.savefig +_original_title = plt.title + + +def _title_with_fontsize(*args, **kwargs): + kwargs["fontsize"] = TITLE_FONTSIZE + return _original_title(*args, **kwargs) + + +def _savefig_with_wrapped_labels(*args, **kwargs): + fig = plt.gcf() + for ax in fig.axes: + labels = ax.get_yticklabels() + if labels: + ticks = ax.get_yticks() + ax.set_yticks(ticks) + ax.set_yticklabels([textwrap.fill(l.get_text(), WRAP_WIDTH) for l in labels]) + _original_savefig(*args, **kwargs) + + +plt.title = _title_with_fontsize +plt.savefig = _savefig_with_wrapped_labels diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh index 086cd6bc6..ff052076b 100755 --- a/tools/flaky-test-analysis.sh +++ b/tools/flaky-test-analysis.sh @@ -125,9 +125,11 @@ mkdir -p "$OUTPUT_DIR" tmpdir=$(mktemp -d) trap 'rm -rf "$tmpdir"' EXIT +dl_current=0 echo "$artifacts_json" | jq -r '.[] | "\(.id)\t\(.name)\t\(.workflow_run_id)"' | \ while IFS=$'\t' read -r art_id art_name run_id; do - echo " Downloading: ${art_name} (run ${run_id})..." + dl_current=$((dl_current + 1)) + echo " [${dl_current}/${artifact_count}] ${art_name} (run ${run_id})..." zipfile="${tmpdir}/${art_id}.zip" if gh api "repos/${REPO}/actions/artifacts/${art_id}/zip" > "$zipfile" 2>/dev/null; then unzip -qo "$zipfile" -d "$tmpdir/extract" 2>/dev/null @@ -151,8 +153,7 @@ echo "" echo "Normalizing classnames (stripping Ginkgo random suffixes)..." 
for f in "${OUTPUT_DIR}"/*.xml; do [[ -f "$f" ]] || continue - sed -i.bak -E 's/classname="Running (.+)-[a-z0-9]{3}"/classname="\1"/g' "$f" - rm -f "${f}.bak" + sed -E 's/classname="Running (.+)-[a-z0-9]{3}"/classname="\1"/g' "$f" > "${f}.tmp" && mv "${f}.tmp" "$f" done echo "Done." @@ -189,7 +190,8 @@ echo "" RESULTS_FILE="${RESULTS_FILE:-flaky-results.txt}" -"$PYTHON" -m flaky_tests_detection.check_flakes \ +PYTHONPATH="${SCRIPT_DIR}:${PYTHONPATH:-}" "$PYTHON" -c \ + "import importlib; importlib.import_module('flaky-test-analysis-mpl-config'); from flaky_tests_detection.check_flakes import main; main()" \ --junit-files="${OUTPUT_DIR}" \ --grouping-option=days \ --window-size="${WINDOW_SIZE}" \ From 2d6331049cfe78932c04bdbf3ac486290b446bab Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Tue, 3 Mar 2026 11:10:35 +0100 Subject: [PATCH 08/10] Enhance flaky test analysis workflow with output variables and naming improvements - Added output variables for start and end dates in the flaky test analysis workflow, improving the clarity of date ranges in job summaries. - Refactored the naming function in the test failure statistics script to improve readability and maintainability. - Updated the artifact upload step to include dynamic naming based on the date range, enhancing the organization of test results. 
--- .github/workflows/flaky-test-analysis-workflow.yml | 10 +++++++--- tools/test-failure-stats.py | 9 +++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml index e8a5dc240..64f898a2c 100644 --- a/.github/workflows/flaky-test-analysis-workflow.yml +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -36,6 +36,7 @@ jobs: run: pip install flaky-tests-detection - name: Run flaky test analysis + id: run env: GH_TOKEN: ${{ github.token }} START_DATE: ${{ inputs.start_date }} @@ -43,7 +44,10 @@ jobs: TOP_N: ${{ inputs.top_n }} WINDOW_SIZE: ${{ inputs.window_size }} RESULTS_FILE: flaky-results.txt - run: ./tools/flaky-test-analysis.sh + run: | + ./tools/flaky-test-analysis.sh + echo "start=${START_DATE:-$(date -u -d '7 days ago' +%Y-%m-%d)}" >> "$GITHUB_OUTPUT" + echo "end=${END_DATE:-$(date -u -d 'yesterday' +%Y-%m-%d)}" >> "$GITHUB_OUTPUT" - name: Generate job summary if: always() @@ -51,7 +55,7 @@ jobs: { echo "## Flaky Test Analysis" echo "" - echo "**Date range:** \`${START_DATE:-last 7 days}\` .. \`${END_DATE:-yesterday}\`" + echo "**Date range:** \`${{ steps.run.outputs.start }}\` .. 
\`${{ steps.run.outputs.end }}\`" echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)" echo "" @@ -86,7 +90,7 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: flaky-test-results + name: flaky-test-results-${{ steps.run.outputs.start }}-to-${{ steps.run.outputs.end }} path: | flaky-results.txt *_flip_rate_*.png diff --git a/tools/test-failure-stats.py b/tools/test-failure-stats.py index 255ab1c1b..8affba488 100755 --- a/tools/test-failure-stats.py +++ b/tools/test-failure-stats.py @@ -93,11 +93,8 @@ def build_stats(junit_dir: Path): return stats, files_parsed -def short_name(full_name: str, max_len: int = 120) -> str: - name = re.sub(r"^\[It\]\s*", "", full_name) - if len(name) > max_len: - return name[: max_len - 3] + "..." - return name +def clean_name(full_name: str) -> str: + return re.sub(r"^\[It\]\s*", "", full_name) def write_markdown(stats: dict[str, TestRecord], files_parsed: int): @@ -131,7 +128,7 @@ def write_markdown(stats: dict[str, TestRecord], files_parsed: int): classname, name = key.split("::", 1) rate = rec.failures / rec.runs * 100 if rec.runs > 0 else 0 last_fail = max(rec.failure_dates) if rec.failure_dates else "n/a" - display = short_name(name).replace("|", "\\|") + display = clean_name(name).replace("|", "\\|") print( f"| {i} | {rec.failures} | {rec.runs} | {rate:.1f}% " f"| {rec.timeouts} | {last_fail} | `{classname}` | {display} |" From 8d5208b66220b0a55e73f3eb51ab1eca9db5841e Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Fri, 6 Mar 2026 11:36:22 +0100 Subject: [PATCH 09/10] Add branch filtering option to flaky test analysis workflow and script - Introduced a new input parameter for the flaky test analysis workflow to allow users to specify a branch for analysis, enhancing flexibility in test result reporting. - Updated the flaky test analysis script to accept a branch argument, enabling focused analysis on specific branches. 
- Enhanced output messages to include branch information when specified, improving clarity in the analysis results. --- .../flaky-test-analysis-workflow.yml | 11 +++++++- tools/flaky-test-analysis.sh | 25 ++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml index 64f898a2c..6975d1636 100644 --- a/.github/workflows/flaky-test-analysis-workflow.yml +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -22,6 +22,9 @@ on: description: 'Window size in days for flip rate calculation' required: false default: '1' + branch: + description: 'Only include runs from this branch (e.g. develop). All branches if empty.' + required: false jobs: analyze: runs-on: ubuntu-latest @@ -43,6 +46,7 @@ jobs: END_DATE: ${{ inputs.end_date }} TOP_N: ${{ inputs.top_n }} WINDOW_SIZE: ${{ inputs.window_size }} + BRANCH: ${{ inputs.branch }} RESULTS_FILE: flaky-results.txt run: | ./tools/flaky-test-analysis.sh @@ -56,7 +60,12 @@ jobs: echo "## Flaky Test Analysis" echo "" echo "**Date range:** \`${{ steps.run.outputs.start }}\` .. \`${{ steps.run.outputs.end }}\`" - echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)" + BRANCH_INFO="${{ inputs.branch }}" + if [[ -n "$BRANCH_INFO" ]]; then + echo "**Branch:** \`${BRANCH_INFO}\` | **Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)" + else + echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)" + fi echo "" if [[ ! -f flaky-results.txt ]]; then diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh index ff052076b..11cc57def 100755 --- a/tools/flaky-test-analysis.sh +++ b/tools/flaky-test-analysis.sh @@ -3,13 +3,14 @@ # Download JUnit test report artifacts from GitHub Actions and run flaky test detection. 
# # Usage: -# ./tools/flaky-test-analysis.sh [start-date] [end-date] [--dry-run] [--skip-analysis] +# ./tools/flaky-test-analysis.sh [start-date] [end-date] [--branch <name>] [--dry-run] [--skip-analysis] # # Dates default to the last 7 full days (excluding today) if not provided. # # Examples: # ./tools/flaky-test-analysis.sh # last 7 full days # ./tools/flaky-test-analysis.sh 2026-02-01 2026-02-26 +# ./tools/flaky-test-analysis.sh --branch develop # only runs on develop # ./tools/flaky-test-analysis.sh --dry-run # preview only # ./tools/flaky-test-analysis.sh --skip-analysis # download only, no flaky detection # @@ -31,11 +32,20 @@ fi DRY_RUN="${DRY_RUN:-false}" SKIP_ANALYSIS="${SKIP_ANALYSIS:-false}" +BRANCH="${BRANCH:-}" START_DATE="${START_DATE:-}" END_DATE="${END_DATE:-}" +_next_is_branch=false for arg in "$@"; do - if [[ "$arg" == "--dry-run" ]]; then + if [[ "$_next_is_branch" == "true" ]]; then + BRANCH="$arg" + _next_is_branch=false + elif [[ "$arg" == "--branch" ]]; then + _next_is_branch=true + elif [[ "$arg" == --branch=* ]]; then + BRANCH="${arg#--branch=}" + elif [[ "$arg" == "--dry-run" ]]; then DRY_RUN=true elif [[ "$arg" == "--skip-analysis" ]]; then SKIP_ANALYSIS=true @@ -66,11 +76,18 @@ fi echo "Repository: $REPO" echo "Date range: $START_DATE .. $END_DATE" +if [[ -n "$BRANCH" ]]; then + echo "Branch: $BRANCH" +fi echo "Output: $OUTPUT_DIR" echo "" -ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false)" -DATE_FILTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")" +BRANCH_FILTER="" +if [[ -n "$BRANCH" ]]; then + BRANCH_FILTER="| select(.workflow_run.head_branch == \"${BRANCH}\")" +fi +ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false) ${BRANCH_FILTER}" +DATE_FI LTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")" echo "Fetching artifact list..."
From c5c2e7df2986022a65e26f71bf2787d7ff350237 Mon Sep 17 00:00:00 2001 From: Jakub Buczak Date: Fri, 6 Mar 2026 11:42:30 +0100 Subject: [PATCH 10/10] Fix typo in flaky test analysis script for date filtering - Corrected a typo in the variable name from DATE_FI LTER to DATE_FILTER, ensuring proper functionality in filtering artifacts by creation date. --- tools/flaky-test-analysis.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh index 11cc57def..731bfa179 100755 --- a/tools/flaky-test-analysis.sh +++ b/tools/flaky-test-analysis.sh @@ -87,7 +87,7 @@ if [[ -n "$BRANCH" ]]; then BRANCH_FILTER="| select(.workflow_run.head_branch == \"${BRANCH}\")" fi ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false) ${BRANCH_FILTER}" -DATE_FI LTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")" +DATE_FILTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")" echo "Fetching artifact list..."