diff --git a/.github/workflows/flaky-test-analysis-workflow.yml b/.github/workflows/flaky-test-analysis-workflow.yml new file mode 100644 index 000000000..6975d1636 --- /dev/null +++ b/.github/workflows/flaky-test-analysis-workflow.yml @@ -0,0 +1,107 @@ +name: Flaky Test Analysis +permissions: + contents: read + actions: read +on: + push: + branches: + - CSPL-3763-add-flay-test-analysis-tool + workflow_dispatch: + inputs: + start_date: + description: 'Start date (YYYY-MM-DD). Defaults to 7 days before yesterday.' + required: false + end_date: + description: 'End date (YYYY-MM-DD). Defaults to yesterday.' + required: false + top_n: + description: 'Number of flakiest tests to report' + required: false + default: '20' + window_size: + description: 'Window size in days for flip rate calculation' + required: false + default: '1' + branch: + description: 'Only include runs from this branch (e.g. develop). All branches if empty.' + required: false +jobs: + analyze: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: pip install flaky-tests-detection + + - name: Run flaky test analysis + id: run + env: + GH_TOKEN: ${{ github.token }} + START_DATE: ${{ inputs.start_date }} + END_DATE: ${{ inputs.end_date }} + TOP_N: ${{ inputs.top_n }} + WINDOW_SIZE: ${{ inputs.window_size }} + BRANCH: ${{ inputs.branch }} + RESULTS_FILE: flaky-results.txt + run: | + ./tools/flaky-test-analysis.sh + echo "start=${START_DATE:-$(date -u -d '7 days ago' +%Y-%m-%d)}" >> "$GITHUB_OUTPUT" + echo "end=${END_DATE:-$(date -u -d 'yesterday' +%Y-%m-%d)}" >> "$GITHUB_OUTPUT" + + - name: Generate job summary + if: always() + run: | + { + echo "## Flaky Test Analysis" + echo "" + echo "**Date range:** \`${{ steps.run.outputs.start }}\` .. 
\`${{ steps.run.outputs.end }}\`" + BRANCH_INFO="${{ inputs.branch }}" + if [[ -n "$BRANCH_INFO" ]]; then + echo "**Branch:** \`${BRANCH_INFO}\` | **Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)" + else + echo "**Top N:** ${{ inputs.top_n || '20' }} | **Window size:** ${{ inputs.window_size || '1' }} day(s)" + fi + echo "" + + if [[ ! -f flaky-results.txt ]]; then + echo "> No results produced. Check the workflow logs." + exit 0 + fi + + # Extract the score lines (format: suite::[It] description --- score: N) + flaky_lines=$(grep -E ' --- score: ' flaky-results.txt || true) + + if [[ -z "$flaky_lines" ]]; then + echo "> No flaky tests detected in this period." + else + echo "| Score | Suite | Test |" + echo "|------:|-------|------|" + echo "$flaky_lines" | while IFS= read -r line; do + score=$(echo "$line" | sed -E 's/.* --- score: (.+)/\1/') + suite=$(echo "$line" | sed -E 's/^([^:]+)::.*/\1/') + test_name=$(echo "$line" | sed -E 's/^[^:]+::\[It\] (.*) --- score:.*/\1/') + echo "| ${score} | ${suite} | ${test_name} |" + done + fi + + } >> "$GITHUB_STEP_SUMMARY" + + - name: Generate failure stats summary + if: always() + run: python3 tools/test-failure-stats.py ./junit-reports >> "$GITHUB_STEP_SUMMARY" + + - name: Upload results + if: always() + uses: actions/upload-artifact@v4 + with: + name: flaky-test-results-${{ steps.run.outputs.start }}-to-${{ steps.run.outputs.end }} + path: | + flaky-results.txt + *_flip_rate_*.png + if-no-files-found: ignore + retention-days: 30 diff --git a/.gitignore b/.gitignore index 5de8f6d85..384bdc255 100644 --- a/.gitignore +++ b/.gitignore @@ -93,6 +93,7 @@ release-* deploy/olm-certified report-junit*.xml unit_test*.xml +junit-reports/ # Bias Language Linter .biaslanguage/ bin/ diff --git a/tools/flaky-test-analysis-mpl-config.py b/tools/flaky-test-analysis-mpl-config.py new file mode 100644 index 000000000..870af4a0c --- /dev/null +++ b/tools/flaky-test-analysis-mpl-config.py @@ 
-0,0 +1,43 @@ +"""Matplotlib customizations for flaky-test-analysis heatmaps. + +Monkey-patches matplotlib to: +- Increase font sizes for readability +- Wrap long y-axis tick labels +- Override the library's hardcoded title fontsize +""" +import textwrap +import matplotlib as mpl +import matplotlib.pyplot as plt + +WRAP_WIDTH = 60 +TITLE_FONTSIZE = 64 + +mpl.rcParams.update({ + "font.size": 50, + "xtick.labelsize": 40, + "ytick.labelsize": 40, + "axes.labelsize": 60, +}) + +_original_savefig = plt.savefig +_original_title = plt.title + + +def _title_with_fontsize(*args, **kwargs): + kwargs["fontsize"] = TITLE_FONTSIZE + return _original_title(*args, **kwargs) + + +def _savefig_with_wrapped_labels(*args, **kwargs): + fig = plt.gcf() + for ax in fig.axes: + labels = ax.get_yticklabels() + if labels: + ticks = ax.get_yticks() + ax.set_yticks(ticks) + ax.set_yticklabels([textwrap.fill(l.get_text(), WRAP_WIDTH) for l in labels]) + _original_savefig(*args, **kwargs) + + +plt.title = _title_with_fontsize +plt.savefig = _savefig_with_wrapped_labels diff --git a/tools/flaky-test-analysis.sh b/tools/flaky-test-analysis.sh new file mode 100755 index 000000000..731bfa179 --- /dev/null +++ b/tools/flaky-test-analysis.sh @@ -0,0 +1,218 @@ +#!/usr/bin/env bash +# +# Download JUnit test report artifacts from GitHub Actions and run flaky test detection. +# +# Usage: +# ./tools/flaky-test-analysis.sh [start-date] [end-date] [--branch <branch>] [--dry-run] [--skip-analysis] +# +# Dates default to the last 7 full days (excluding today) if not provided. +# +# Examples: +# ./tools/flaky-test-analysis.sh # last 7 full days +# ./tools/flaky-test-analysis.sh 2026-02-01 2026-02-26 +# ./tools/flaky-test-analysis.sh --branch develop # only runs on develop +# ./tools/flaky-test-analysis.sh --dry-run # preview only +# ./tools/flaky-test-analysis.sh --skip-analysis # download only, no flaky detection +# +# Requires: gh (GitHub CLI), authenticated via 'gh auth login' or GH_TOKEN. 
+# Optional: flaky-tests-detection (pip install flaky-tests-detection) +# +set -euo pipefail + +REPO="${REPO:-splunk/splunk-operator}" +OUTPUT_DIR="${OUTPUT_DIR:-./junit-reports}" +ARTIFACT_PATTERN="${ARTIFACT_PATTERN:-^test-report-.*}" +TOP_N="${TOP_N:-20}" +WINDOW_SIZE="${WINDOW_SIZE:-1}" + +if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + sed -n '2,17p' "$0" | sed 's/^# \?//' + exit 0 +fi + +DRY_RUN="${DRY_RUN:-false}" +SKIP_ANALYSIS="${SKIP_ANALYSIS:-false}" +BRANCH="${BRANCH:-}" +START_DATE="${START_DATE:-}" +END_DATE="${END_DATE:-}" + +_next_is_branch=false +for arg in "$@"; do + if [[ "$_next_is_branch" == "true" ]]; then + BRANCH="$arg" + _next_is_branch=false + elif [[ "$arg" == "--branch" ]]; then + _next_is_branch=true + elif [[ "$arg" == --branch=* ]]; then + BRANCH="${arg#--branch=}" + elif [[ "$arg" == "--dry-run" ]]; then + DRY_RUN=true + elif [[ "$arg" == "--skip-analysis" ]]; then + SKIP_ANALYSIS=true + elif [[ -z "$START_DATE" ]]; then + START_DATE="$arg" + elif [[ -z "$END_DATE" ]]; then + END_DATE="$arg" + fi +done + +END_DATE="${END_DATE:-$(date -u -v-1d +%Y-%m-%d 2>/dev/null || date -u -d 'yesterday' +%Y-%m-%d)}" +START_DATE="${START_DATE:-$(date -u -v-7d +%Y-%m-%d 2>/dev/null || date -u -d '7 days ago' +%Y-%m-%d)}" + +# Derive WINDOW_COUNT from the date range (one window per WINDOW_SIZE days) +if date -v+0d +%s &>/dev/null; then + _start_epoch=$(date -jf "%Y-%m-%d" "$START_DATE" +%s) + _end_epoch=$(date -jf "%Y-%m-%d" "$END_DATE" +%s) +else + _start_epoch=$(date -d "$START_DATE" +%s) + _end_epoch=$(date -d "$END_DATE" +%s) +fi +WINDOW_COUNT=$(( ((_end_epoch - _start_epoch) / 86400 + WINDOW_SIZE) / WINDOW_SIZE )) + +if ! command -v gh &>/dev/null; then + echo "ERROR: 'gh' (GitHub CLI) is required. Install from https://cli.github.com/" >&2 + exit 1 +fi + +echo "Repository: $REPO" +echo "Date range: $START_DATE .. 
$END_DATE" +if [[ -n "$BRANCH" ]]; then + echo "Branch: $BRANCH" +fi +echo "Output: $OUTPUT_DIR" +echo "" + +BRANCH_FILTER="" +if [[ -n "$BRANCH" ]]; then + BRANCH_FILTER="| select(.workflow_run.head_branch == \"${BRANCH}\")" +fi +ART_FILTER=".artifacts[] | select(.name | test(\"${ARTIFACT_PATTERN}\")) | select(.expired == false) ${BRANCH_FILTER}" +DATE_FILTER="select(.created_at >= \"${START_DATE}T00:00:00Z\" and .created_at <= \"${END_DATE}T23:59:59Z\")" + +echo "Fetching artifact list..." + +artifacts_json="[]" +page=1 +while true; do + response=$(gh api "repos/${REPO}/actions/artifacts?per_page=100&page=${page}" 2>/dev/null) + + page_artifacts=$(echo "$response" | \ + jq "[${ART_FILTER} | ${DATE_FILTER} | {id, name, created_at, workflow_run_id: .workflow_run.id}]") + artifacts_json=$(echo "$artifacts_json" "$page_artifacts" | jq -s 'add') + + oldest=$(echo "$response" | jq -r '.artifacts[-1].created_at // empty') + if [[ -z "$oldest" || "$oldest" < "${START_DATE}T00:00:00Z" ]]; then + break + fi + + count=$(echo "$response" | jq '.artifacts | length') + if [[ "$count" -lt 100 ]]; then + break + fi + + page=$((page + 1)) +done + +artifact_count=$(echo "$artifacts_json" | jq 'length') +echo "Found $artifact_count matching artifacts." +echo "" + +if [[ "$artifact_count" -eq 0 ]]; then + echo "No artifacts matched. Check your date range and artifact retention settings." 
+ exit 0 +fi + +echo "--------------------------------------------------------------" +printf "%-12s %-50s %s\n" "RUN ID" "ARTIFACT NAME" "CREATED" +echo "--------------------------------------------------------------" +echo "$artifacts_json" | jq -r '.[] | "\(.workflow_run_id)\t\(.name)\t\(.created_at)"' | \ + while IFS=$'\t' read -r run_id name created; do + printf "%-12s %-50s %s\n" "$run_id" "$name" "${created%%T*}" + done +echo "--------------------------------------------------------------" +echo "" + +if [[ "$DRY_RUN" == "true" ]]; then + echo "(dry run - skipping downloads)" + exit 0 +fi + +mkdir -p "$OUTPUT_DIR" + +tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +dl_current=0 +echo "$artifacts_json" | jq -r '.[] | "\(.id)\t\(.name)\t\(.workflow_run_id)"' | \ + while IFS=$'\t' read -r art_id art_name run_id; do + dl_current=$((dl_current + 1)) + echo " [${dl_current}/${artifact_count}] ${art_name} (run ${run_id})..." + zipfile="${tmpdir}/${art_id}.zip" + if gh api "repos/${REPO}/actions/artifacts/${art_id}/zip" > "$zipfile" 2>/dev/null; then + unzip -qo "$zipfile" -d "$tmpdir/extract" 2>/dev/null + for f in "$tmpdir/extract"/*.xml; do + [[ -f "$f" ]] || continue + base=$(basename "$f" .xml) + mv "$f" "${OUTPUT_DIR}/${run_id}-${base}.xml" + done + rm -rf "$tmpdir/extract" "$zipfile" + else + echo " FAILED to download artifact ${art_id}" + rm -f "$zipfile" + fi + done + +total_files=$(find "$OUTPUT_DIR" -name '*.xml' 2>/dev/null | wc -l | tr -d ' ') +echo "" +echo "Done. ${total_files} XML files saved to ${OUTPUT_DIR}/" + +echo "" +echo "Normalizing classnames (stripping Ginkgo random suffixes)..." +for f in "${OUTPUT_DIR}"/*.xml; do + [[ -f "$f" ]] || continue + sed -E 's/classname="Running (.+)-[a-z0-9]{3}"/classname="\1"/g' "$f" > "${f}.tmp" && mv "${f}.tmp" "$f" +done +echo "Done." 
+ +if [[ "$SKIP_ANALYSIS" == "true" ]]; then + exit 0 +fi + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PYTHON="" +if [[ -x "${SCRIPT_DIR}/.venv/bin/python" ]]; then + PYTHON="${SCRIPT_DIR}/.venv/bin/python" +elif command -v python3 &>/dev/null; then + PYTHON="python3" +else + echo "" + echo "Python not found. Install with: cd tools && poetry install" + exit 0 +fi + +if ! "$PYTHON" -c "from flaky_tests_detection.check_flakes import main" &>/dev/null; then + echo "" + echo "flaky-tests-detection not installed. Install with: cd tools && poetry install" + exit 0 +fi + +echo "" +echo "================================================================" +echo "Running flaky test detection..." +echo " Window size: ${WINDOW_SIZE} days" +echo " Window count: ${WINDOW_COUNT}" +echo " Top N: ${TOP_N}" +echo "================================================================" +echo "" + +RESULTS_FILE="${RESULTS_FILE:-flaky-results.txt}" + +PYTHONPATH="${SCRIPT_DIR}:${PYTHONPATH:-}" "$PYTHON" -c \ + "import importlib; importlib.import_module('flaky-test-analysis-mpl-config'); from flaky_tests_detection.check_flakes import main; main()" \ + --junit-files="${OUTPUT_DIR}" \ + --grouping-option=days \ + --window-size="${WINDOW_SIZE}" \ + --window-count="${WINDOW_COUNT}" \ + --top-n="${TOP_N}" \ + --heatmap \ + 2>&1 | tee "$RESULTS_FILE" diff --git a/tools/test-failure-stats.py b/tools/test-failure-stats.py new file mode 100755 index 000000000..8affba488 --- /dev/null +++ b/tools/test-failure-stats.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +""" +Analyze JUnit XML reports and produce a Markdown report of test failure statistics. + +Parses all JUnit XML files (downloaded by flaky-test-analysis.sh) and writes +a Markdown report to stdout with per-test-case failure counts and rates. + +Usage: + ./tools/test-failure-stats.py [junit-dir] + ./tools/test-failure-stats.py > report.md + + junit-dir Directory with JUnit XML files (default: ./junit-reports) + +Requires: Python 3.8+ (stdlib only). 
+""" + +import re +import sys +import xml.etree.ElementTree as ET +from collections import defaultdict +from dataclasses import dataclass, field +from pathlib import Path + + +FILENAME_RE = re.compile( + r"^(\d+)-(?:report-junit|unit_test)-(\d{8})-(\d{6})-\d+(?:-(.+))?\.xml$" +) + +INFRA_NAMES = { + "[BeforeSuite]", "[AfterSuite]", "[ReportAfterSuite]", + "[SynchronizedBeforeSuite]", "[SynchronizedAfterSuite]", +} + + +@dataclass +class TestRecord: + runs: int = 0 + failures: int = 0 + timeouts: int = 0 + passes: int = 0 + failure_dates: list = field(default_factory=list) + + +def parse_filename(fname: str): + m = FILENAME_RE.match(fname) + if not m: + return None, None + date_str = m.group(2) + return f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}", m.group(4) or "unit_test" + + +def parse_junit_file(filepath: Path): + try: + tree = ET.parse(str(filepath)) + except ET.ParseError: + print(f" WARNING: Could not parse {filepath.name}, skipping", file=sys.stderr) + return + + for tc in tree.iter("testcase"): + name = tc.get("name", "") + if name in INFRA_NAMES: + continue + if tc.get("status") == "skipped" or tc.find("skipped") is not None: + continue + + classname = tc.get("classname", "") + status = tc.get("status", "") + has_failure = tc.find("failure") is not None + yield classname, name, status, has_failure + + +def build_stats(junit_dir: Path): + stats: dict[str, TestRecord] = defaultdict(TestRecord) + files_parsed = 0 + + for fpath in sorted(junit_dir.glob("*.xml")): + date_str, _ = parse_filename(fpath.name) + if date_str is None: + continue + + files_parsed += 1 + for classname, name, status, has_failure in parse_junit_file(fpath): + rec = stats[f"{classname}::{name}"] + rec.runs += 1 + if has_failure: + rec.failures += 1 + if status == "timedout": + rec.timeouts += 1 + rec.failure_dates.append(date_str) + else: + rec.passes += 1 + + return stats, files_parsed + + +def clean_name(full_name: str) -> str: + return re.sub(r"^\[It\]\s*", "", full_name) + + +def 
write_markdown(stats: dict[str, TestRecord], files_parsed: int): + failing = {k: v for k, v in stats.items() if v.failures > 0} + total_runs = sum(r.runs for r in stats.values()) + + print("# Test Failure Statistics") + print() + print("| Metric | Value |") + print("|--------|-------|") + print(f"| Files parsed | {files_parsed} |") + print(f"| Unique tests | {len(stats)} |") + print(f"| Tests with failures | {len(failing)} |") + print(f"| Total test runs (non-skipped) | {total_runs} |") + print(f"| Total failure occurrences | {sum(r.failures for r in failing.values())} |") + print() + + if not failing: + print("**No test failures found.**") + return + + ranked = sorted(failing.items(), + key=lambda x: (-x[1].failures, -x[1].failures / max(x[1].runs, 1))) + + print("## Failing Tests") + print() + print("| # | Fail | Runs | Rate | Timeouts | Last Failure | Suite | Test |") + print("|--:|-----:|-----:|-----:|---------:|:------------:|:------|:-----|") + + for i, (key, rec) in enumerate(ranked, 1): + classname, name = key.split("::", 1) + rate = rec.failures / rec.runs * 100 if rec.runs > 0 else 0 + last_fail = max(rec.failure_dates) if rec.failure_dates else "n/a" + display = clean_name(name).replace("|", "\\|") + print( + f"| {i} | {rec.failures} | {rec.runs} | {rate:.1f}% " + f"| {rec.timeouts} | {last_fail} | `{classname}` | {display} |" + ) + + +def main(): + junit_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("./junit-reports") + + if not junit_dir.is_dir(): + print(f"ERROR: {junit_dir} is not a directory", file=sys.stderr) + sys.exit(1) + + stats, files_parsed = build_stats(junit_dir) + write_markdown(stats, files_parsed) + + +if __name__ == "__main__": + main()