From 930181c46a38c4eaea61956951e40065c1f21190 Mon Sep 17 00:00:00 2001 From: Remy DUTHU Date: Fri, 10 Apr 2026 16:41:56 +0200 Subject: [PATCH] docs(test-insights): Add Test Insights documentation section Add overview, Prevention, Detection, and Mitigation pages covering the full test reliability lifecycle. Add navigation entry after CI Insights. Fixes: MRGFY-6885 Co-Authored-By: Claude Opus 4.6 (1M context) Change-Id: I648e33db87d7fd6a76bf6db2ad4da994e6a579e9 --- src/content/docs/test-insights.mdx | 55 +++++++++++++ src/content/docs/test-insights/detection.mdx | 74 +++++++++++++++++ src/content/docs/test-insights/mitigation.mdx | 81 +++++++++++++++++++ src/content/docs/test-insights/prevention.mdx | 73 +++++++++++++++++ src/content/navItems.tsx | 11 +++ 5 files changed, 294 insertions(+) create mode 100644 src/content/docs/test-insights.mdx create mode 100644 src/content/docs/test-insights/detection.mdx create mode 100644 src/content/docs/test-insights/mitigation.mdx create mode 100644 src/content/docs/test-insights/prevention.mdx diff --git a/src/content/docs/test-insights.mdx b/src/content/docs/test-insights.mdx new file mode 100644 index 0000000000..3d8ec3070f --- /dev/null +++ b/src/content/docs/test-insights.mdx @@ -0,0 +1,55 @@ +--- +title: Test Insights +description: Monitor, detect, and manage unreliable tests across your repositories. +--- + +Test Insights helps you manage test reliability across the full lifecycle. +It catches flaky tests on pull requests before they merge, surfaces unhealthy +tests across your repositories, and lets you quarantine problematic tests so +they don't block your CI pipeline. + +## How it works + +Test Insights is organized into three phases that follow the natural lifecycle +of a test reliability problem: + +1. **[Prevention](/test-insights/prevention)**: Catch flaky and broken tests + on pull requests before they reach your codebase. Mergify reruns tests on + PRs to detect inconsistent behavior early. + +2. 
**[Detection](/test-insights/detection)**: Identify and prioritize + unhealthy tests across your repositories. See which tests are flaky or + broken, and focus on the ones with the most impact. + +3. **[Mitigation](/test-insights/mitigation)**: Quarantine problematic tests + to unblock CI without removing them. Tests keep running, but their failures + no longer block merges. + +## Key concepts + +- **Flaky test**: A test that produces different results on the same commit. + For example, it passes on one run and fails on the next with identical code. + +- **Broken test**: A test that fails consistently. Recent runs are weighted + more heavily, so a test that recently started failing counts as broken. + +- **Health status**: A test's reliability classification: healthy, flaky, or + broken. It is based on results from multiple CI runs. + +- **Confidence**: How much data is available to assess a test's health. Low + confidence means the status could still change significantly as more runs + are collected. + +- **Quarantine**: Isolating a test so its failures are ignored for merge + decisions. The test still runs and results are still collected, preserving + full visibility. + +## Setup + +Test Insights is powered by the same CI integration as +[CI Insights](/ci-insights). To get started, configure your CI system and test +framework: + +- [GitHub Actions setup](/ci-insights/setup/github-actions) +- [Jenkins setup](/ci-insights/setup/jenkins) +- [Test framework configuration](/ci-insights#test-framework-configuration) diff --git a/src/content/docs/test-insights/detection.mdx b/src/content/docs/test-insights/detection.mdx new file mode 100644 index 0000000000..275d57f55b --- /dev/null +++ b/src/content/docs/test-insights/detection.mdx @@ -0,0 +1,74 @@ +--- +title: Detection +description: Identify and prioritize unhealthy tests across your repositories. +--- + +Even with prevention in place, tests can degrade over time.
Detection surfaces +all unhealthy tests (flaky and broken) across your repositories, so you can +see the full picture and prioritize what to fix. + +## How tests are classified + +Mergify classifies tests based on their results across multiple CI runs, +with recent results weighted more heavily: + +- **Flaky**: The test produces inconsistent results on the same commit. It + passes on some runs and fails on others, without any code changes. + +- **Broken**: The test fails consistently. Recent runs are weighted more + heavily, so a test that started failing recently will be classified as + broken even if it passed in earlier runs. + +Only unhealthy tests (flaky or broken) appear in Detection. Healthy tests +are not listed. + +## Understanding confidence + +Confidence indicates how much data is available to assess a test's health. + +- **High confidence**: Enough runs have been collected to make a reliable + assessment. The health status is unlikely to change significantly. + +- **Low confidence**: Limited data is available. The health status could + still shift as more runs are collected. Treat low-confidence results as + preliminary. + +Confidence increases as more CI runs are collected for a given test. + +## Prioritizing with impact + +The impact metric reflects how many failed executions a test causes. A +high-impact flaky test wastes more CI time and disrupts more workflows than +a low-impact one. + +Use impact to decide which tests to fix first: high-impact tests give you +the most return on investment when fixed. + +## Practical workflows + +### Finding your worst tests + +Sort by impact to surface the tests causing the most CI disruption. These +are the best candidates for immediate attention. 
+ +### Narrowing scope + +Use filters to focus on specific areas: + +- **Test name**: Search for a specific test or pattern +- **Job name**: Focus on tests within a particular CI job +- **Pipeline name**: Narrow to a specific CI pipeline + +### Checking quarantine status + +Tests that have already been quarantined are indicated in the health status. +This helps you avoid spending time investigating tests that are already being +managed through [Mitigation](/test-insights/mitigation). + +## Setup + +Detection requires test metrics collection through repeated CI runs. See the +CI setup guides for your platform: + +- [GitHub Actions setup](/ci-insights/setup/github-actions) +- [Jenkins setup](/ci-insights/setup/jenkins) diff --git a/src/content/docs/test-insights/mitigation.mdx b/src/content/docs/test-insights/mitigation.mdx new file mode 100644 index 0000000000..6a5ed3da25 --- /dev/null +++ b/src/content/docs/test-insights/mitigation.mdx @@ -0,0 +1,81 @@ +--- +title: Mitigation +description: Quarantine problematic tests to unblock CI without losing visibility. +--- + +When a flaky or broken test blocks CI, teams face a tough choice: fix it +immediately, delete it, or ignore it. Quarantine offers a better option. The +test keeps running, but its failures no longer block merges. You maintain full +visibility without disruption. + +## How quarantine works + +A quarantined test still executes in your CI pipeline and its results are +still collected by Mergify. The difference is that failures are ignored for +merge decisions. + +This means: + +- Your CI stays green while you work on a fix + +- Historical data is preserved, so you can track whether the test improves + or worsens over time + +- Other team members can see the test is quarantined and why + +Quarantine works on any branch, not just the default branch. + +:::note + Quarantined tests must still be uploaded through one of the supported CI + integrations. 
See the + [test framework configuration](/ci-insights#test-framework-configuration) + for setup details. +::: + +## Manual quarantine + +You can manually add or remove specific tests from quarantine through the +Mergify dashboard. This is useful when you've identified a problematic test +through [Detection](/test-insights/detection) and want to stop it from +blocking your team while you investigate. + +For technical details on how quarantine integrates with your CI pipeline, +see the [Quarantine documentation](/ci-insights/quarantine). + +## Auto-quarantine + +Auto-quarantine lets Mergify automatically quarantine tests without manual +intervention. By default, only flaky tests are quarantined automatically. +You can also enable quarantining of known broken tests through an additional +option. + +This is useful for teams that want hands-off management of unreliable tests. +You can enable or disable auto-quarantine per repository from the Mitigation +page in the dashboard. + +## Practical workflows + +### Quarantining a test from Detection + +When you identify a high-impact flaky or broken test in +[Detection](/test-insights/detection), you can quarantine it directly to +stop it from blocking merges while you work on a fix. + +### Reviewing quarantined tests + +Periodically check the Mitigation page to review quarantined tests. Look +for tests whose health status has improved; these may be ready to be +removed from quarantine. + +### Enabling auto-quarantine + +For repositories where flaky tests frequently block CI, enable +auto-quarantine to let Mergify quarantine them automatically. This reduces manual +overhead and keeps your CI pipeline moving. + +## Setup + +Mitigation uses the same CI integration as Detection. To ensure quarantine +works correctly, your CI must be configured to check quarantine status. See +the [Quarantine documentation](/ci-insights/quarantine) for technical setup +details.
diff --git a/src/content/docs/test-insights/prevention.mdx b/src/content/docs/test-insights/prevention.mdx new file mode 100644 index 0000000000..2fd5a32491 --- /dev/null +++ b/src/content/docs/test-insights/prevention.mdx @@ -0,0 +1,73 @@ +--- +title: Prevention +description: Catch flaky and broken tests on pull requests before they reach your codebase. +--- + +Prevention monitors tests introduced or modified in pull requests. By +rerunning tests on PRs, it detects flaky behavior before code merges, keeping +your codebase reliable. + +## How it works + +When a pull request runs tests, Mergify reruns them to check for consistency. +Tests that produce different results on the same commit are flagged as flaky. +This happens transparently as part of your existing CI pipeline, with no changes +to your test code needed. + +Tests caught as flaky on a PR are prevented from silently degrading your +test suite. You can review their health status before deciding to merge. + +## What you can track + +Prevention provides key metrics to help you understand test reliability +on pull requests: + +### Caught flaky tests + +The number of flaky tests detected during PR reruns. This is the core value +of Prevention: every caught test is a reliability problem that didn't make it +into your codebase. + +### New tests + +Tests being introduced on PRs, along with their health status. Each new test +is classified as healthy, flaky, or broken based on its rerun results. This +helps you spot unreliable tests before they're merged. + +### CI budget spent + +The total CI time spent on reruns. This metric helps teams understand the +cost of flaky test prevention and make informed trade-offs between +thoroughness and CI budget. + +## Practical workflows + +### Reviewing tests before merging + +When a PR introduces or modifies tests, check the Prevention page to see +their health status. Tests with a flaky or broken status should be +investigated before merging. 
+ +### Filtering by pull request state + +Use the pull request state filter to focus on specific PRs: + +- **Open**: Tests on PRs still in review +- **Merged**: Tests on PRs that have already been merged +- **Closed**: Tests on PRs that were closed without merging + +### Understanding confidence on new tests + +New tests have limited run data, so their confidence level may be low. A low +confidence means the health status could change as more data is collected. +Consider waiting for more runs before drawing conclusions about a test's +reliability. + +## Setup + +Prevention requires test framework plugins that instrument test runs to track +flakiness on pull requests. + +See the [test framework configuration](/ci-insights#test-framework-configuration) +for setup instructions specific to your framework (pytest-mergify, +rspec-mergify, etc.). diff --git a/src/content/navItems.tsx b/src/content/navItems.tsx index d26273e68d..d3d91aac68 100644 --- a/src/content/navItems.tsx +++ b/src/content/navItems.tsx @@ -100,6 +100,17 @@ const navItems: NavItem[] = [ }, ], }, + { + title: 'Test Insights', + path: '/test-insights', + icon: 'fa6-solid:flask-vial', + children: [ + { title: 'Overview', path: '/test-insights', icon: 'fa6-regular:lightbulb' }, + { title: 'Prevention', path: '/test-insights/prevention', icon: 'fa6-solid:shield-halved' }, + { title: 'Detection', path: '/test-insights/detection', icon: 'fa6-solid:magnifying-glass' }, + { title: 'Mitigation', path: '/test-insights/mitigation', icon: 'fa-solid:radiation' }, + ], + }, { title: 'Merge Queue', icon: MergeQueueIcon,