From 7be58846ea346e23685803b71e298e6f419f4104 Mon Sep 17 00:00:00 2001 From: Ger I Date: Mon, 20 Apr 2026 09:52:37 -0700 Subject: [PATCH 01/10] docs(design): proposed unified Go/Ginkgo E2E suite Adds design doc for consolidating the four existing E2E workflows (test-integration, test-E2E, test-backup-and-restore, test-upgrade-and-rollback) into a single Ginkgo v2 + Gomega + go.mongodb.org/mongo-driver/v2 suite that reuses ~20 CNPG tests/utils packages. Key decisions for discussion: - Go + Ginkgo over Python + pytest (spike found large CNPG util reuse) - Per-area package layout under test/e2e/tests/ - Shared session cluster for read-only tests (data/perf/status) with per-spec Mongo DB isolation - Fresh DocumentDB per spec for mutating tests (lifecycle/tls/backup/upgrade) - Lazy MinIO session fixture, imported directly from CNPG - Operator-health gate auto-skips non-disruptive specs on operator churn - Label + imported-level (TEST_DEPTH) selection See doc for full fixture tiers, parallelism strategy, CI layout, and 28-item implementation plan. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/designs/e2e-test-suite.md | 522 +++++++++++++++++++++++++++++++++ 1 file changed, 522 insertions(+) create mode 100644 docs/designs/e2e-test-suite.md diff --git a/docs/designs/e2e-test-suite.md b/docs/designs/e2e-test-suite.md new file mode 100644 index 00000000..02b86f6e --- /dev/null +++ b/docs/designs/e2e-test-suite.md @@ -0,0 +1,522 @@ +# Plan: Unified Go/Ginkgo E2E Suite for DocumentDB Operator + +## Problem + +Four independent black-box test workflows exercise overlapping parts of the operator, each +with its own bash glue, port-forward logic, and inline mongosh/Python assertions: + +| Workflow | What it covers today | +|---|---| +| `test-integration.yml` | Port-forward + mongosh comprehensive JS + pymongo heredoc | +| `test-E2E.yml` | Port-forward + `comprehensive_mongosh_tests.js` + `performance_test.js` + status/PV/mount checks | +| `test-backup-and-restore.yml` | Seed data → ScheduledBackup → wait → delete data → restore CR → validate | +| `test-upgrade-and-rollback.yml` | Install released operator → seed → Helm upgrade to built → verify → recreate → verify again | + +Pain points: port-forward lifecycle re-implemented everywhere; assertion logic is JS +(`throw new Error`) or homegrown Python — no JUnit; no coverage for update/scale/delete-reclaim/ +TLS modes/ChangeStreams/service exposure/PV recovery; heavy operations (cluster creation +~60–120 s) are repeated per workflow and per test; two toolchains (bash + JS + Python) for +contributors to navigate on top of the Go operator code. + +## Proposed Approach + +Build **one unified Go + Ginkgo v2 + Gomega E2E suite** that drives the operator +end-to-end, reusing CNPG's `tests/utils/` Go packages wherever possible. Tests are grouped +by CRD operation in per-area Go packages; the data plane is validated via +`go.mongodb.org/mongo-driver/v2`. The suite fully replaces the four workflows. 
+ +### Why Go (v5 — reversed from v4) + +Spike result (see `Spike Findings` below): ~20 CNPG util packages are directly reusable +because DocumentDB wraps the same `apiv1.Cluster` / `apiv1.Backup` CRs CNPG defines. +Reusing them deletes a large fraction of the infrastructure we were about to rebuild +(MinIO deploy, namespace management, envsubst, stern log streaming, CNPG Cluster +introspection, backup CR helpers, timeouts map). + +### Design principles (unchanged from v4) + +1. **Amortize heavy lifting.** Cluster creation (~60–120 s per 1-instance cluster) is the + single biggest cost. Classify every spec as *read-only* or *mutating*. Read-only specs + share a session-scoped cluster and isolate via per-spec Mongo database names; only + mutating specs pay for a fresh cluster. +2. **Small, single-purpose tests.** Each `It(...)` asserts one behavior. Porting + `comprehensive_mongosh_tests.js` produces ~10 small specs, not one monolith. +3. **Parallelize safely.** Ginkgo `-p` (process-per-package) + worker-aware namespace + naming. Marker/label-grouped CI jobs add a second parallelism layer. +4. **Structure for growth.** Per-area Go packages + shared `pkg/e2eutils/` + composable + manifest fragments. Adding a new CRD field = one new package, not sprawl. + +### Stack + +- **Ginkgo v2 + Gomega** — BDD runner + matchers. Same framework the operator already + uses for `envtest`, so contributors share patterns and caches. +- **`sigs.k8s.io/controller-runtime/pkg/client`** — typed CR access via our `api/preview` + types (no dynamic client / unstructured dicts). +- **`go.mongodb.org/mongo-driver/v2`** — data-plane assertions. +- **CNPG `tests/utils/`** — imported as a library (Apache-2.0, compatible with our MIT). + Pin version in `go.mod`. +- **`github.com/cloudnative-pg/cloudnative-pg/tests/labels`** + `tests/levels` — import the + depth/label plumbing rather than re-implementing. 
+ +### Layout + +``` +test/e2e/ # new top-level Go test tree +├── go.mod # separate module; pins CNPG utils version +├── README.md # local run instructions +├── suite_test.go # SynchronizedBeforeSuite, global fixtures +├── labels.go # our label taxonomy (wraps CNPG's) +├── levels.go # thin re-export of CNPG's levels +├── pkg/e2eutils/ # our helpers; each file <300 LOC +│ ├── testenv/ +│ │ └── env.go # wraps CNPG's TestingEnvironment with dummy PG values +│ ├── documentdb/ +│ │ └── documentdb.go # CR verbs: Create, PatchSpec, WaitHealthy, Delete +│ ├── mongo/ +│ │ └── client.go # MongoClient builder, Seed, Probe, Count +│ ├── assertions/ +│ │ └── assertions.go # AssertDocumentDBReady, AssertPrimaryUnchanged, AssertWalLevel, … +│ ├── timeouts/ +│ │ └── timeouts.go # DocumentDB-specific overrides atop CNPG's map +│ ├── seed/ +│ │ └── datasets.go # canonical datasets (Small, Medium, Sort, Agg) +│ ├── portforward/ +│ │ └── portforward.go # wraps CNPG's forwardconnection for Mongo port +│ ├── operatorhealth/ +│ │ └── gate.go # adapted from CNPG's operator/ for documentdb-operator ns +│ └── fixtures/ # shared cluster fixtures (session scope) +│ ├── shared_ro.go # 1-instance cluster, per-spec DB names +│ ├── shared_scale.go # 2-instance cluster; tests reset to 2 on teardown +│ └── minio.go # lazy, label-gated (wraps CNPG minio.Deploy) +├── manifests/ # .yaml.template files; CNPG envsubst expands +│ ├── base/ +│ │ └── documentdb.yaml.template +│ ├── mixins/ # concatenated into base; simple sh envsubst pipeline +│ │ ├── tls_disabled.yaml.template +│ │ ├── tls_selfsigned.yaml.template +│ │ ├── tls_certmanager.yaml.template +│ │ ├── tls_provided.yaml.template +│ │ ├── feature_changestreams.yaml.template +│ │ ├── exposure_loadbalancer.yaml.template +│ │ ├── exposure_clusterip.yaml.template +│ │ ├── storage_custom.yaml.template +│ │ └── reclaim_retain.yaml.template +│ └── backup/ +│ ├── backup.yaml.template +│ ├── scheduled_backup.yaml.template +│ ├── 
recovery_from_backup.yaml.template +│ └── recovery_from_pv.yaml.template +└── tests/ # per-area Go packages; Ginkgo `-p` = 1 proc/pkg + ├── lifecycle/ + │ ├── lifecycle_suite_test.go + │ ├── deploy_test.go + │ ├── update_image_test.go + │ ├── update_loglevel_test.go + │ ├── update_storage_test.go + │ └── delete_reclaim_test.go + ├── scale/ + │ ├── scale_suite_test.go # spins up shared_scale_cluster + │ ├── scale_up_test.go # 1→2, 2→3 + │ └── scale_down_test.go # 3→2, 2→1; primary re-election + ├── data/ # all read-only; shares ro cluster + │ ├── data_suite_test.go # spins up shared_ro_cluster + │ ├── crud_test.go + │ ├── query_test.go + │ ├── aggregation_test.go + │ ├── sort_limit_skip_test.go + │ ├── update_ops_test.go + │ ├── delete_ops_test.go + │ └── pipeline_test.go + ├── performance/ # read-only; shares ro cluster; serial (-procs=1) + │ ├── performance_suite_test.go + │ ├── perf_insert_test.go + │ ├── perf_count_range_test.go + │ ├── perf_aggregation_test.go + │ ├── perf_sort_test.go + │ ├── perf_update_test.go + │ └── perf_delete_drop_test.go + ├── backup/ + │ ├── backup_suite_test.go # spins up minio + │ ├── backup_ondemand_test.go + │ ├── backup_scheduled_test.go + │ ├── restore_from_backup_test.go + │ └── restore_from_pv_test.go + ├── tls/ + │ ├── tls_suite_test.go + │ ├── tls_disabled_test.go + │ ├── tls_selfsigned_test.go + │ ├── tls_certmanager_test.go # skipped via Label("needs-certmanager") + │ └── tls_provided_test.go + ├── feature_gates/ + │ ├── feature_gates_suite_test.go + │ └── changestreams_test.go # table-driven over (enabled/disabled) + ├── exposure/ + │ ├── exposure_suite_test.go + │ ├── clusterip_test.go + │ └── loadbalancer_test.go # Label("needs-metallb") + ├── status/ + │ ├── status_suite_test.go # shared_ro_cluster + │ ├── connection_string_test.go + │ ├── pv_name_test.go + │ └── mount_options_test.go + └── upgrade/ + ├── upgrade_suite_test.go # owns its own operator install; Label("disruptive") + ├── upgrade_control_plane_test.go # 
released chart → built chart, verify data +│   ├── upgrade_images_test.go # extension + gateway image bump +│   └── rollback_test.go # optional — if rollback is supported +``` + +### Fixture tiers (dedup heavy lifting) + +All fixtures read config from env + CLI options (`--kube-context`, `--operator-ns`, +`--image-tag`, `--chart-version`, `--test-depth`, `--keep-clusters`). Ginkgo uses `flag` +registration; env vars mirror flags. + +**Session-scoped (most expensive, created once per `go test` invocation of a package):** + +- `Env` — wraps `environment.TestingEnvironment` from CNPG, constructed in + `SynchronizedBeforeSuite`. `POSTGRES_IMG` set to a dummy value because we don't use + the `postgres/` helpers that read it. +- `OperatorReady` — one-time check the documentdb-operator Deployment is Available + + CRDs installed. +- `SharedROCluster` — 1-instance DocumentDB, created once per package that imports it. + Consumed by `data/`, `performance/`, `status/`. **Read-only-by-convention**: each spec + uses its own Mongo database `db_<spec>`. The fixture wraps + the CR handle in a read-only proxy that panics on `PatchSpec`/`Delete`. +- `SharedScaleCluster` — 2-instance cluster used as starting state for `scale/`. Tests + reset instance count to 2 in `AfterEach` so the cluster is reusable. +- `Minio` — lazy session fixture in `backup/backup_suite_test.go`; calls + `cnpgminio.Deploy` only if the package is selected. + +**Per-spec (cheap or mutating), constructed in `BeforeEach`:** + +- `FreshDocumentDB(spec *apiv1preview.DocumentDB)` — factory used by lifecycle/tls/ + feature/exposure/backup/upgrade. Unique namespace, wait healthy, register cleanup via + `DeferCleanup`. +- `MongoClient(documentdb)` — mongo-driver client bound to the CR's service via a + Ginkgo-owned port-forward. +- `TmpNamespace()` — `e2e-<proc>-<rand>`, auto-deleted. + +**Auto-applied:** + +- `operatorhealth.Gate` — invoked from `BeforeEach` and `AfterEach` of a top-level + `Describe` in `suite_test.go`. 
Snapshots operator pod UID + restart count; if it + churned, all subsequent non-`disruptive`/`upgrade` specs are **skipped** via a + package-global sentinel. Adapted from CNPG's `operator/` package, retargeted to our + `documentdb-operator` namespace and image. + +**Dedup summary:** + +| Test area | Cluster source | Wall-time saving vs all-fresh | +|---|---|---| +| `data/` (7 specs) | `SharedROCluster` | ~10 min | +| `performance/` (6 specs) | `SharedROCluster` | ~9 min | +| `status/` (3 specs) | `SharedROCluster` | ~5 min | +| `scale/` (4 specs) | `SharedScaleCluster` | ~5 min | +| `lifecycle/`, `tls/`, `feature_gates/`, `exposure/`, `backup/`, `upgrade/` | `FreshDocumentDB` | N/A (need isolation) | + +### Parallelism + +- `ginkgo -p ./tests/...` — one process per package. `SharedROCluster` is created once + per `data/` / `performance/` / `status/` process (acceptable, Ginkgo cannot share + across processes without external coordination). +- Within a package: Ginkgo defaults to serial within a process. For `data/` we enable + `--procs=N` and use `BeforeAll` (ordered container) so the cluster is created once per + process while specs run in parallel against their own DBs. +- Per-process naming: namespaces `e2e-<area>-<proc>`, DBs + `db_<proc>_<spec>`, cluster names `ro-<proc>`. +- CI: marker-grouped GitHub Actions jobs run in parallel; within each job, Ginkgo + parallelizes at the process level. +- Performance job forces `--procs=1` so timing thresholds aren't noisy. +- Upgrade job forces `--procs=1` (disruptive; owns its own operator install). + +### Level/depth control + +- Import CNPG's `tests/levels` package. Every top-level `Describe`/`Context` adds a + level tag via `Label(levels.Medium.String())` (or Highest/High/Low/Lowest). +- `TEST_DEPTH=N` env var — reused as-is from CNPG's plumbing. +- Default depth = Medium. Smoke CI job uses Highest; nightly uses Lowest. 
+ +### Labels (replaces "markers") + +Ginkgo labels, applied via `Label("…")` on `Describe`/`Context`/`It` and filtered via +`--label-filter`. We wrap CNPG's `tests/labels.go` and add DocumentDB-specific ones: + +```go +// labels.go +const ( + // Functional area (one per package via suite_test Describe label) + LifecycleLabel = "lifecycle" + ScaleLabel = "scale" + DataLabel = "data" + PerformanceLabel = "performance" + BackupLabel = "backup" + RecoveryLabel = "recovery" + TLSLabel = "tls" + FeatureLabel = "feature" + ExposureLabel = "exposure" + StatusLabel = "status" + UpgradeLabel = "upgrade" + + // Cross-cutting + SmokeLabel = "smoke" + BasicLabel = "basic" + DestructiveLabel = "destructive" // mutates cluster data + DisruptiveLabel = "disruptive" // may break operator; exempt from health gate + SlowLabel = "slow" // >5 min + + // Prereqs — tests with these labels Skip() if env missing + NeedsMinioLabel = "needs-minio" + NeedsCertManagerLabel = "needs-certmanager" + NeedsMetalLBLabel = "needs-metallb" +) +``` + +### Manifests — base + mixin templates + +Plain text files expanded by `cnpgenvsubst.Envsubst` (from `tests/utils/envsubst`). +Composition is done in Go: + +```go +// pkg/e2eutils/documentdb/documentdb.go +func RenderCR(name, ns string, mixins []string, vars map[string]string) ([]byte, error) { + parts := []string{"manifests/base/documentdb.yaml.template"} + for _, m := range mixins { + parts = append(parts, "manifests/mixins/"+m+".yaml.template") + } + return envsubst.Expand(concatFiles(parts), vars) +} +``` + +No Jinja2; `envsubst` is enough for our CRs, and it matches what CNPG uses so mental +model is shared. + +### Assertions & timeouts + +- `pkg/e2eutils/assertions/assertions.go` — Gomega-wrapped verbs: + `AssertDocumentDBReady`, `AssertInstanceCount`, `AssertPrimaryUnchanged`, + `AssertPVCCount`, `AssertTLSSecretReady`, `AssertWalLevel`, `AssertServiceType`, + `AssertConnectionStringMatches`. 
Each returns `func()` suitable for + `Eventually(...).Should(Succeed())`. +- `pkg/e2eutils/timeouts/timeouts.go` — starts from + `cnpgtimeouts.Timeouts()`, overrides/adds DocumentDB-specific ops: + ```go + type Op string + const ( + DocumentDBReady Op = "documentdb-ready" + DocumentDBUpgrade Op = "documentdb-upgrade" + InstanceScale Op = "instance-scale" + PVCResize Op = "pvc-resize" + ) + func For(op Op) time.Duration { … } + ``` + +### CI Workflow + +One workflow `test-e2e.yml` with amd64+arm64 matrix. Within each matrix row, marker-grouped +jobs in parallel: + +| CI job | `--label-filter` | `ginkgo --procs` | Runner | +|---|---|---|---| +| `smoke` | `smoke` | auto | ubuntu-latest | +| `lifecycle` | `lifecycle` | auto | ubuntu-latest | +| `scale` | `scale` | 2 | ubuntu-latest | +| `data` | `data` | auto | ubuntu-latest | +| `performance` | `performance` | 1 | ubuntu-latest (dedicated) | +| `backup` | `backup` | 2 | ubuntu-latest | +| `tls` | `tls` | auto | ubuntu-latest | +| `feature` | `feature \|\| exposure \|\| status` | auto | ubuntu-latest | +| `upgrade` | `upgrade` | 1 | ubuntu-latest | + +Each job: setup kind → install operator (existing `setup-test-environment` action) → +`ginkgo -r --label-filter="…" --procs=N --junit-report=junit.xml ./tests/...` → upload +JUnit + logs. `workflow_dispatch` inputs: `label`, `depth`, `keep_clusters`. + +### Fate of Existing Artifacts + +**Delete** after the new suite is green in CI for one full run: +- `.github/workflows/{test-integration,test-E2E,test-backup-and-restore,test-upgrade-and-rollback}.yml` +- `.github/actions/setup-port-forwarding/` +- `operator/src/scripts/test-scripts/{test-mongodb-connection.sh,test-python-pymongo.sh,mongo-python-data-pusher.py,comprehensive_mongosh_tests.js,performance_test.js}` + +**Keep:** +- `.github/actions/setup-test-environment/`, `.github/actions/collect-logs/` +- `operator/src/scripts/test-scripts/deploy-csi-driver.sh` (infra prep) +- Go unit/envtest suite — out of scope. 
+ +### Scope Boundaries + +- In scope: single-cluster operations on kind; all CRD spec fields + CRs. +- Out of scope: cross-cluster replication, multi-cloud, AKS/EKS-specific LB annotations, + Azure Fleet — stays in `documentdb-playground/`. +- Operator install/uninstall is in `setup-test-environment`; the suite assumes a running + operator. `tests/upgrade/` owns its two-phase install. + +### Module layout (go.mod placement) + +`test/e2e/` is a **separate Go module** (own `go.mod`). Reasons: +- Pulls in CNPG test utils + Ginkgo + mongo-driver without polluting the operator's + runtime dependencies. +- Lets us iterate on test deps without triggering operator builds. +- Matches how CNPG itself is organized (`tests/e2e/`). + +## Spike findings (informed v5 decision) + +**Repo investigated:** `github.com/cloudnative-pg/cloudnative-pg` @ main, `tests/utils/`. +**License:** Apache-2.0 (compatible with our MIT; no NOTICE file). +**API stability:** `tests/utils/*` is public (not `internal/`) but has no stability +contract — expect occasional churn at CNPG version bumps; pin version in `go.mod`. 
+ +Reusability tally of the 29 `tests/utils/*` packages: + +| Status | Packages | Count | +|---|---|---| +| ✅ Direct reuse | `clusterutils`, `minio`, `backups`, `timeouts`, `namespaces`, `pods`, `services`, `storage`, `secrets`, `yaml`, `envsubst`, `exec`, `run`, `logs`, `objects`, `sternmultitailer`, `forwardconnection`, `nodes`, `endpoints`, `deployments` | ~20 | +| ⚠️ Adapt | `environment.TestingEnvironment` (PG-coupled; construct with dummy POSTGRES_IMG), `operator` (retarget to `documentdb-operator` namespace) | 2 | +| ❌ Skip | `postgres`, `replicationslot`, `fencing`, `importdb`, `cloudvendors`, `openshift`, `proxy`, `azurite` | ~7 | + +Key enabling fact: DocumentDB's operator **wraps CNPG's `apiv1.Cluster` and +`apiv1.Backup`** — so `clusterutils.GetPrimary`, `clusterutils.GetReplicas`, +`backups.Create`, `backups.AssertBackupConditionInClusterStatus` work on our resources as-is. + +## Todos + +### Phase 0 — Spike verification (new) + +1. `cnpg-utils-probe` — Write 30-line `cmd/probe/main.go` that constructs + `environment.TestingEnvironment` with dummy PG env vars, calls + `clusterutils.GetPrimary` on a live DocumentDB cluster in kind, confirms compile + run. + Gate for the rest of Phase 1. + +### Phase 1 — Scaffolding & helpers + +2. `scaffold` — `test/e2e/` tree, separate `go.mod` (pinning CNPG utils version), Ginkgo + suite boilerplate, `labels.go`, re-export of CNPG `levels.go`, CLI flag plumbing, + area-package skeleton with empty `*_suite_test.go` in each. +3. `testenv` — `pkg/e2eutils/testenv/env.go`: constructor that wraps + `environment.NewTestingEnvironment()` with dummy `POSTGRES_IMG`; exposes our typed + `client.Client` with `api/preview` scheme registered. +4. `helpers-documentdb` — `documentdb.go`: `Create`, `PatchSpec`, `WaitHealthy`, + `Delete`, `List`, `RenderCR` (base+mixin envsubst pipeline). +5. `helpers-mongo` — `mongo/client.go`: `NewClient(host, port, user, pw, tls)`, + `Seed(ctx, db, n)`, `Ping`, `Count`. +6. 
`helpers-portforward` — `portforward.go`: thin wrapper over CNPG's + `forwardconnection` targeting the DocumentDB gateway port. +7. `helpers-assertions` — `assertions.go`: `AssertDocumentDBReady`, + `AssertInstanceCount`, `AssertPrimaryUnchanged`, `AssertPVCCount`, + `AssertTLSSecretReady`, `AssertWalLevel`, `AssertServiceType`, + `AssertConnectionStringMatches`. Each returns `func() error` for `Eventually`. +8. `helpers-timeouts` — `timeouts.go`: extends CNPG's map with DocumentDB ops. +9. `helpers-seed` — `seed/datasets.go`: `SmallDataset(10)`, `MediumDataset(1000)`, + `SortDataset`, `AggDataset` — reused by data/performance/backup/upgrade. +10. `operator-health-gate` — `operatorhealth/gate.go`: adapted from CNPG's `operator/` + package for `documentdb-operator` ns; `BeforeEach`/`AfterEach` hooks + package + sentinel to skip subsequent specs on churn. +11. `shared-fixtures` — `pkg/e2eutils/fixtures/`: `shared_ro.go`, `shared_scale.go`, + `minio.go` (wraps CNPG `minio.Deploy`, lazy-constructed). +12. `manifests-base` — `manifests/base/documentdb.yaml.template` + all mixins under + `manifests/mixins/` and `manifests/backup/`. +13. `suite-root` — `suite_test.go`: `SynchronizedBeforeSuite` builds `Env`, installs + lazy MinIO hook, starts stern log tailer, registers operator-health gate. + +### Phase 2 — Test packages (one per area) + +14. `tests-data` — `data_suite_test.go` spins up `SharedROCluster`; port + `comprehensive_mongosh_tests.js` + pymongo heredoc, **split** into 7 spec files. + Package label `DataLabel`. +15. `tests-performance` — 6 spec files, one per timed op; shares `SharedROCluster`; + forced serial in CI. Thresholds preserved. +16. `tests-status` — 3 spec files; shares `SharedROCluster`. +17. `tests-lifecycle` — 5 spec files; each owns its own `FreshDocumentDB`. +18. `tests-scale` — `scale_suite_test.go` with `SharedScaleCluster`; up + down spec + files; each `AfterEach` resets to 2 instances. +19. 
`tests-backup` — `backup_suite_test.go` owns `Minio`; 4 spec files. +20. `tests-tls` — 4 spec files, one per mode. CertManager file uses + `NeedsCertManagerLabel`. +21. `tests-feature-gates` — `changestreams_test.go` table-driven over (enabled, disabled). +22. `tests-exposure` — ClusterIP + LoadBalancer spec files; LB uses `NeedsMetalLBLabel`. +23. `tests-upgrade` — `upgrade_suite_test.go` with multi-phase install helpers; **split** + into 2–3 spec files so failures pinpoint the phase. + +### Phase 3 — Integration + +24. `local-run` — Full suite green locally on kind at `TEST_DEPTH=Medium` with `ginkgo -p`. +25. `ci-workflow` — `.github/workflows/test-e2e.yml`: amd64+arm64 matrix, label-grouped + jobs per table above, `workflow_dispatch` inputs. +26. `cleanup-workflows` — Delete the 4 old workflows + `setup-port-forwarding` composite. +27. `cleanup-scripts` — Delete old bash/JS/Python test scripts. +28. `docs` — Update `docs/developer-guides/` + AGENTS.md: tree, local run (`ginkgo -p + ./tests/...`), labels, levels, how to add a new area / mixin / assertion; CHANGELOG + migration note; document the CNPG utils dependency + pin policy. + +## Comparison: Our Plan vs CloudNative-PG E2E Suite + +| Aspect | CNPG | Our plan (v5) | Decision | +|---|---|---|---| +| Language | Go (Ginkgo+Gomega) | Go (Ginkgo+Gomega) | **Aligned.** | +| Test selection | 28 labels + TEST_DEPTH | Our labels + **imported** `tests/levels` | Aligned; we re-export CNPG's levels. | +| Matrix (K8s×PG×engine) | full 3-D | amd64/arm64 only | Defer to GA. | +| Cluster bring-up | `hack/setup-cluster.sh` | existing `setup-test-environment` action | Keep ours. | +| Session-scoped MinIO | yes (`minio.Deploy`) | **imported as-is** from CNPG | Adopted verbatim. | +| Operator health gate | yes (`BeforeEach` pod check) | `operatorhealth/gate.go` — adapted from CNPG `operator/` | Adapted (ns retargeted). 
| +| Shared cluster for read-only | implicit per-namespace | explicit `SharedROCluster` + read-only proxy | **We go further.** | +| Assertion composables | `AssertClusterIsReady`, etc. | `pkg/e2eutils/assertions` | Aligned. | +| Manifest templating | `envsubst` over `.yaml.template` | `envsubst` over `.yaml.template` | **Imported.** | +| Per-op timeouts | `Timeouts()` map | extends CNPG's map | **Imported + extended.** | +| Parallelism | `ginkgo -p` + within-pkg procs | `ginkgo -p` + within-pkg procs + label-grouped CI | Two-layer. | +| Stern log streaming | yes | **imported** (`sternmultitailer`) | Adopted. | +| Label filter (`/test` comment) | yes | `workflow_dispatch` inputs | Defer. | + +### Not copying, with rationale + +- **Multi-engine** (k3d/EKS/AKS/GKE/OpenShift) — defer to GA. +- **Branch-snapshot operator install** from artifacts repo — we build in the same workflow. +- **postgres/** helpers — we speak Mongo, not libpq. + +## Open Questions / Risks + +- **CNPG utils API churn**: pinned version mitigates but doesn't eliminate. Budget ~½ day + per CNPG bump for test-util compat fixes. Document in contribute guide. +- **Dummy `POSTGRES_IMG`** in `testenv.Env` feels brittle; if CNPG starts *eagerly* + validating the image in `NewTestingEnvironment`, we'd need to fork. Check on first + probe; fallback plan is to copy the constructor (~100 LOC). +- **Read-only proxy enforcement**: making sure tests can't accidentally call + `PatchSpec` on `SharedROCluster`. The proxy panics at runtime — acceptable; maybe add + a linter later. +- **Backup object store**: confirm `test-backup-and-restore.yml` uses MinIO (likely) so + CNPG's `minio.Deploy` is a drop-in. Verify during Phase 0 probe. +- **MetalLB / SC expansion / cert-manager**: label-gated skips; document the env + contract in README. 
+- **Ginkgo parallelism across processes** can't share `SharedROCluster`; acceptable + cost (we pay for one cluster per Go process in `data/`+`performance/`+`status/` = + 3 clusters max per CI job instead of 1). Lower than the N-per-spec baseline we're + replacing. +- **Total CI wallclock**: budget review after first full run. +- **Rubber-duck review**: after Phase 0 (probe) + Phase 1 (scaffold + helpers + + suite_test) + one populated area (e.g. `tests/data/`), review shape before building + the rest. + +## v5 changes vs v4 (what flipped) + +- **Language flipped Python → Go.** Spike confirmed ~20 CNPG `tests/utils/` packages are + directly reusable (DocumentDB wraps the same `apiv1.Cluster`/`apiv1.Backup` CRs). +- **Framework**: pytest + pytest-xdist → Ginkgo v2 + Gomega (already in the operator + repo's envtest). +- **Data-plane lib**: pymongo → `go.mongodb.org/mongo-driver/v2`. +- **Manifests**: Jinja2 → CNPG's `envsubst` (simpler, shared mental model). +- **Location**: `test/pytest-e2e/` → `test/e2e/` (Go idiom). +- **Depth/levels**: custom marker → import CNPG's `levels` package directly. +- **MinIO**: write fixture → import `minio.Deploy` verbatim. +- **Operator health gate**: write from scratch → adapted from CNPG's `operator/` package. +- **Stern log tailing**: deferred in v4 → included via imported `sternmultitailer`. +- **Todo count**: 27 → 28 (added `cnpg-utils-probe` as Phase 0 gate). + +**Unchanged from v4:** + +- Fixture tiers (`SharedROCluster`, `SharedScaleCluster`, `FreshDocumentDB`, lazy `Minio`). +- Per-area package structure with per-area suite files. +- Small, single-purpose spec files (7 for data, 6 for performance, 3 for upgrade, etc.). +- Label taxonomy (functional + cross-cutting + needs-*). +- Marker-grouped CI jobs with per-job `--procs` override. +- Read-only contract for shared clusters. +- Branch: `developer/e2e-suite` (renamed from `developer/pytest-e2e-suite`). 
From e0a89e8edab50df085ca2d75ca4109ad321539e1 Mon Sep 17 00:00:00 2001 From: Ger I Date: Mon, 20 Apr 2026 12:37:53 -0700 Subject: [PATCH 02/10] docs(design): drop internal v4/v5 drafting artifacts Address review feedback: the design doc should read as a current-state proposal, not a diff log. Remove version labels from headings and drop the 'v5 changes vs v4' section since the Spike Findings section already tells the story. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/designs/e2e-test-suite.md | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/docs/designs/e2e-test-suite.md b/docs/designs/e2e-test-suite.md index 02b86f6e..229a9c82 100644 --- a/docs/designs/e2e-test-suite.md +++ b/docs/designs/e2e-test-suite.md @@ -25,7 +25,7 @@ end-to-end, reusing CNPG's `tests/utils/` Go packages wherever possible. Tests a by CRD operation in per-area Go packages; the data plane is validated via `go.mongodb.org/mongo-driver/v2`. The suite fully replaces the four workflows. -### Why Go (v5 — reversed from v4) +### Why Go over Python Spike result (see `Spike Findings` below): ~20 CNPG util packages are directly reusable because DocumentDB wraps the same `apiv1.Cluster` / `apiv1.Backup` CRs CNPG defines. @@ -33,7 +33,7 @@ Reusing them deletes a large fraction of the infrastructure we were about to reb (MinIO deploy, namespace management, envsubst, stern log streaming, CNPG Cluster introspection, backup CR helpers, timeouts map). -### Design principles (unchanged from v4) +### Design principles 1. **Amortize heavy lifting.** Cluster creation (~60–120 s per 1-instance cluster) is the single biggest cost. Classify every spec as *read-only* or *mutating*. Read-only specs @@ -360,7 +360,7 @@ JUnit + logs. `workflow_dispatch` inputs: `label`, `depth`, `keep_clusters`. - Lets us iterate on test deps without triggering operator builds. - Matches how CNPG itself is organized (`tests/e2e/`). 
-## Spike findings (informed v5 decision) +## Spike findings **Repo investigated:** `github.com/cloudnative-pg/cloudnative-pg` @ main, `tests/utils/`. **License:** Apache-2.0 (compatible with our MIT; no NOTICE file). @@ -451,7 +451,7 @@ Key enabling fact: DocumentDB's operator **wraps CNPG's `apiv1.Cluster` and ## Comparison: Our Plan vs CloudNative-PG E2E Suite -| Aspect | CNPG | Our plan (v5) | Decision | +| Aspect | CNPG | Our plan | Decision | |---|---|---|---| | Language | Go (Ginkgo+Gomega) | Go (Ginkgo+Gomega) | **Aligned.** | | Test selection | 28 labels + TEST_DEPTH | Our labels + **imported** `tests/levels` | Aligned; we re-export CNPG's levels. | @@ -495,28 +495,3 @@ Key enabling fact: DocumentDB's operator **wraps CNPG's `apiv1.Cluster` and - **Rubber-duck review**: after Phase 0 (probe) + Phase 1 (scaffold + helpers + suite_test) + one populated area (e.g. `tests/data/`), review shape before building the rest. - -## v5 changes vs v4 (what flipped) - -- **Language flipped Python → Go.** Spike confirmed ~20 CNPG `tests/utils/` packages are - directly reusable (DocumentDB wraps the same `apiv1.Cluster`/`apiv1.Backup` CRs). -- **Framework**: pytest + pytest-xdist → Ginkgo v2 + Gomega (already in the operator - repo's envtest). -- **Data-plane lib**: pymongo → `go.mongodb.org/mongo-driver/v2`. -- **Manifests**: Jinja2 → CNPG's `envsubst` (simpler, shared mental model). -- **Location**: `test/pytest-e2e/` → `test/e2e/` (Go idiom). -- **Depth/levels**: custom marker → import CNPG's `levels` package directly. -- **MinIO**: write fixture → import `minio.Deploy` verbatim. -- **Operator health gate**: write from scratch → adapted from CNPG's `operator/` package. -- **Stern log tailing**: deferred in v4 → included via imported `sternmultitailer`. -- **Todo count**: 27 → 28 (added `cnpg-utils-probe` as Phase 0 gate). - -**Unchanged from v4:** - -- Fixture tiers (`SharedROCluster`, `SharedScaleCluster`, `FreshDocumentDB`, lazy `Minio`). 
-- Per-area package structure with per-area suite files. -- Small, single-purpose spec files (7 for data, 6 for performance, 3 for upgrade, etc.). -- Label taxonomy (functional + cross-cutting + needs-*). -- Marker-grouped CI jobs with per-job `--procs` override. -- Read-only contract for shared clusters. -- Branch: `developer/e2e-suite` (renamed from `developer/pytest-e2e-suite`). From d91ce673ecf593d6e31f945b9e373c0419a55182 Mon Sep 17 00:00:00 2001 From: George Eichberger Date: Mon, 20 Apr 2026 16:36:22 -0700 Subject: [PATCH 03/10] feat(test/e2e): unified Go/Ginkgo end-to-end suite (PR #346) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the four legacy end-to-end workflows (test-integration.yml, test-E2E.yml, test-backup-and-restore.yml, test-upgrade-and-rollback.yml) and their bash/mongosh/pymongo glue with a single Go/Ginkgo v2/Gomega suite under test/e2e/. Highlights: - test/e2e/ Go module with pkg/e2eutils/ helpers (testenv, namespaces, portforward, fixtures, mongo, seed, assertions, timeouts, tlscerts) - specs organised by CRD area: lifecycle, scale, data, performance, backup, tls, feature-gates, exposure, status, upgrade - labels in test/e2e/labels.go (areas + smoke/basic/destructive/ disruptive/slow + needs-* capability labels) - TEST_DEPTH=0..4 (Highest..Lowest) selects spec density - fixtures pipeline: renderDocumentDB preprocesses templates with documentdb.DropEmptyVarLines before envsubst to tolerate empty vars (CNPG's envsubst treats empty as missing) - image architecture preserved: CNPG pg18 base + DocumentDB extension via image-library + independent gateway sidecar — harness leaves image env vars empty so operator composes layered defaults - upgrade/image spec requires explicit old/new env vars for both extension and (optionally) gateway — never conflates the two - lifecycle/update_image spec skips unless E2E_DOCUMENTDB_IMAGE_NEXT is a genuinely different tag from DOCUMENTDB_IMAGE (no silent 
no-op) - new .github/workflows/test-e2e.yml is the single PR-level gate - three release_*.yml workflows no longer reference the deleted legacy test workflows; dead run_tests inputs removed from release_operator.yml / release_images.yml Design: docs/designs/e2e-test-suite.md Runbook: test/e2e/README.md Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../actions/setup-port-forwarding/action.yml | 305 ---- .../workflows/release_documentdb_images.yml | 26 +- .github/workflows/release_images.yml | 39 +- .github/workflows/release_operator.yml | 39 +- .github/workflows/test-E2E.yml | 493 ------ .github/workflows/test-backup-and-restore.yml | 580 ------- .github/workflows/test-e2e.yml | 274 ++++ .github/workflows/test-integration.yml | 163 -- .../workflows/test-upgrade-and-rollback.yml | 1337 ----------------- AGENTS.md | 33 + CHANGELOG.md | 12 + .../comprehensive_mongosh_tests.js | 497 ------ .../test-scripts/mongo-python-data-pusher.py | 41 - .../scripts/test-scripts/performance_test.js | 222 --- .../test-scripts/test-mongodb-connection.sh | 435 ------ .../test-scripts/test-python-pymongo.sh | 317 ---- test/e2e/README.md | 262 ++++ test/e2e/go.mod | 110 ++ test/e2e/go.sum | 303 ++++ test/e2e/labels.go | 59 + test/e2e/levels.go | 109 ++ test/e2e/levels_test.go | 88 ++ test/e2e/manifests/base/.keep | 0 .../manifests/base/documentdb.yaml.template | 18 + test/e2e/manifests/embed.go | 15 + test/e2e/manifests/mixins/.keep | 0 .../mixins/exposure_clusterip.yaml.template | 8 + .../exposure_loadbalancer.yaml.template | 8 + .../feature_changestreams.yaml.template | 8 + .../mixins/reclaim_retain.yaml.template | 11 + .../mixins/storage_custom.yaml.template | 10 + .../mixins/tls_certmanager.yaml.template | 14 + .../mixins/tls_disabled.yaml.template | 9 + .../mixins/tls_provided.yaml.template | 11 + .../mixins/tls_selfsigned.yaml.template | 9 + test/e2e/pkg/e2eutils/assertions/.keep | 0 .../e2e/pkg/e2eutils/assertions/assertions.go | 197 +++ 
.../e2eutils/assertions/assertions_test.go | 166 ++ .../pkg/e2eutils/clusterprobe/clusterprobe.go | 173 +++ .../clusterprobe/clusterprobe_test.go | 217 +++ test/e2e/pkg/e2eutils/documentdb/.keep | 0 .../e2e/pkg/e2eutils/documentdb/documentdb.go | 444 ++++++ .../e2eutils/documentdb/documentdb_test.go | 353 +++++ test/e2e/pkg/e2eutils/fixtures/.keep | 0 test/e2e/pkg/e2eutils/fixtures/fixtures.go | 478 ++++++ .../pkg/e2eutils/fixtures/fixtures_test.go | 217 +++ test/e2e/pkg/e2eutils/fixtures/shared_ro.go | 100 ++ .../e2e/pkg/e2eutils/fixtures/shared_scale.go | 156 ++ .../pkg/e2eutils/fixtures/teardown_test.go | 177 +++ test/e2e/pkg/e2eutils/helmop/helmop.go | 192 +++ test/e2e/pkg/e2eutils/helmop/helmop_test.go | 69 + test/e2e/pkg/e2eutils/mongo/.keep | 0 test/e2e/pkg/e2eutils/mongo/client.go | 221 +++ test/e2e/pkg/e2eutils/mongo/client_test.go | 207 +++ test/e2e/pkg/e2eutils/mongo/connect.go | 268 ++++ .../e2e/pkg/e2eutils/namespaces/namespaces.go | 127 ++ .../e2eutils/namespaces/namespaces_test.go | 76 + test/e2e/pkg/e2eutils/operatorhealth/.keep | 0 test/e2e/pkg/e2eutils/operatorhealth/gate.go | 213 +++ .../pkg/e2eutils/operatorhealth/gate_test.go | 159 ++ test/e2e/pkg/e2eutils/portforward/.keep | 0 .../pkg/e2eutils/portforward/portforward.go | 151 ++ .../e2eutils/portforward/portforward_test.go | 79 + test/e2e/pkg/e2eutils/seed/.keep | 0 test/e2e/pkg/e2eutils/seed/datasets.go | 126 ++ test/e2e/pkg/e2eutils/seed/datasets_test.go | 96 ++ test/e2e/pkg/e2eutils/testenv/.keep | 0 test/e2e/pkg/e2eutils/testenv/env.go | 96 ++ test/e2e/pkg/e2eutils/testenv/env_test.go | 38 + test/e2e/pkg/e2eutils/timeouts/.keep | 0 test/e2e/pkg/e2eutils/timeouts/timeouts.go | 119 ++ .../pkg/e2eutils/timeouts/timeouts_test.go | 50 + test/e2e/pkg/e2eutils/tlscerts/tlscerts.go | 117 ++ .../pkg/e2eutils/tlscerts/tlscerts_test.go | 97 ++ test/e2e/runid.go | 69 + test/e2e/runid_test.go | 54 + test/e2e/suite.go | 212 +++ test/e2e/suite_test.go | 127 ++ test/e2e/tests/data/aggregation_test.go | 
110 ++ test/e2e/tests/data/crud_test.go | 85 ++ test/e2e/tests/data/data_suite_test.go | 56 + test/e2e/tests/data/delete_ops_test.go | 79 + test/e2e/tests/data/helpers_test.go | 31 + test/e2e/tests/data/pipeline_test.go | 120 ++ test/e2e/tests/data/query_test.go | 98 ++ test/e2e/tests/data/sort_limit_skip_test.go | 113 ++ test/e2e/tests/data/update_ops_test.go | 91 ++ test/e2e/tests/exposure/clusterip_test.go | 101 ++ .../e2e/tests/exposure/exposure_suite_test.go | 56 + test/e2e/tests/exposure/helpers_test.go | 115 ++ test/e2e/tests/exposure/loadbalancer_test.go | 140 ++ .../tests/feature_gates/changestreams_test.go | 97 ++ .../feature_gates/feature_gates_suite_test.go | 56 + test/e2e/tests/feature_gates/helpers_test.go | 123 ++ .../tests/lifecycle/delete_reclaim_test.go | 98 ++ test/e2e/tests/lifecycle/deploy_test.go | 79 + test/e2e/tests/lifecycle/helpers_test.go | 109 ++ .../tests/lifecycle/lifecycle_suite_test.go | 57 + test/e2e/tests/lifecycle/update_image_test.go | 99 ++ .../tests/lifecycle/update_loglevel_test.go | 79 + .../tests/lifecycle/update_storage_test.go | 108 ++ .../performance/perf_aggregation_test.go | 87 ++ .../performance/perf_count_range_test.go | 66 + .../performance/perf_delete_drop_test.go | 68 + .../tests/performance/perf_helpers_test.go | 68 + .../e2e/tests/performance/perf_insert_test.go | 76 + test/e2e/tests/performance/perf_sort_test.go | 86 ++ .../e2e/tests/performance/perf_update_test.go | 63 + .../performance/performance_suite_test.go | 56 + test/e2e/tests/scale/scale_down_test.go | 117 ++ test/e2e/tests/scale/scale_suite_test.go | 56 + test/e2e/tests/scale/scale_up_test.go | 109 ++ .../tests/status/connection_string_test.go | 68 + test/e2e/tests/status/mount_options_test.go | 95 ++ test/e2e/tests/status/pv_name_test.go | 86 ++ test/e2e/tests/status/status_suite_test.go | 56 + test/e2e/tests/tls/helpers_test.go | 174 +++ test/e2e/tests/tls/tls_certmanager_test.go | 182 +++ test/e2e/tests/tls/tls_disabled_test.go | 57 + 
test/e2e/tests/tls/tls_provided_test.go | 113 ++ test/e2e/tests/tls/tls_selfsigned_test.go | 89 ++ test/e2e/tests/tls/tls_suite_test.go | 56 + test/e2e/tests/upgrade/helpers_test.go | 163 ++ test/e2e/tests/upgrade/rollback_test.go | 42 + .../upgrade/upgrade_control_plane_test.go | 137 ++ test/e2e/tests/upgrade/upgrade_images_test.go | 214 +++ test/e2e/tests/upgrade/upgrade_suite_test.go | 58 + 127 files changed, 11579 insertions(+), 4469 deletions(-) delete mode 100644 .github/actions/setup-port-forwarding/action.yml delete mode 100644 .github/workflows/test-E2E.yml delete mode 100644 .github/workflows/test-backup-and-restore.yml create mode 100644 .github/workflows/test-e2e.yml delete mode 100644 .github/workflows/test-integration.yml delete mode 100644 .github/workflows/test-upgrade-and-rollback.yml delete mode 100644 operator/src/scripts/test-scripts/comprehensive_mongosh_tests.js delete mode 100644 operator/src/scripts/test-scripts/mongo-python-data-pusher.py delete mode 100644 operator/src/scripts/test-scripts/performance_test.js delete mode 100644 operator/src/scripts/test-scripts/test-mongodb-connection.sh delete mode 100755 operator/src/scripts/test-scripts/test-python-pymongo.sh create mode 100644 test/e2e/README.md create mode 100644 test/e2e/go.mod create mode 100644 test/e2e/go.sum create mode 100644 test/e2e/labels.go create mode 100644 test/e2e/levels.go create mode 100644 test/e2e/levels_test.go create mode 100644 test/e2e/manifests/base/.keep create mode 100644 test/e2e/manifests/base/documentdb.yaml.template create mode 100644 test/e2e/manifests/embed.go create mode 100644 test/e2e/manifests/mixins/.keep create mode 100644 test/e2e/manifests/mixins/exposure_clusterip.yaml.template create mode 100644 test/e2e/manifests/mixins/exposure_loadbalancer.yaml.template create mode 100644 test/e2e/manifests/mixins/feature_changestreams.yaml.template create mode 100644 test/e2e/manifests/mixins/reclaim_retain.yaml.template create mode 100644 
test/e2e/manifests/mixins/storage_custom.yaml.template create mode 100644 test/e2e/manifests/mixins/tls_certmanager.yaml.template create mode 100644 test/e2e/manifests/mixins/tls_disabled.yaml.template create mode 100644 test/e2e/manifests/mixins/tls_provided.yaml.template create mode 100644 test/e2e/manifests/mixins/tls_selfsigned.yaml.template create mode 100644 test/e2e/pkg/e2eutils/assertions/.keep create mode 100644 test/e2e/pkg/e2eutils/assertions/assertions.go create mode 100644 test/e2e/pkg/e2eutils/assertions/assertions_test.go create mode 100644 test/e2e/pkg/e2eutils/clusterprobe/clusterprobe.go create mode 100644 test/e2e/pkg/e2eutils/clusterprobe/clusterprobe_test.go create mode 100644 test/e2e/pkg/e2eutils/documentdb/.keep create mode 100644 test/e2e/pkg/e2eutils/documentdb/documentdb.go create mode 100644 test/e2e/pkg/e2eutils/documentdb/documentdb_test.go create mode 100644 test/e2e/pkg/e2eutils/fixtures/.keep create mode 100644 test/e2e/pkg/e2eutils/fixtures/fixtures.go create mode 100644 test/e2e/pkg/e2eutils/fixtures/fixtures_test.go create mode 100644 test/e2e/pkg/e2eutils/fixtures/shared_ro.go create mode 100644 test/e2e/pkg/e2eutils/fixtures/shared_scale.go create mode 100644 test/e2e/pkg/e2eutils/fixtures/teardown_test.go create mode 100644 test/e2e/pkg/e2eutils/helmop/helmop.go create mode 100644 test/e2e/pkg/e2eutils/helmop/helmop_test.go create mode 100644 test/e2e/pkg/e2eutils/mongo/.keep create mode 100644 test/e2e/pkg/e2eutils/mongo/client.go create mode 100644 test/e2e/pkg/e2eutils/mongo/client_test.go create mode 100644 test/e2e/pkg/e2eutils/mongo/connect.go create mode 100644 test/e2e/pkg/e2eutils/namespaces/namespaces.go create mode 100644 test/e2e/pkg/e2eutils/namespaces/namespaces_test.go create mode 100644 test/e2e/pkg/e2eutils/operatorhealth/.keep create mode 100644 test/e2e/pkg/e2eutils/operatorhealth/gate.go create mode 100644 test/e2e/pkg/e2eutils/operatorhealth/gate_test.go create mode 100644 
test/e2e/pkg/e2eutils/portforward/.keep create mode 100644 test/e2e/pkg/e2eutils/portforward/portforward.go create mode 100644 test/e2e/pkg/e2eutils/portforward/portforward_test.go create mode 100644 test/e2e/pkg/e2eutils/seed/.keep create mode 100644 test/e2e/pkg/e2eutils/seed/datasets.go create mode 100644 test/e2e/pkg/e2eutils/seed/datasets_test.go create mode 100644 test/e2e/pkg/e2eutils/testenv/.keep create mode 100644 test/e2e/pkg/e2eutils/testenv/env.go create mode 100644 test/e2e/pkg/e2eutils/testenv/env_test.go create mode 100644 test/e2e/pkg/e2eutils/timeouts/.keep create mode 100644 test/e2e/pkg/e2eutils/timeouts/timeouts.go create mode 100644 test/e2e/pkg/e2eutils/timeouts/timeouts_test.go create mode 100644 test/e2e/pkg/e2eutils/tlscerts/tlscerts.go create mode 100644 test/e2e/pkg/e2eutils/tlscerts/tlscerts_test.go create mode 100644 test/e2e/runid.go create mode 100644 test/e2e/runid_test.go create mode 100644 test/e2e/suite.go create mode 100644 test/e2e/suite_test.go create mode 100644 test/e2e/tests/data/aggregation_test.go create mode 100644 test/e2e/tests/data/crud_test.go create mode 100644 test/e2e/tests/data/data_suite_test.go create mode 100644 test/e2e/tests/data/delete_ops_test.go create mode 100644 test/e2e/tests/data/helpers_test.go create mode 100644 test/e2e/tests/data/pipeline_test.go create mode 100644 test/e2e/tests/data/query_test.go create mode 100644 test/e2e/tests/data/sort_limit_skip_test.go create mode 100644 test/e2e/tests/data/update_ops_test.go create mode 100644 test/e2e/tests/exposure/clusterip_test.go create mode 100644 test/e2e/tests/exposure/exposure_suite_test.go create mode 100644 test/e2e/tests/exposure/helpers_test.go create mode 100644 test/e2e/tests/exposure/loadbalancer_test.go create mode 100644 test/e2e/tests/feature_gates/changestreams_test.go create mode 100644 test/e2e/tests/feature_gates/feature_gates_suite_test.go create mode 100644 test/e2e/tests/feature_gates/helpers_test.go create mode 100644 
test/e2e/tests/lifecycle/delete_reclaim_test.go create mode 100644 test/e2e/tests/lifecycle/deploy_test.go create mode 100644 test/e2e/tests/lifecycle/helpers_test.go create mode 100644 test/e2e/tests/lifecycle/lifecycle_suite_test.go create mode 100644 test/e2e/tests/lifecycle/update_image_test.go create mode 100644 test/e2e/tests/lifecycle/update_loglevel_test.go create mode 100644 test/e2e/tests/lifecycle/update_storage_test.go create mode 100644 test/e2e/tests/performance/perf_aggregation_test.go create mode 100644 test/e2e/tests/performance/perf_count_range_test.go create mode 100644 test/e2e/tests/performance/perf_delete_drop_test.go create mode 100644 test/e2e/tests/performance/perf_helpers_test.go create mode 100644 test/e2e/tests/performance/perf_insert_test.go create mode 100644 test/e2e/tests/performance/perf_sort_test.go create mode 100644 test/e2e/tests/performance/perf_update_test.go create mode 100644 test/e2e/tests/performance/performance_suite_test.go create mode 100644 test/e2e/tests/scale/scale_down_test.go create mode 100644 test/e2e/tests/scale/scale_suite_test.go create mode 100644 test/e2e/tests/scale/scale_up_test.go create mode 100644 test/e2e/tests/status/connection_string_test.go create mode 100644 test/e2e/tests/status/mount_options_test.go create mode 100644 test/e2e/tests/status/pv_name_test.go create mode 100644 test/e2e/tests/status/status_suite_test.go create mode 100644 test/e2e/tests/tls/helpers_test.go create mode 100644 test/e2e/tests/tls/tls_certmanager_test.go create mode 100644 test/e2e/tests/tls/tls_disabled_test.go create mode 100644 test/e2e/tests/tls/tls_provided_test.go create mode 100644 test/e2e/tests/tls/tls_selfsigned_test.go create mode 100644 test/e2e/tests/tls/tls_suite_test.go create mode 100644 test/e2e/tests/upgrade/helpers_test.go create mode 100644 test/e2e/tests/upgrade/rollback_test.go create mode 100644 test/e2e/tests/upgrade/upgrade_control_plane_test.go create mode 100644 
test/e2e/tests/upgrade/upgrade_images_test.go create mode 100644 test/e2e/tests/upgrade/upgrade_suite_test.go diff --git a/.github/actions/setup-port-forwarding/action.yml b/.github/actions/setup-port-forwarding/action.yml deleted file mode 100644 index c587daf6..00000000 --- a/.github/actions/setup-port-forwarding/action.yml +++ /dev/null @@ -1,305 +0,0 @@ -name: 'Setup Port Forwarding' -description: 'Sets up robust port forwarding with retry logic for DocumentDB testing' -inputs: - namespace: - description: 'Kubernetes namespace where the DocumentDB cluster is running' - required: true - cluster-name: - description: 'Name of the DocumentDB cluster' - required: true - port: - description: 'Port to forward' - required: true - architecture: - description: 'Target architecture for logging purposes' - required: true - test-type: - description: 'Type of test (comprehensive or performance)' - required: false - default: 'comprehensive' -runs: - using: 'composite' - steps: - - name: Setup port forwarding with enhanced retries - shell: bash - run: | - echo "Setting up port forwarding for ${{ inputs.test-type }} tests on ${{ inputs.architecture }}..." - - # Function to setup port forwarding with enhanced retries - setup_port_forward() { - local max_attempts=5 - local attempt=1 - local base_sleep=5 - local test_type="${{ inputs.test-type }}" - - # Adjust retry parameters based on test type - if [[ "$test_type" == "performance" ]]; then - max_attempts=4 - base_sleep=3 - fi - - while [ $attempt -le $max_attempts ]; do - echo "Port forwarding attempt $attempt/$max_attempts..." - - # Exponential/Progressive backoff for retry delays - local retry_delay=$((base_sleep * attempt)) - if [ $attempt -gt 1 ]; then - echo "Waiting ${retry_delay}s before retry attempt..." 
- sleep $retry_delay - fi - - # Get the actual pod name and ensure it's ready - POD_NAME=$(kubectl get pods -n ${{ inputs.namespace }} -l cnpg.io/cluster=${{ inputs.cluster-name }} -o jsonpath='{.items[0].metadata.name}') - if [ -z "$POD_NAME" ]; then - echo "❌ No DocumentDB pod found" - kubectl get pods -n ${{ inputs.namespace }} - ((attempt++)) - continue - fi - echo "Using pod: $POD_NAME" - - # Comprehensive pod readiness check with retries - pod_ready=false - local readiness_checks=3 - if [[ "$test_type" == "performance" ]]; then - readiness_checks=2 - fi - - for ready_check in $(seq 1 $readiness_checks); do - pod_phase=$(kubectl get pod $POD_NAME -n ${{ inputs.namespace }} -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown") - echo "Pod phase: $pod_phase (readiness check $ready_check/$readiness_checks)" - - if [[ "$pod_phase" == "Running" ]]; then - # Wait for pod to be ready - local timeout=60 - if [[ "$test_type" == "performance" ]]; then - timeout=45 - fi - - if kubectl wait --for=condition=Ready pod/$POD_NAME -n ${{ inputs.namespace }} --timeout=${timeout}s 2>/dev/null; then - echo "✓ Pod is ready" - pod_ready=true - break - else - echo "❌ Pod readiness check failed, retrying..." - sleep 10 - fi - else - echo "Pod is not running, waiting..." - local timeout=90 - if [[ "$test_type" == "performance" ]]; then - timeout=45 - fi - - if kubectl wait --for=condition=Ready pod/$POD_NAME -n ${{ inputs.namespace }} --timeout=${timeout}s 2>/dev/null; then - echo "✓ Pod became ready" - pod_ready=true - break - else - echo "❌ Pod failed to become ready, retrying..." 
- local sleep_time=15 - if [[ "$test_type" == "performance" ]]; then - sleep_time=8 - fi - sleep $sleep_time - fi - fi - done - - if [ "$pod_ready" = false ]; then - echo "❌ Pod $POD_NAME is not ready after multiple checks (attempt $attempt)" - if [[ "$test_type" == "comprehensive" ]]; then - kubectl describe pod/$POD_NAME -n ${{ inputs.namespace }} | head -30 - fi - ((attempt++)) - continue - fi - - # Clean up any existing port forwarding with more thorough cleanup - cleanup_port_forward() { - # Kill any existing port forwarding processes - pkill -f "kubectl port-forward.*${{ inputs.port }}" 2>/dev/null || true - - # Determine file prefix based on test type - local file_prefix="/tmp/pf" - if [[ "$test_type" == "performance" ]]; then - file_prefix="/tmp/perf_pf" - fi - - # Clean up PID files and logs - for file in "${file_prefix}_pid" "${file_prefix}_output.log"; do - if [ -f "$file" ]; then - [ -f "${file_prefix}_pid" ] && kill $(cat "${file_prefix}_pid") 2>/dev/null || true - rm -f "$file" - fi - done - - # Wait for port to be released - local cleanup_sleep=3 - if [[ "$test_type" == "performance" ]]; then - cleanup_sleep=2 - fi - sleep $cleanup_sleep - - # Check if port is still in use and force kill if needed - local port_users=$(lsof -ti:${{ inputs.port }} 2>/dev/null || true) - if [ -n "$port_users" ]; then - echo "Force killing processes using port ${{ inputs.port }}: $port_users" - echo "$port_users" | xargs -r kill -9 2>/dev/null || true - local force_sleep=2 - if [[ "$test_type" == "performance" ]]; then - force_sleep=1 - fi - sleep $force_sleep - fi - } - - cleanup_port_forward - - # Start port-forward with enhanced error handling - echo "Starting $test_type port forwarding from pod $POD_NAME..." - local file_prefix="/tmp/pf" - if [[ "$test_type" == "performance" ]]; then - file_prefix="/tmp/perf_pf" - fi - - kubectl port-forward pod/$POD_NAME ${{ inputs.port }}:${{ inputs.port }} -n ${{ inputs.namespace }} > "${file_prefix}_output.log" 2>&1 & - PF_PID=$! 
- echo $PF_PID > "${file_prefix}_pid" - - # Wait for port-forward to establish - echo "Waiting for $test_type port forwarding to establish..." - local establish_sleep=20 - if [[ "$test_type" == "performance" ]]; then - establish_sleep=15 - fi - sleep $establish_sleep - - # Check if port-forward process is still running - if ! kill -0 $PF_PID 2>/dev/null; then - echo "❌ $test_type port forwarding process died immediately (attempt $attempt)" - if [ -f "${file_prefix}_output.log" ]; then - echo "$test_type port forwarding output:" - cat "${file_prefix}_output.log" - fi - ((attempt++)) - continue - fi - - # Enhanced connection testing with progressive delays - connection_success=false - local connection_tests=8 - local connection_sleep=3 - if [[ "$test_type" == "performance" ]]; then - connection_tests=6 - connection_sleep=2 - fi - - for i in $(seq 1 $connection_tests); do - sleep $connection_sleep - if nc -z 127.0.0.1 ${{ inputs.port }} 2>/dev/null; then - echo "✓ $test_type port forwarding connection test passed (attempt $i/$connection_tests)" - connection_success=true - break - else - echo "❌ $test_type port forwarding connection test failed (attempt $i/$connection_tests)" - # Check if port-forward is still alive - if ! kill -0 $PF_PID 2>/dev/null; then - echo "❌ $test_type port forwarding process died during connection testing" - break - fi - fi - done - - if [ "$connection_success" = false ]; then - echo "❌ All $test_type connection tests failed (attempt $attempt)" - kill $PF_PID 2>/dev/null || true - ((attempt++)) - continue - fi - - # Extended stability check with more thorough validation - echo "Running $test_type stability check..." - stable=true - local stability_checks=5 - local stability_sleep=8 - if [[ "$test_type" == "performance" ]]; then - stability_checks=3 - stability_sleep=6 - fi - - for check in $(seq 1 $stability_checks); do - sleep $stability_sleep - - # Check if process is still alive - if ! 
kill -0 $PF_PID 2>/dev/null; then - echo "❌ $test_type port forwarding process died during stability check $check/$stability_checks (attempt $attempt)" - stable=false - break - fi - - # Check connection stability - if ! nc -z 127.0.0.1 ${{ inputs.port }} 2>/dev/null; then - echo "❌ $test_type connection lost during stability check $check/$stability_checks (attempt $attempt)" - stable=false - break - fi - - # Additional validation: try to establish a brief connection - local tcp_timeout=5 - if [[ "$test_type" == "performance" ]]; then - tcp_timeout=3 - fi - - if timeout $tcp_timeout bash -c "/dev/null; then - echo "✓ $test_type stability check $check/$stability_checks passed (TCP connection verified)" - else - echo "❌ $test_type TCP connection verification failed during stability check $check/$stability_checks (attempt $attempt)" - stable=false - break - fi - done - - if [ "$stable" = true ]; then - echo "✓ $test_type port forwarding established and stable on ${{ inputs.architecture }} (attempt $attempt)" - if [[ "$test_type" == "comprehensive" ]]; then - echo "✓ Final validation: Port forwarding is ready for use" - fi - return 0 - else - echo "❌ $test_type stability check failed (attempt $attempt)" - if [ -f "${file_prefix}_output.log" ]; then - echo "$test_type port forwarding output:" - local tail_lines=30 - if [[ "$test_type" == "performance" ]]; then - tail_lines=20 - fi - tail -$tail_lines "${file_prefix}_output.log" - fi - kill $PF_PID 2>/dev/null || true - cleanup_port_forward - ((attempt++)) - fi - done - - echo "❌ Failed to establish stable $test_type port forwarding after $max_attempts attempts" - return 1 - } - - # Call the function with enhanced error handling - if ! 
setup_port_forward; then - echo "=== Final ${{ inputs.test-type }} diagnostics ===" - kubectl get pods -n ${{ inputs.namespace }} -o wide - kubectl describe pods -n ${{ inputs.namespace }} - kubectl get events -n ${{ inputs.namespace }} --sort-by='.lastTimestamp' | tail -15 - kubectl logs -n ${{ inputs.namespace }} -l cnpg.io/cluster=${{ inputs.cluster-name }} --tail=50 - - # Check for any system-level issues - echo "=== ${{ inputs.test-type }} system diagnostics ===" - lsof -i:${{ inputs.port }} || echo "No processes using port ${{ inputs.port }}" - if [[ "${{ inputs.test-type }}" == "comprehensive" ]]; then - netstat -tuln | grep ${{ inputs.port }} || echo "Port ${{ inputs.port }} not in use" - fi - - exit 1 - fi diff --git a/.github/workflows/release_documentdb_images.yml b/.github/workflows/release_documentdb_images.yml index b25d48c2..cf3747e2 100644 --- a/.github/workflows/release_documentdb_images.yml +++ b/.github/workflows/release_documentdb_images.yml @@ -110,31 +110,28 @@ jobs: sed -i "s|documentDbVersion: \"${OLD_VERSION}\"|documentDbVersion: \"${NEW_VERSION}\"|" \ operator/documentdb-helm-chart/values.yaml - # 4. Update test workflow fallback images - sed -i "s|documentdb:${OLD_VERSION}|documentdb:${NEW_VERSION}|g" \ - .github/workflows/test-backup-and-restore.yml - sed -i "s|gateway:${OLD_VERSION}|gateway:${NEW_VERSION}|g" \ - .github/workflows/test-backup-and-restore.yml - - # 5. Update the released database baseline used by upgrade tests - sed -i "s|RELEASED_DATABASE_VERSION: ${OLD_VERSION}|RELEASED_DATABASE_VERSION: ${NEW_VERSION}|" \ - .github/workflows/test-upgrade-and-rollback.yml - - # 6. Update sidecar plugin config test (hardcoded expected gateway image) + # 4. (Removed) Test workflow fallback images — the legacy + # test-backup-and-restore.yml and test-upgrade-and-rollback.yml + # workflows have been consolidated into test-e2e.yml. 
Database + # image versions for e2e are resolved from the operator's + # built-in defaults (constants.go) rather than per-workflow + # fallback tags, so no sed is required here. + + # 5. Update sidecar plugin config test (hardcoded expected gateway image) sed -i "s|:${OLD_VERSION}\"|:${NEW_VERSION}\"|g" \ operator/cnpg-plugins/sidecar-injector/internal/config/config_test.go - # 7. Update build workflow defaults + # 6. Update build workflow defaults sed -i "s|DEFAULT_DOCUMENTDB_VERSION: '${OLD_VERSION}'|DEFAULT_DOCUMENTDB_VERSION: '${NEW_VERSION}'|" \ .github/workflows/build_documentdb_images.yml sed -i "s|default: '${OLD_VERSION}'|default: '${NEW_VERSION}'|g" \ .github/workflows/build_documentdb_images.yml - # 8. Update release workflow default version + # 7. Update release workflow default version sed -i "s|default: '${OLD_VERSION}'|default: '${NEW_VERSION}'|g" \ .github/workflows/release_documentdb_images.yml - # 9. Update gateway Dockerfile default source image ARG + # 8. Update gateway Dockerfile default source image ARG sed -i "s|pg17-${OLD_VERSION}|pg17-${NEW_VERSION}|" \ .github/dockerfiles/Dockerfile_gateway_public_image @@ -159,7 +156,6 @@ jobs: - Updated `DEFAULT_DOCUMENTDB_IMAGE` and `DEFAULT_GATEWAY_IMAGE` in `constants.go` - Updated sidecar plugin default gateway image in `config.go` and `config_test.go` - Updated `documentDbVersion` in Helm chart `values.yaml` - - Updated fallback images in `test-backup-and-restore.yml` - Updated build/release workflow defaults in `build_documentdb_images.yml` and `release_documentdb_images.yml` - Updated gateway Dockerfile default source image in `Dockerfile_gateway_public_image` diff --git a/.github/workflows/release_images.yml b/.github/workflows/release_images.yml index 8ca23db9..e1e603c4 100644 --- a/.github/workflows/release_images.yml +++ b/.github/workflows/release_images.yml @@ -25,11 +25,6 @@ on: source_ref: description: 'Git ref to package the Helm chart from (tag or commit recommended to avoid drift)' 
required: true - run_tests: - description: 'Run tests before releasing' - required: false - default: true - type: boolean permissions: contents: read @@ -38,36 +33,16 @@ permissions: id-token: write jobs: - # Optional test jobs - run both E2E and integration tests in parallel if enabled - test-e2e: - name: E2E Test Images Before Release - if: ${{ inputs.run_tests == true }} - uses: ./.github/workflows/test-E2E.yml - with: - image_tag: ${{ inputs.candidate_version }} - secrets: inherit - - test-integration: - name: Integration Test Images Before Release - if: ${{ inputs.run_tests == true }} - uses: ./.github/workflows/test-integration.yml - with: - image_tag: ${{ inputs.candidate_version }} - secrets: inherit - - test-backup-and-restore: - name: Test Backup and Restore - if: ${{ inputs.run_tests == true }} - uses: ./.github/workflows/test-backup-and-restore.yml - with: - image_tag: ${{ inputs.candidate_version }} - secrets: inherit - + # NOTE: Pre-release E2E/integration/backup gates were removed when the + # legacy test-E2E.yml / test-integration.yml / test-backup-and-restore.yml + # workflows were consolidated into the unified test-e2e.yml workflow + # (see docs/designs/e2e-test-suite.md). That workflow is triggered + # on pull_request to main and guards merges to the source branch. + # This release workflow now assumes the candidate artifact has + # already passed PR-level CI. 
copy-and-push-manifest: name: Release Images runs-on: ubuntu-latest - needs: [test-e2e, test-integration, test-backup-and-restore] - if: ${{ always() && (needs.test-e2e.result == 'success' || needs.test-e2e.result == 'skipped') && (needs.test-integration.result == 'success' || needs.test-integration.result == 'skipped') && (needs.test-backup-and-restore.result == 'success' || needs.test-backup-and-restore.result == 'skipped') }} strategy: matrix: image: [operator, sidecar, documentdb, gateway] diff --git a/.github/workflows/release_operator.yml b/.github/workflows/release_operator.yml index 013b0b8b..5fbbbe88 100644 --- a/.github/workflows/release_operator.yml +++ b/.github/workflows/release_operator.yml @@ -18,11 +18,6 @@ on: source_ref: description: 'Git ref to package the Helm chart from (tag or commit recommended to avoid drift)' required: true - run_tests: - description: 'Run tests before releasing' - required: false - default: true - type: boolean permissions: contents: read @@ -32,31 +27,13 @@ permissions: jobs: # --------------------------------------------------------------------------- - # Optional test gate — run E2E, integration, and backup tests in parallel - # --------------------------------------------------------------------------- - test-e2e: - name: E2E Test Images Before Release - if: ${{ inputs.run_tests == true }} - uses: ./.github/workflows/test-E2E.yml - with: - image_tag: ${{ inputs.candidate_version }} - secrets: inherit - - test-integration: - name: Integration Test Images Before Release - if: ${{ inputs.run_tests == true }} - uses: ./.github/workflows/test-integration.yml - with: - image_tag: ${{ inputs.candidate_version }} - secrets: inherit - - test-backup-and-restore: - name: Test Backup and Restore - if: ${{ inputs.run_tests == true }} - uses: ./.github/workflows/test-backup-and-restore.yml - with: - image_tag: ${{ inputs.candidate_version }} - secrets: inherit + # NOTE: Pre-release E2E/integration/backup gates were removed when the 
+ # legacy test-E2E.yml / test-integration.yml / test-backup-and-restore.yml + # workflows were consolidated into the unified test-e2e.yml workflow + # (see docs/designs/e2e-test-suite.md). That workflow is triggered + # on pull_request to main and guards merges to the source branch. + # This release workflow now assumes the candidate artifact has + # already passed PR-level CI. # --------------------------------------------------------------------------- # Promote operator and sidecar images (retag candidate → release) @@ -64,8 +41,6 @@ jobs: promote-operator-images: name: Promote ${{ matrix.image }} runs-on: ubuntu-latest - needs: [test-e2e, test-integration, test-backup-and-restore] - if: ${{ always() && (needs.test-e2e.result == 'success' || needs.test-e2e.result == 'skipped') && (needs.test-integration.result == 'success' || needs.test-integration.result == 'skipped') && (needs.test-backup-and-restore.result == 'success' || needs.test-backup-and-restore.result == 'skipped') }} strategy: matrix: # NOTE: wal-replica excluded until its Dockerfile is created (feature-flagged, disabled by default). 
diff --git a/.github/workflows/test-E2E.yml b/.github/workflows/test-E2E.yml deleted file mode 100644 index d18985bf..00000000 --- a/.github/workflows/test-E2E.yml +++ /dev/null @@ -1,493 +0,0 @@ -name: TEST - E2E Test with mongosh - -on: - push: - branches: [ main, develop ] - pull_request: - branches: [ main, develop ] - schedule: - # Run daily at 2 AM UTC - - cron: '0 2 * * *' - workflow_dispatch: - inputs: - node_count: - description: 'Number of DocumentDB nodes' - required: false - default: '1' - test_level: - description: 'Test level to run' - required: false - default: 'full' - type: choice - options: - - quick - - integration - - full - image_tag: - description: 'Optional: Use existing image tag instead of building locally' - required: false - type: string - workflow_call: - inputs: - image_tag: - description: 'Optional: Use existing image tag instead of building locally' - required: false - type: string - node_count: - description: 'Number of DocumentDB nodes' - required: false - default: '1' - type: string - test_level: - description: 'Test level to run' - required: false - default: 'full' - type: string - -permissions: - contents: read - actions: read - packages: read - -env: - CERT_MANAGER_NS: cert-manager - OPERATOR_NS: documentdb-operator - DB_NS: documentdb-e2e-test - DB_NAME: documentdb-e2e - DB_USERNAME: k8s_secret_user - DB_PASSWORD: K8sSecret100 - DB_PORT: 10260 - -jobs: - # Conditional build workflow - only run if image_tag is not provided or on pull_request - build: - name: Build Images and Charts - if: ${{ (inputs.image_tag == '' || inputs.image_tag == null) || github.event_name == 'pull_request' }} - uses: ./.github/workflows/test-build-and-package.yml - with: - version: '0.2.0' - secrets: inherit - - e2e-test: - name: E2E (${{ matrix.architecture }}, K8s ${{ matrix.kubernetes_version }}) - runs-on: ${{ matrix.runner }} - timeout-minutes: 60 - needs: build - if: always() && (needs.build.result == 'success' || needs.build.result == 'skipped') 
- - strategy: - matrix: - include: - # ImageVolume mode (K8s >= 1.35) - uses separate PostgreSQL + extension images - - architecture: amd64 - runner: ubuntu-22.04 - test_scenario_name: "single-node" - node_count: 1 - instances_per_node: 1 - kubernetes_version: "v1.35.0" - - architecture: arm64 - runner: ubuntu-22.04-arm - test_scenario_name: "single-node" - node_count: 1 - instances_per_node: 1 - kubernetes_version: "v1.35.0" - - env: - # Use built image tag on PR or when no external tag provided - IMAGE_TAG: ${{ (github.event_name == 'pull_request' || inputs.image_tag == '' || inputs.image_tag == null) && needs.build.outputs.image_tag || inputs.image_tag }} - EXT_IMAGE_TAG: ${{ needs.build.outputs.ext_image_tag || '' }} - CHART_VERSION: ${{ needs.build.outputs.chart_version || '0.1.0' }} - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Download artifacts - if: ${{ (inputs.image_tag == '' || inputs.image_tag == null) || github.event_name == 'pull_request' }} - uses: actions/download-artifact@v4 - with: - pattern: 'build-*' - path: ./artifacts - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Log test configuration - run: | - echo "## E2E Test Configuration" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - if [[ -n "${{ inputs.image_tag }}" ]]; then - echo "- **Mode**: Using provided image tag" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: \`${{ inputs.image_tag }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Source**: External (no local build)" >> $GITHUB_STEP_SUMMARY - else - echo "- **Mode**: Using locally built images" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: \`${{ env.IMAGE_TAG }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Source**: Local build pipeline" >> $GITHUB_STEP_SUMMARY - fi - echo "- **Architecture**: \`${{ matrix.architecture }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Kubernetes Version**: \`${{ matrix.kubernetes_version }}\`" >> $GITHUB_STEP_SUMMARY - - - 
name: Setup test environment - uses: ./.github/actions/setup-test-environment - with: - test-type: 'e2e' - architecture: ${{ matrix.architecture }} - runner: ${{ matrix.runner }} - test-scenario-name: ${{ matrix.test_scenario_name }} - node-count: '${{ matrix.node_count }}' - instances-per-node: '${{ matrix.instances_per_node }}' - cert-manager-namespace: ${{ env.CERT_MANAGER_NS }} - operator-namespace: ${{ env.OPERATOR_NS }} - db-namespace: ${{ env.DB_NS }} - db-cluster-name: ${{ env.DB_NAME }} - db-username: ${{ env.DB_USERNAME }} - db-password: ${{ env.DB_PASSWORD }} - db-port: ${{ env.DB_PORT }} - image-tag: ${{ env.IMAGE_TAG }} - documentdb-image-tag: ${{ env.EXT_IMAGE_TAG }} - chart-version: ${{ env.CHART_VERSION }} - use-external-images: ${{ github.event_name != 'pull_request' && inputs.image_tag != '' && inputs.image_tag != null }} - github-token: ${{ secrets.GITHUB_TOKEN }} - repository-owner: ${{ github.repository_owner }} - kubernetes-version: ${{ matrix.kubernetes_version }} - - - name: Setup port forwarding for comprehensive tests - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Execute comprehensive mongosh tests - run: | - echo "Running comprehensive mongosh validation tests on ${{ matrix.architecture }}..." 
- - # Run comprehensive tests with validation using external script - if mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --file operator/src/scripts/test-scripts/comprehensive_mongosh_tests.js; then - echo "✓ Comprehensive mongosh tests completed successfully on ${{ matrix.architecture }}" - else - echo "❌ Comprehensive mongosh tests failed on ${{ matrix.architecture }}" - exit 1 - fi - - - name: Cleanup comprehensive test port forwarding - if: always() - run: | - # Stop port-forward if it exists - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - - # Clean up output log - rm -f /tmp/pf_output.log - - # Clean up output log - rm -f /tmp/pf_output.log - - - name: Setup port forwarding for performance tests - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'performance' - - - name: Execute performance tests - run: | - echo "Running performance validation tests on ${{ matrix.architecture }}..." 
- - # Run performance tests using external script - if mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --file operator/src/scripts/test-scripts/performance_test.js; then - echo "✓ Performance tests completed successfully on ${{ matrix.architecture }}" - else - echo "❌ Performance tests failed on ${{ matrix.architecture }}" - exit 1 - fi - - - name: Cleanup performance testing - if: always() - run: | - # Stop performance test port-forward - if [ -f /tmp/perf_pf_pid ]; then - PF_PID=$(cat /tmp/perf_pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/perf_pf_pid - fi - - # Clean up output log - rm -f /tmp/perf_pf_output.log - - # Clean up output log - rm -f /tmp/perf_pf_output.log - - - name: Verify DocumentDB Status and Connection String - run: | - echo "Verifying DocumentDB status fields on ${{ matrix.architecture }}..." - - # Get the DocumentDB resource status - DB_STATUS=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.status}') - CONNECTION_STRING=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.connectionString}') - - echo "DocumentDB Status: $DB_STATUS" - echo "Connection String: ${CONNECTION_STRING:0:50}..." 
# Print first 50 chars only - - # Verify status is "Cluster in healthy state" - if [[ "$DB_STATUS" == "Cluster in healthy state" ]]; then - echo "✓ DocumentDB status is healthy" - else - echo "❌ DocumentDB status is not healthy: $DB_STATUS" - kubectl get documentdb $DB_NAME -n $DB_NS -o yaml - exit 1 - fi - - # Verify connection string is not empty - if [[ -n "$CONNECTION_STRING" ]]; then - echo "✓ Connection string is populated" - else - echo "❌ Connection string is empty" - kubectl get documentdb $DB_NAME -n $DB_NS -o yaml - exit 1 - fi - - echo "✅ DocumentDB status validation passed" - - - name: Test OTel monitoring sidecar enable/disable - run: | - echo "Testing OTel Collector sidecar injection on ${{ matrix.architecture }}..." - chmod +x operator/src/scripts/test-scripts/test-otel-monitoring.sh - operator/src/scripts/test-scripts/test-otel-monitoring.sh - - - name: Test cluster health and monitoring - run: | - echo "Testing cluster health and monitoring on ${{ matrix.architecture }}..." - - # Check DocumentDB resource status - kubectl get documentdb $DB_NAME -n $DB_NS -o yaml - - # Check pod resources and health - kubectl top pods -n $DB_NS --containers || echo "Metrics server not available" - - # Check logs for any errors - kubectl logs -n $DB_NS -l cnpg.io/cluster=$DB_NAME --tail=50 - - # Check events - kubectl get events -n $DB_NS --sort-by='.lastTimestamp' - - - name: Verify mount options are set by PV controller - run: | - echo "Verifying PV mount options are set by the PV controller..." 
- - # Find PV directly using documentdb.io labels set by the PV controller - pv_name=$(kubectl get pv -l documentdb.io/cluster=${{ env.DB_NAME }},documentdb.io/namespace=${{ env.DB_NS }} -o jsonpath='{.items[0].metadata.name}') - echo "PV name: $pv_name" - - if [ -z "$pv_name" ]; then - echo "❌ Failed to find PV with documentdb.io/cluster=${{ env.DB_NAME }} and documentdb.io/namespace=${{ env.DB_NS }}" - exit 1 - fi - - # Get mount options from PV - mount_options=$(kubectl get pv $pv_name -o jsonpath='{.spec.mountOptions}') - echo "PV mount options: $mount_options" - - # Check for security mount options (nodev, nosuid, noexec) - if echo "$mount_options" | grep -q "nodev" && \ - echo "$mount_options" | grep -q "nosuid" && \ - echo "$mount_options" | grep -q "noexec"; then - echo "✓ PV mount options (nodev, nosuid, noexec) are set correctly" - else - echo "❌ PV mount options are missing. Expected nodev, nosuid, noexec" - exit 1 - fi - - - name: Test PV reclaim policy default and explicit Delete - shell: bash - run: | - echo "Testing PV reclaim policy - default (Retain) and explicit Delete..." 
- - # Test 1: Verify default policy is Retain on the existing cluster - echo "=== Test 1: Verify default PV reclaim policy is Retain ===" - - # Find PV directly using documentdb.io labels set by the PV controller - pv_name=$(kubectl get pv -l documentdb.io/cluster=${{ env.DB_NAME }},documentdb.io/namespace=${{ env.DB_NS }} -o jsonpath='{.items[0].metadata.name}') - echo "PV name: $pv_name" - - if [ -z "$pv_name" ]; then - echo "❌ Failed to find PV with documentdb.io/cluster=${{ env.DB_NAME }} and documentdb.io/namespace=${{ env.DB_NS }}" - exit 1 - fi - - # Verify default PV reclaim policy is Retain - current_policy=$(kubectl get pv $pv_name -o jsonpath='{.spec.persistentVolumeReclaimPolicy}') - echo "Current PV reclaim policy: $current_policy" - - if [ "$current_policy" != "Retain" ]; then - echo "❌ Expected default PV reclaim policy to be 'Retain', but got '$current_policy'" - exit 1 - fi - echo "✓ Default PV reclaim policy is correctly set to Retain" - - # Test 2: Change policy to Delete and verify PV is deleted with cluster - echo "" - echo "=== Test 2: Change policy to Delete and verify PV cleanup ===" - - # Patch the existing DocumentDB to set persistentVolumeReclaimPolicy to Delete - echo "Patching DocumentDB to set persistentVolumeReclaimPolicy to Delete..." - kubectl -n ${{ env.DB_NS }} patch documentdb ${{ env.DB_NAME }} --type=merge \ - -p '{"spec":{"resource":{"storage":{"persistentVolumeReclaimPolicy":"Delete"}}}}' - - # Wait for PV controller to update the PV reclaim policy - echo "Waiting for PV reclaim policy to be updated to Delete..." - MAX_RETRIES=30 - SLEEP_INTERVAL=5 - ITER=0 - while [ $ITER -lt $MAX_RETRIES ]; do - new_policy=$(kubectl get pv $pv_name -o jsonpath='{.spec.persistentVolumeReclaimPolicy}') - if [ "$new_policy" == "Delete" ]; then - echo "✓ PV reclaim policy updated to Delete" - break - else - echo "PV reclaim policy is still '$new_policy'. Waiting..." 
- sleep $SLEEP_INTERVAL - fi - ((++ITER)) - done - - if [ "$new_policy" != "Delete" ]; then - echo "❌ PV reclaim policy was not updated to Delete within expected time" - exit 1 - fi - - # Delete the DocumentDB cluster - echo "Deleting DocumentDB cluster to test PV cleanup with Delete policy..." - kubectl -n ${{ env.DB_NS }} delete documentdb ${{ env.DB_NAME }} --wait=false - - # Wait for DocumentDB to be deleted - echo "Waiting for DocumentDB to be deleted..." - MAX_RETRIES=30 - SLEEP_INTERVAL=10 - ITER=0 - while [ $ITER -lt $MAX_RETRIES ]; do - db_exists=$(kubectl -n ${{ env.DB_NS }} get documentdb ${{ env.DB_NAME }} --ignore-not-found) - if [ -z "$db_exists" ]; then - echo "✓ DocumentDB deleted successfully." - break - else - echo "DocumentDB still exists. Waiting..." - sleep $SLEEP_INTERVAL - fi - ((++ITER)) - done - - # Verify no PVsRetained warning event was emitted (since policy is Delete) - events=$(kubectl -n ${{ env.DB_NS }} get events --field-selector reason=PVsRetained,involvedObject.name=${{ env.DB_NAME }} --ignore-not-found -o jsonpath='{.items}') - if [ -z "$events" ] || [ "$events" == "[]" ]; then - echo "✓ No PVsRetained warning event emitted (expected for Delete policy)" - else - echo "⚠️ Unexpected PVsRetained event found for Delete policy cluster" - fi - - # Wait a bit for PV to be deleted (the storage class handles actual deletion) - echo "Waiting for PV to be deleted..." - sleep 30 - - # Verify PV was deleted (because reclaim policy is Delete) - pv_exists=$(kubectl get pv $pv_name --ignore-not-found 2>/dev/null) - if [ -z "$pv_exists" ]; then - echo "✓ PV $pv_name was deleted as expected (Delete policy)" - else - pv_status=$(kubectl get pv $pv_name -o jsonpath='{.status.phase}') - echo "⚠️ PV $pv_name still exists with status: $pv_status" - echo "Note: PV deletion depends on the storage provisioner. The reclaim policy was correctly set to Delete." 
- fi - - echo "" - echo "✓ PV reclaim policy test completed successfully" - - - name: Collect comprehensive logs on failure - if: failure() - uses: ./.github/actions/collect-logs - with: - architecture: ${{ matrix.architecture }} - operator-namespace: ${{ env.OPERATOR_NS }} - db-namespace: ${{ env.DB_NS }} - db-name: ${{ env.DB_NAME }} - - - name: Test completion summary - if: always() - run: | - echo "## E2E Test Summary for ${{ matrix.architecture }} (K8s ${{ matrix.kubernetes_version }})" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "- **Architecture**: ${{ matrix.architecture }}" >> $GITHUB_STEP_SUMMARY - echo "- **Runner**: ${{ matrix.runner }}" >> $GITHUB_STEP_SUMMARY - echo "- **Kubernetes Version**: ${{ matrix.kubernetes_version }}" >> $GITHUB_STEP_SUMMARY - echo "- **Test Scenario**: ${{ matrix.test_scenario_name }}" >> $GITHUB_STEP_SUMMARY - echo "- **Node Count**: ${{ matrix.node_count }}" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: ${{ env.IMAGE_TAG }}" >> $GITHUB_STEP_SUMMARY - echo "- **Chart Version**: ${{ env.CHART_VERSION }}" >> $GITHUB_STEP_SUMMARY - if [[ -n "${{ inputs.image_tag }}" ]]; then - echo "- **Using External Images**: true" >> $GITHUB_STEP_SUMMARY - else - echo "- **Using External Images**: false" >> $GITHUB_STEP_SUMMARY - fi - - if [[ "${{ job.status }}" == "success" ]]; then - echo "- **Status**: ✅ PASSED" >> $GITHUB_STEP_SUMMARY - else - echo "- **Status**: ❌ FAILED" >> $GITHUB_STEP_SUMMARY - fi - - test-summary: - name: E2E Test Summary - runs-on: ubuntu-latest - if: always() - needs: [build, e2e-test] - steps: - - name: Generate overall test summary - run: | - echo "## E2E Test Results Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Test Configuration:" >> $GITHUB_STEP_SUMMARY - echo "- **Build Step**: ${{ inputs.image_tag && 'Skipped (using external images)' || 'Executed' }}" >> $GITHUB_STEP_SUMMARY - echo "- **External Images**: ${{ inputs.image_tag && 'true' || 'false' }}" 
>> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: ${{ inputs.image_tag || 'Built from source' }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Parallel Architecture Testing:" >> $GITHUB_STEP_SUMMARY - echo "- **AMD64**: Tested in parallel on ubuntu-latest" >> $GITHUB_STEP_SUMMARY - echo "- **ARM64**: Tested in parallel on ubuntu-22.04-arm" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "Both architectures run simultaneously for faster feedback!" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Job Results:" >> $GITHUB_STEP_SUMMARY - echo "- **Build**: ${{ needs.build.result }}" >> $GITHUB_STEP_SUMMARY - echo "- **E2E Tests**: ${{ needs.e2e-test.result }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - # Overall status - if [[ "${{ needs.e2e-test.result }}" == "success" ]]; then - echo "### Overall Status: ✅ ALL TESTS PASSED" >> $GITHUB_STEP_SUMMARY - echo "Both AMD64 and ARM64 architectures tested successfully in parallel!" >> $GITHUB_STEP_SUMMARY - else - echo "### Overall Status: ❌ SOME TESTS FAILED" >> $GITHUB_STEP_SUMMARY - echo "Check individual job results above for details." 
>> $GITHUB_STEP_SUMMARY - fi diff --git a/.github/workflows/test-backup-and-restore.yml b/.github/workflows/test-backup-and-restore.yml deleted file mode 100644 index e1dfb899..00000000 --- a/.github/workflows/test-backup-and-restore.yml +++ /dev/null @@ -1,580 +0,0 @@ -name: Test - Backup and Restore - -on: - push: - branches: [ main, develop ] - pull_request: - branches: [ main, develop ] - schedule: - - cron: '0 2 * * *' - workflow_dispatch: - inputs: - node_count: - description: 'Number of DocumentDB nodes' - required: false - default: '1' - image_tag: - description: 'Optional: Use existing image tag instead of building locally' - required: false - type: string - workflow_call: - inputs: - image_tag: - description: 'Optional: Use existing image tag instead of building locally' - required: false - type: string - node_count: - description: 'Number of DocumentDB nodes' - required: false - default: '1' - type: string - -permissions: - contents: read - actions: read - packages: read - -env: - CERT_MANAGER_NS: cert-manager - OPERATOR_NS: documentdb-operator - DB_NS: documentdb-backup-and-restore-test - DB_NAME: documentdb-backup-and-restore - DB_RESTORE_NAME: documentdb-restore-from-backup - DB_USERNAME: k8s_secret_user - DB_PASSWORD: K8sSecret100 - DB_PORT: 10260 - DOCUMENTDB_IMAGE: "" - GATEWAY_IMAGE: "" - -jobs: - # Conditional build workflow - only run if image_tag is not provided or on pull_request - build: - name: Build Images and Charts - if: ${{ (inputs.image_tag == '' || inputs.image_tag == null) || github.event_name == 'pull_request' }} - uses: ./.github/workflows/test-build-and-package.yml - with: - version: '0.2.0' - secrets: inherit - - backup-and-restore-test: - name: Run Backup and Restore Tests - runs-on: ${{ matrix.runner }} - timeout-minutes: 60 - needs: build - if: always() && (needs.build.result == 'success' || needs.build.result == 'skipped') - - strategy: - matrix: - include: - - architecture: amd64 - runner: ubuntu-22.04 - test_scenario_name: 
"single-node" - node_count: 1 - instances_per_node: 1 - - architecture: arm64 - runner: ubuntu-22.04-arm - test_scenario_name: "single-node" - node_count: 1 - instances_per_node: 1 - env: - # Use built image tag on PR or when no external tag provided - IMAGE_TAG: ${{ (github.event_name == 'pull_request' || inputs.image_tag == '' || inputs.image_tag == null) && needs.build.outputs.image_tag || inputs.image_tag }} - EXT_IMAGE_TAG: ${{ needs.build.outputs.ext_image_tag || '' }} - CHART_VERSION: ${{ needs.build.outputs.chart_version || '0.1.0' }} - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Download artifacts - if: ${{ (inputs.image_tag == '' || inputs.image_tag == null) || github.event_name == 'pull_request' }} - uses: actions/download-artifact@v4 - with: - pattern: 'build-*' - path: ./artifacts - - - name: Log test configuration - run: | - echo "## Backup and Restore Test Configuration" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - if [[ -n "${{ inputs.image_tag }}" ]]; then - echo "- **Mode**: Using provided image tag" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: \`${{ inputs.image_tag }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Source**: External (no local build)" >> $GITHUB_STEP_SUMMARY - else - echo "- **Mode**: Using locally built images" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: \`${{ env.IMAGE_TAG }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Source**: Local build pipeline" >> $GITHUB_STEP_SUMMARY - fi - echo "- **Architecture**: \`${{ matrix.architecture }}\`" >> $GITHUB_STEP_SUMMARY - - - name: Determine DocumentDB and Gateway image references - run: | - # For local builds, use the locally built images; for external, read documentDbVersion from values.yaml. - # Database images use a separate version track from operator images. 
- if [[ "${{ github.event_name }}" == "pull_request" || -z "${{ inputs.image_tag }}" ]]; then - DOCDB_IMAGE="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:${{ env.EXT_IMAGE_TAG }}-${{ matrix.architecture }}" - GW_IMAGE="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/gateway:${{ env.EXT_IMAGE_TAG }}-${{ matrix.architecture }}" - else - DOCDB_TAG=$(grep 'documentDbVersion:' operator/documentdb-helm-chart/values.yaml | sed 's/.*"\(.*\)".*/\1/') - echo "Using documentDbVersion from values.yaml: $DOCDB_TAG" - DOCDB_IMAGE="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:${DOCDB_TAG}" - GW_IMAGE="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/gateway:${DOCDB_TAG}" - fi - echo "DOCUMENTDB_IMAGE_REF=$DOCDB_IMAGE" >> $GITHUB_ENV - echo "GATEWAY_IMAGE_REF=$GW_IMAGE" >> $GITHUB_ENV - echo "DocumentDB image: $DOCDB_IMAGE" - echo "Gateway image: $GW_IMAGE" - - - name: Setup test environment - uses: ./.github/actions/setup-test-environment - with: - architecture: ${{ matrix.architecture }} - runner: ${{ matrix.runner }} - test-scenario-name: ${{ matrix.test_scenario_name }} - node-count: '${{ matrix.node_count }}' - instances-per-node: '${{ matrix.instances_per_node }}' - cert-manager-namespace: ${{ env.CERT_MANAGER_NS }} - operator-namespace: ${{ env.OPERATOR_NS }} - db-namespace: ${{ env.DB_NS }} - db-cluster-name: ${{ env.DB_NAME }} - db-username: ${{ env.DB_USERNAME }} - db-password: ${{ env.DB_PASSWORD }} - db-port: ${{ env.DB_PORT }} - image-tag: ${{ env.IMAGE_TAG }} - documentdb-image-tag: ${{ env.EXT_IMAGE_TAG }} - chart-version: ${{ env.CHART_VERSION }} - documentdb-image: ${{ env.DOCUMENTDB_IMAGE_REF }} - gateway-image: ${{ env.GATEWAY_IMAGE_REF }} - use-external-images: ${{ github.event_name != 'pull_request' && inputs.image_tag != '' && inputs.image_tag != null }} - github-token: ${{ secrets.GITHUB_TOKEN }} - repository-owner: ${{ github.repository_owner 
}} - - name: Setup port forwarding for comprehensive tests - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Insert test data using mongosh - run: | - echo "Inserting test data into DocumentDB cluster..." - if mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval "for (let i = 1; i <= 100; i++) { db.testCollection.insertOne({ index: i, message: 'This is document ' + i }); }" ; then - echo "✓ Test data insertion completed successfully on ${{ matrix.architecture }}" - else - echo "❌ Test data insertion failed on ${{ matrix.architecture }}" - exit 1 - fi - - echo "Verifying inserted test data..." - count=$(mongosh 127.0.0.1:$DB_PORT --quiet --eval "db.testCollection.countDocuments({})" -u $DB_USERNAME -p $DB_PASSWORD --authenticationMechanism SCRAM-SHA-256 --tls --tlsAllowInvalidCertificates) - if [[ "$count" -eq 100 ]]; then - echo "✓ Test data verification completed successfully on ${{ matrix.architecture }}" - else - echo "❌ Test data verification failed on ${{ matrix.architecture }}" - exit 1 - fi - - - name: Create ScheduledBackup to trigger backups - shell: bash - run: | - cat </dev/null || true - rm -f /tmp/pf_pid - fi - - # Clean up output log - rm -f /tmp/pf_output.log - - # Clean up output log - rm -f /tmp/pf_output.log - - - name: Restore from backup - shell: bash - run: | - # Get the latest backup name - backup_name=$(kubectl -n ${{ env.DB_NS }} get backups -o jsonpath='{.items[?(@.status.phase=="completed")].metadata.name}' | tr ' ' '\n' | sort | tail -n 1) - - # Create DocumentDB resource - cat </dev/null || echo "") - if [ "$status" == "Cluster in healthy state" ]; then - echo "✓ DocumentDB cluster ${{ env.DB_RESTORE_NAME }} is healthy." 
- exit 0 - else - echo "Current status: $status. Waiting..." - kubectl -n ${{ env.DB_NS }} get documentdb ${{ env.DB_RESTORE_NAME }} || true - sleep $SLEEP_INTERVAL - fi - ((++ITER)) - done - echo "❌ DocumentDB cluster ${{ env.DB_RESTORE_NAME }} did not become healthy within expected time." - kubectl -n ${{ env.DB_NS }} describe documentdb ${{ env.DB_RESTORE_NAME }} || true - kubectl -n ${{ env.DB_NS }} get pods -l cnpg.io/cluster=${{ env.DB_RESTORE_NAME }} || true - exit 1 - - - name: Setup port forwarding for comprehensive tests - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_RESTORE_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Validate restored data - run: | - # Validate that the restored cluster has the expected data - count=$(mongosh 127.0.0.1:$DB_PORT --quiet --eval "db.testCollection.countDocuments({})" -u $DB_USERNAME -p $DB_PASSWORD --authenticationMechanism SCRAM-SHA-256 --tls --tlsAllowInvalidCertificates) - if [ "$count" -eq 100 ]; then - echo "✓ Data validation completed successfully on ${{ matrix.architecture }}" - else - echo "❌ Data validation failed on ${{ matrix.architecture }}" - exit 1 - fi - - - name: Cleanup comprehensive test port forwarding - if: always() - run: | - # Stop port-forward if it exists - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - - # Clean up output log - rm -f /tmp/pf_output.log - - # Clean up output log - rm -f /tmp/pf_output.log - - - name: Test if expired backups are cleaned up - shell: bash - run: | - echo "Verifying expired backups are cleaned up..." 
- # pick up one backup name - backup_name=$(kubectl -n $DB_NS get backups -o jsonpath='{.items[0].metadata.name}') - # set expiration time to past - kubectl -n $DB_NS patch backup $backup_name --type='json' --type=merge -p='{"status":{"expiredAt":"2000-01-01T00:00:00Z"}}' --subresource=status - # wait for cleanup - MAX_RETRIES=10 - SLEEP_INTERVAL=15 - ITER=0 - while [ $ITER -lt $MAX_RETRIES ]; do - backup_status=$(kubectl -n $DB_NS get backup $backup_name --ignore-not-found) - if [ -z "$backup_status" ]; then - echo "✓ Expired backup cleaned up successfully." - exit 0 - else - echo "Backup $backup_name still exists. Retrying in $SLEEP_INTERVAL seconds..." - kubectl -n $DB_NS get backup $backup_name - sleep $SLEEP_INTERVAL - fi - ((++ITER)) - done - echo "❌ Expired backup was not cleaned up within expected time." - exit 1 - - - name: Test PV retention after DocumentDB deletion - id: test-pv-retention - shell: bash - run: | - echo "Testing PV retention after DocumentDB deletion..." - - # Find PV directly using documentdb.io labels set by the PV controller - # PVs are cluster-scoped and labeled with documentdb.io/cluster and documentdb.io/namespace - pv_name=$(kubectl get pv -l documentdb.io/cluster=${{ env.DB_RESTORE_NAME }},documentdb.io/namespace=${{ env.DB_NS }} -o jsonpath='{.items[0].metadata.name}') - echo "PV name: $pv_name" - - if [ -z "$pv_name" ]; then - echo "❌ Failed to find PV with documentdb.io/cluster=${{ env.DB_RESTORE_NAME }} and documentdb.io/namespace=${{ env.DB_NS }}" - exit 1 - fi - - # Check current PV reclaim policy - should be Retain by default - current_policy=$(kubectl get pv $pv_name -o jsonpath='{.spec.persistentVolumeReclaimPolicy}') - echo "Current PV reclaim policy: $current_policy" - - if [ "$current_policy" != "Retain" ]; then - echo "❌ Expected PV reclaim policy to be 'Retain' (default), but got '$current_policy'" - exit 1 - fi - echo "✓ PV reclaim policy is correctly set to Retain (default)" - - # Delete the restored DocumentDB 
cluster - kubectl -n ${{ env.DB_NS }} delete documentdb ${{ env.DB_RESTORE_NAME }} --wait=false - - # Wait for DocumentDB to be deleted - echo "Waiting for DocumentDB to be deleted..." - MAX_RETRIES=30 - SLEEP_INTERVAL=10 - ITER=0 - while [ $ITER -lt $MAX_RETRIES ]; do - db_exists=$(kubectl -n ${{ env.DB_NS }} get documentdb ${{ env.DB_RESTORE_NAME }} --ignore-not-found) - if [ -z "$db_exists" ]; then - echo "✓ DocumentDB deleted successfully." - break - else - echo "DocumentDB still exists. Waiting..." - sleep $SLEEP_INTERVAL - fi - ((++ITER)) - done - - # Verify PV still exists (because reclaim policy is Retain) - pv_exists=$(kubectl get pv $pv_name --ignore-not-found) - if [ -n "$pv_exists" ]; then - echo "✓ PV $pv_name retained after DocumentDB deletion" - else - echo "❌ PV $pv_name was deleted unexpectedly" - exit 1 - fi - - # Verify pv_name is not empty before writing to GITHUB_OUTPUT - if [ -z "$pv_name" ]; then - echo "❌ Error: PV name is empty, cannot proceed with recovery test" - exit 1 - fi - - # Store PV name for later steps using GitHub Actions output (more robust than temp files) - echo "pv_name=$pv_name" >> $GITHUB_OUTPUT - - - name: Restore DocumentDB from retained PV - shell: bash - run: | - pv_name="${{ steps.test-pv-retention.outputs.pv_name }}" - echo "Restoring DocumentDB from retained PV: $pv_name" - - # Create DocumentDB resource with PV recovery - echo "Creating DocumentDB with PV recovery from $pv_name" - cat </dev/null || echo "") - if [ "$status" == "Cluster in healthy state" ]; then - echo "✓ DocumentDB cluster ${{ env.DB_RESTORE_NAME }}-from-pv is healthy." - exit 0 - else - echo "Current status: $status. Waiting..." - kubectl -n ${{ env.DB_NS }} get documentdb ${{ env.DB_RESTORE_NAME }}-from-pv || true - sleep $SLEEP_INTERVAL - fi - ((++ITER)) - done - echo "❌ DocumentDB cluster ${{ env.DB_RESTORE_NAME }}-from-pv did not become healthy within expected time." 
- kubectl -n ${{ env.DB_NS }} describe documentdb ${{ env.DB_RESTORE_NAME }}-from-pv || true - kubectl -n ${{ env.DB_NS }} get pods -l cnpg.io/cluster=${{ env.DB_RESTORE_NAME }}-from-pv || true - exit 1 - - - name: Setup port forwarding for PV restored cluster - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_RESTORE_NAME }}-from-pv - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Validate data exists after PV restoration - run: | - echo "Validating data exists after PV restoration..." - - # Validate that the restored cluster has the expected data - count=$(mongosh 127.0.0.1:$DB_PORT --quiet --eval "db.testCollection.countDocuments({})" -u $DB_USERNAME -p $DB_PASSWORD --authenticationMechanism SCRAM-SHA-256 --tls --tlsAllowInvalidCertificates) - if [ "$count" -eq 100 ]; then - echo "✓ Data validation completed successfully after PV restoration on ${{ matrix.architecture }}" - else - echo "❌ Data validation failed after PV restoration on ${{ matrix.architecture }}. Count: $count" - exit 1 - fi - - - name: Verify temporary recovery PVC is cleaned up - shell: bash - run: | - echo "Verifying temporary recovery PVC is cleaned up after cluster is healthy..." 
- - # The temp PVC name follows the pattern: -pv-recovery-temp - temp_pvc_name="${{ env.DB_RESTORE_NAME }}-from-pv-pv-recovery-temp" - - # Wait a bit for cleanup to happen - sleep 10 - - # Check if temporary PVC still exists - temp_pvc_exists=$(kubectl -n ${{ env.DB_NS }} get pvc $temp_pvc_name --ignore-not-found -o name) - if [ -z "$temp_pvc_exists" ]; then - echo "✓ Temporary recovery PVC $temp_pvc_name was cleaned up successfully" - else - echo "❌ Temporary recovery PVC $temp_pvc_name still exists after cluster is healthy" - kubectl -n ${{ env.DB_NS }} get pvc $temp_pvc_name - exit 1 - fi - - - name: Cleanup PV restored cluster port forwarding - if: always() - run: | - # Stop port-forward if it exists - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - - # Clean up output log - rm -f /tmp/pf_output.log - - - name: Collect logs on failure - if: failure() - uses: ./.github/actions/collect-logs - with: - architecture: ${{ matrix.architecture }} - operator-namespace: ${{ env.OPERATOR_NS }} - db-namespace: ${{ env.DB_NS }} - db-cluster-name: ${{ env.DB_NAME }} - cert-manager-namespace: ${{ env.CERT_MANAGER_NS }} diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml new file mode 100644 index 00000000..8a0633c9 --- /dev/null +++ b/.github/workflows/test-e2e.yml @@ -0,0 +1,274 @@ +# Unified DocumentDB E2E test workflow. +# +# Replaces the legacy test-E2E.yml / test-integration.yml / +# test-backup-and-restore.yml / test-upgrade-and-rollback.yml quartet. +# See docs/designs/e2e-test-suite.md ("CI Workflow" section) for the +# design rationale. +# +# Each matrix job runs a single Ginkgo label-filtered slice of the suite +# under test/e2e/. The composite action .github/actions/setup-test-environment +# provisions a Kind cluster, installs cert-manager + the operator, and +# deploys any scenario-specific prerequisites. 
+ +name: TEST - E2E + +on: + push: + branches: [main] + pull_request: + branches: [main] + paths: + - 'test/e2e/**' + - 'operator/src/**' + - 'operator/documentdb-helm-chart/**' + - '.github/workflows/test-e2e.yml' + - '.github/actions/**' + workflow_dispatch: + inputs: + label: + description: 'Ginkgo --label-filter override (empty = use per-job default)' + required: false + type: string + default: '' + depth: + description: 'Test depth tier (maps to TEST_DEPTH / E2E_DEPTH)' + required: false + type: choice + options: + - Low + - Medium + - High + default: Medium + keep_clusters: + description: 'Keep Kind clusters running after tests (for debugging)' + required: false + type: boolean + default: false + +permissions: + contents: read + actions: read + packages: read + +env: + # Namespaces / identity used by the composite setup action. Held here + # so every job inherits them without duplication. + CERT_MANAGER_NS: cert-manager + OPERATOR_NS: documentdb-operator + DB_NS: documentdb-e2e + DB_NAME: documentdb-e2e + DB_USERNAME: k8s_secret_user + DB_PASSWORD: K8sSecret100 + DB_PORT: 10260 + +jobs: + # --------------------------------------------------------------------------- + # Build operator + gateway images and the helm chart once per workflow run. + # Each E2E job below downloads the resulting artifacts into ./artifacts so + # setup-test-environment can load them into its Kind cluster. + # --------------------------------------------------------------------------- + build: + name: Build Images and Charts + uses: ./.github/workflows/test-build-and-package.yml + with: + version: '0.2.0' + secrets: inherit + + # --------------------------------------------------------------------------- + # E2E matrix. + # + # Rows = (label-group, architecture). The label-group carries the + # Ginkgo --label-filter, Ginkgo --procs setting, and a human-readable + # scenario name used for artifact naming and kind cluster isolation. 
+  #
+  # arm64 rows target `ubuntu-22.04-arm` — the same GitHub-hosted runner
+  # the legacy workflows use. If that SKU becomes unavailable in the
+  # future, the arm64 rows can be removed or gated on a feature flag;
+  # do not silently drop them.
+  # ---------------------------------------------------------------------------
+  e2e:
+    name: E2E ${{ matrix.group }} (${{ matrix.architecture }})
+    needs: build
+    # NOTE(review): the `matrix` context is not available in job-level `if`
+    # expressions (only github/needs/vars/inputs are), so a clause like
+    # `matrix.group != 'performance'` compares against an empty string and
+    # never skips anything — the previous gate here was a silent no-op.
+    # Keep the job gate to build success only, which is all this level can
+    # actually express.
+    # TODO(review): gate the performance rows where `matrix` IS in scope —
+    # either a first step with
+    #   if: matrix.group != 'performance' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'run-perf')
+    # that short-circuits the job, or a dynamic matrix `exclude`.
+    if: always() && needs.build.result == 'success'
+    runs-on: ${{ matrix.runner }}
+    timeout-minutes: 90
+    strategy:
+      fail-fast: false
+      matrix:
+        architecture: [amd64, arm64]
+        group:
+          - smoke
+          - lifecycle
+          - scale
+          - data
+          - performance
+          - backup
+          - tls
+          - feature
+          - upgrade
+        include:
+          # Per-group defaults. `default_filter` is used when the
+          # workflow_dispatch `label` input is empty.
+          #
+          # NOTE: the design doc's "feature || exposure || status" filter
+          # uses the code-level label name `feature-gates` (see
+          # test/e2e/labels.go FeatureLabel). The design-doc name
+          # `feature` is a shorthand; we honour code as source of truth.
+          - group: smoke
+            default_filter: 'smoke'
+            procs: 'auto'
+          - group: lifecycle
+            default_filter: 'lifecycle'
+            procs: 'auto'
+          - group: scale
+            default_filter: 'scale'
+            procs: '2'
+          - group: data
+            default_filter: 'data'
+            procs: 'auto'
+          - group: performance
+            default_filter: 'performance'
+            procs: '1'
+          - group: backup
+            default_filter: 'backup'
+            procs: '2'
+          - group: tls
+            default_filter: 'tls'
+            procs: 'auto'
+          - group: feature
+            default_filter: 'feature-gates || exposure || status'
+            procs: 'auto'
+          - group: upgrade
+            default_filter: 'upgrade'
+            procs: '1'
+          # Per-architecture runner mapping.
+ - architecture: amd64 + runner: ubuntu-22.04 + - architecture: arm64 + runner: ubuntu-22.04-arm + env: + E2E_RUN_ID: ${{ github.run_id }}-${{ github.run_attempt }} + E2E_DEPTH: ${{ inputs.depth || 'Medium' }} + TEST_DEPTH: ${{ inputs.depth || 'Medium' }} + GINKGO_LABEL_FILTER: ${{ inputs.label != '' && inputs.label || matrix.default_filter }} + E2E_KEEP_CLUSTERS: ${{ inputs.keep_clusters && '1' || '0' }} + IMAGE_TAG: ${{ needs.build.outputs.image_tag }} + CHART_VERSION: ${{ needs.build.outputs.chart_version || '0.1.0' }} + # Upgrade job knobs. Defaults point at the public OCI chart; + # repository admins may override via `vars`/`secrets` for private + # scenarios. If any of the *_IMAGE values are left unset the + # upgrade specs Skip at runtime (see test/e2e/tests/upgrade/). + E2E_UPGRADE: ${{ matrix.group == 'upgrade' && '1' || '' }} + E2E_UPGRADE_PREVIOUS_CHART: ${{ vars.E2E_UPGRADE_PREVIOUS_CHART || 'oci://ghcr.io/documentdb/charts/documentdb-operator' }} + E2E_UPGRADE_PREVIOUS_VERSION: ${{ vars.E2E_UPGRADE_PREVIOUS_VERSION || '' }} + E2E_UPGRADE_CURRENT_CHART: ${{ vars.E2E_UPGRADE_CURRENT_CHART || '' }} + E2E_UPGRADE_OLD_DOCUMENTDB_IMAGE: ${{ vars.E2E_UPGRADE_OLD_DOCUMENTDB_IMAGE || '' }} + E2E_UPGRADE_NEW_DOCUMENTDB_IMAGE: ${{ vars.E2E_UPGRADE_NEW_DOCUMENTDB_IMAGE || '' }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: test/e2e/go.mod + cache-dependency-path: test/e2e/go.sum + + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + pattern: 'build-*' + path: ./artifacts + + - name: Log test configuration + run: | + { + echo "## E2E — ${{ matrix.group }} (${{ matrix.architecture }})" + echo "" + echo "- **Label filter**: \`${GINKGO_LABEL_FILTER}\`" + echo "- **Procs**: \`${{ matrix.procs }}\`" + echo "- **Depth**: \`${E2E_DEPTH}\`" + echo "- **Run ID**: \`${E2E_RUN_ID}\`" + echo "- **Image tag**: \`${IMAGE_TAG}\`" + echo "- **Chart 
version**: \`${CHART_VERSION}\`" + } >> "$GITHUB_STEP_SUMMARY" + + - name: Setup test environment + uses: ./.github/actions/setup-test-environment + with: + test-type: 'e2e' + architecture: ${{ matrix.architecture }} + runner: ${{ matrix.runner }} + test-scenario-name: ${{ matrix.group }} + node-count: '1' + instances-per-node: '1' + cert-manager-namespace: ${{ env.CERT_MANAGER_NS }} + operator-namespace: ${{ env.OPERATOR_NS }} + db-namespace: ${{ env.DB_NS }} + db-cluster-name: ${{ env.DB_NAME }} + db-username: ${{ env.DB_USERNAME }} + db-password: ${{ env.DB_PASSWORD }} + db-port: ${{ env.DB_PORT }} + image-tag: ${{ env.IMAGE_TAG }} + chart-version: ${{ env.CHART_VERSION }} + use-external-images: 'false' + github-token: ${{ secrets.GITHUB_TOKEN }} + repository-owner: ${{ github.repository_owner }} + + - name: Install Ginkgo CLI + working-directory: test/e2e + run: | + go install github.com/onsi/ginkgo/v2/ginkgo + ginkgo version + + - name: Run E2E specs + working-directory: test/e2e + run: | + mkdir -p artifacts + set -o pipefail + ginkgo run \ + -r \ + --label-filter="${GINKGO_LABEL_FILTER}" \ + --procs=${{ matrix.procs }} \ + --timeout=75m \ + --keep-going \ + --junit-report=junit.xml \ + --output-dir=artifacts \ + ./tests/... 
2>&1 | tee artifacts/ginkgo.log
+
+      - name: Collect cluster diagnostics
+        if: failure()
+        uses: ./.github/actions/collect-logs
+        with:
+          architecture: ${{ matrix.architecture }}
+          operator-namespace: ${{ env.OPERATOR_NS }}
+          db-namespace: ${{ env.DB_NS }}
+          # The composite action's input is `db-cluster-name` (see every
+          # other collect-logs call site in this repo); `db-name` is not a
+          # declared input and would leave the cluster name empty, so the
+          # collected diagnostics would miss the DocumentDB cluster objects.
+          db-cluster-name: ${{ env.DB_NAME }}
+
+      - name: Upload JUnit report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-junit-${{ matrix.group }}-${{ matrix.architecture }}-${{ github.run_attempt }}
+          path: test/e2e/artifacts/junit.xml
+          if-no-files-found: warn
+          retention-days: 14
+
+      - name: Upload E2E logs and diagnostics
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-logs-${{ matrix.group }}-${{ matrix.architecture }}-${{ github.run_attempt }}
+          path: |
+            test/e2e/artifacts/
+            /tmp/cluster-logs/
+          if-no-files-found: ignore
+          retention-days: 14
diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml
deleted file mode 100644
index 1f718d4e..00000000
--- a/.github/workflows/test-integration.yml
+++ /dev/null
@@ -1,163 +0,0 @@
-name: TEST - Integration with python
-
-on:
-  push:
-    branches: [ main, develop ]
-  pull_request:
-    branches: [ main, develop ]
-  workflow_dispatch:
-    inputs:
-      image_tag:
-        description: 'Optional: Use existing image tag instead of building locally'
-        required: false
-        type: string
-  workflow_call:
-    inputs:
-      image_tag:
-        description: 'Optional: Use existing image tag instead of building locally'
-        required: false
-        type: string
-
-permissions:
-  packages: write
-  contents: read
-  id-token: write
-
-env:
-  # Cluster configuration
-  CERT_MANAGER_NS: cert-manager
-  OPERATOR_NS: documentdb-operator
-  DB_NS: documentdb-preview-ns
-  DB_NAME: documentdb-preview
-  # Connection parameters
-  DB_USERNAME: default_user
-  DB_PASSWORD: Admin100
-  DB_PORT: 10260
-
-jobs:
-  # Use the reusable build workflow - only if no image tag is provided or on pull_request
-  build:
-    name: Build Images and Charts
-    if: ${{ 
(github.event.inputs.image_tag == '' || github.event.inputs.image_tag == null) || github.event_name == 'pull_request' }} - uses: ./.github/workflows/test-build-and-package.yml - with: - version: '0.2.0' - secrets: inherit - - integration-test: - name: Run Integration Tests - runs-on: ${{ matrix.runner }} - timeout-minutes: 45 - needs: build - if: always() && (needs.build.result == 'success' || needs.build.result == 'skipped') - - strategy: - matrix: - include: - - architecture: amd64 - runner: ubuntu-22.04 - test_scenario_name: "single-node" - node_count: 1 - instances_per_node: 1 - - architecture: arm64 - runner: ubuntu-22.04-arm - test_scenario_name: "single-node" - node_count: 1 - instances_per_node: 1 - - env: - # Use built image tag on PR or when no external tag provided - IMAGE_TAG: ${{ (github.event_name == 'pull_request' || github.event.inputs.image_tag == '' || github.event.inputs.image_tag == null) && needs.build.outputs.image_tag || github.event.inputs.image_tag }} - EXT_IMAGE_TAG: ${{ needs.build.outputs.ext_image_tag || '' }} - CHART_VERSION: ${{ needs.build.outputs.chart_version || '0.1.0' }} - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Download artifacts - if: ${{ (github.event.inputs.image_tag == '' || github.event.inputs.image_tag == null) || github.event_name == 'pull_request' }} - uses: actions/download-artifact@v4 - with: - pattern: 'build-*' - path: ./artifacts - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Log test configuration - run: | - echo "## Integration Test Configuration" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - if [[ -n "${{ github.event.inputs.image_tag }}" ]]; then - echo "- **Mode**: Using provided image tag" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: \`${{ github.event.inputs.image_tag }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Source**: External (no local build)" >> $GITHUB_STEP_SUMMARY - else - echo "- **Mode**: 
Using locally built images" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: \`${{ env.IMAGE_TAG }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Source**: Local build pipeline" >> $GITHUB_STEP_SUMMARY - fi - echo "- **Architecture**: \`${{ matrix.architecture }}\`" >> $GITHUB_STEP_SUMMARY - - - name: Setup test environment - uses: ./.github/actions/setup-test-environment - with: - test-type: 'integration' - architecture: ${{ matrix.architecture }} - runner: ${{ matrix.runner }} - test-scenario-name: ${{ matrix.test_scenario_name }} - node-count: '${{ matrix.node_count }}' - instances-per-node: '${{ matrix.instances_per_node }}' - cert-manager-namespace: ${{ env.CERT_MANAGER_NS }} - operator-namespace: ${{ env.OPERATOR_NS }} - db-namespace: ${{ env.DB_NS }} - db-cluster-name: ${{ env.DB_NAME }} - db-username: ${{ env.DB_USERNAME }} - db-password: ${{ env.DB_PASSWORD }} - db-port: ${{ env.DB_PORT }} - image-tag: ${{ env.IMAGE_TAG }} - documentdb-image-tag: ${{ env.EXT_IMAGE_TAG }} - chart-version: ${{ env.CHART_VERSION }} - use-external-images: ${{ github.event_name != 'pull_request' && github.event.inputs.image_tag != '' && github.event.inputs.image_tag != null }} - github-token: ${{ secrets.GITHUB_TOKEN }} - repository-owner: ${{ github.repository_owner }} - - - name: Test connection with mongosh - run: | - echo "Testing connection with mongosh on ${{ matrix.architecture }} architecture..." - chmod +x operator/src/scripts/test-scripts/test-mongodb-connection.sh - ./operator/src/scripts/test-scripts/test-mongodb-connection.sh \ - --architecture "${{ matrix.architecture }}" \ - --namespace "${{ env.DB_NS }}" \ - --cluster-name "${{ env.DB_NAME }}" \ - --pod-name "${{ env.DB_NAME }}-1" \ - --port "${{ env.DB_PORT }}" \ - --username "${{ env.DB_USERNAME }}" \ - --password "${{ env.DB_PASSWORD }}" \ - --test-type 'basic' - - - name: Test with Python PyMongo client - run: | - echo "Testing with Python PyMongo client on ${{ matrix.architecture }} architecture..." 
- chmod +x operator/src/scripts/test-scripts/test-python-pymongo.sh - ./operator/src/scripts/test-scripts/test-python-pymongo.sh \ - --architecture "${{ matrix.architecture }}" \ - --namespace "${{ env.DB_NS }}" \ - --cluster-name "${{ env.DB_NAME }}" \ - --pod-name "${{ env.DB_NAME }}-1" \ - --port "${{ env.DB_PORT }}" \ - --username "${{ env.DB_USERNAME }}" \ - --password "${{ env.DB_PASSWORD }}" - - - name: Collect logs on failure - if: failure() - uses: ./.github/actions/collect-logs - with: - architecture: ${{ matrix.architecture }} - operator-namespace: ${{ env.OPERATOR_NS }} - db-namespace: ${{ env.DB_NS }} - db-cluster-name: ${{ env.DB_NAME }} - cert-manager-namespace: ${{ env.CERT_MANAGER_NS }} diff --git a/.github/workflows/test-upgrade-and-rollback.yml b/.github/workflows/test-upgrade-and-rollback.yml deleted file mode 100644 index d1e9c05f..00000000 --- a/.github/workflows/test-upgrade-and-rollback.yml +++ /dev/null @@ -1,1337 +0,0 @@ -name: TEST - Upgrade and Rollback - -on: - push: - branches: [ main, develop ] - pull_request: - branches: [ main, develop ] - schedule: - - cron: '0 2 * * *' - workflow_dispatch: - inputs: - image_tag: - description: 'Optional: Use existing image tag instead of building locally' - required: false - type: string - released_chart_version: - description: 'Released chart version to upgrade from (default: latest)' - required: false - type: string - default: 'latest' - workflow_call: - inputs: - image_tag: - description: 'Optional: Use existing image tag instead of building locally' - required: false - type: string - released_chart_version: - description: 'Released chart version to upgrade from (default: latest)' - required: false - type: string - default: 'latest' - -permissions: - contents: read - actions: read - packages: read - -env: - CERT_MANAGER_NS: cert-manager - OPERATOR_NS: documentdb-operator - DB_NS: documentdb-upgrade-test - DB_NAME: documentdb-upgrade - DB_USERNAME: k8s_secret_user - DB_PASSWORD: K8sSecret100 - 
DB_PORT: 10260 - RELEASED_DATABASE_VERSION: 0.109.0 - # Always resolve released baseline images from the canonical org, not the fork owner. - RELEASED_DATABASE_OWNER: documentdb - -jobs: - build: - name: Build Images and Charts - if: ${{ (inputs.image_tag == '' || inputs.image_tag == null) || github.event_name == 'pull_request' }} - uses: ./.github/workflows/test-build-and-package.yml - with: - version: '0.2.0' - secrets: inherit - - upgrade-and-rollback-test: - name: Upgrade & Rollback (${{ matrix.architecture }}) - runs-on: ${{ matrix.runner }} - timeout-minutes: 60 - needs: build - if: always() && (needs.build.result == 'success' || needs.build.result == 'skipped') - - strategy: - matrix: - include: - - architecture: amd64 - runner: ubuntu-22.04 - test_scenario_name: "single-node" - node_count: 1 - instances_per_node: 1 - - architecture: arm64 - runner: ubuntu-22.04-arm - test_scenario_name: "single-node" - node_count: 1 - instances_per_node: 1 - - env: - IMAGE_TAG: ${{ (github.event_name == 'pull_request' || inputs.image_tag == '' || inputs.image_tag == null) && needs.build.outputs.image_tag || inputs.image_tag }} - EXT_IMAGE_TAG: ${{ needs.build.outputs.ext_image_tag || '' }} - CHART_VERSION: ${{ needs.build.outputs.chart_version || '0.1.0' }} - DOCUMENTDB_COMBINED_IMAGE: ghcr.io/microsoft/documentdb/documentdb-local:16 - RELEASED_CHART_VERSION: ${{ inputs.released_chart_version || 'latest' }} - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Download artifacts - if: ${{ (inputs.image_tag == '' || inputs.image_tag == null) || github.event_name == 'pull_request' }} - uses: actions/download-artifact@v4 - with: - pattern: 'build-*' - path: ./artifacts - - - name: Determine new DocumentDB and Gateway image references - run: | - # Old images should always come from the latest released database baseline for this repository owner. 
- OLD_DOCDB="ghcr.io/${{ env.RELEASED_DATABASE_OWNER }}/documentdb-kubernetes-operator/documentdb:${{ env.RELEASED_DATABASE_VERSION }}" - OLD_GW="ghcr.io/${{ env.RELEASED_DATABASE_OWNER }}/documentdb-kubernetes-operator/gateway:${{ env.RELEASED_DATABASE_VERSION }}" - - # New images come from this workflow run (self-built) or from the provided candidate tag. - # Database images use a separate version track from operator images. - # CI-built images are now tagged with the extension version (e.g., 0.109.0-test-RUNID-arch) - # by test-build-and-package.yml, so no re-tagging is needed. - if [[ "${{ github.event_name }}" == "pull_request" || -z "${{ inputs.image_tag }}" ]]; then - NEW_DOCDB="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:${{ env.EXT_IMAGE_TAG }}-${{ matrix.architecture }}" - NEW_GW="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/gateway:${{ env.EXT_IMAGE_TAG }}-${{ matrix.architecture }}" - else - # External images: read documentDbVersion from values.yaml (separate version track from operator). 
- DOCDB_TAG=$(grep 'documentDbVersion:' operator/documentdb-helm-chart/values.yaml | sed 's/.*"\(.*\)".*/\1/') - echo "Using documentDbVersion from values.yaml: $DOCDB_TAG" - NEW_DOCDB="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:${DOCDB_TAG}" - NEW_GW="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/gateway:${DOCDB_TAG}" - fi - echo "DOCUMENTDB_IMAGE=$NEW_DOCDB" >> $GITHUB_ENV - echo "GATEWAY_IMAGE=$NEW_GW" >> $GITHUB_ENV - echo "DOCUMENTDB_OLD_IMAGE=$OLD_DOCDB" >> $GITHUB_ENV - echo "GATEWAY_OLD_IMAGE=$OLD_GW" >> $GITHUB_ENV - echo "Old DocumentDB image: $OLD_DOCDB" - echo "Old Gateway image: $OLD_GW" - echo "New DocumentDB image: $NEW_DOCDB" - echo "New Gateway image: $NEW_GW" - - - name: Log test configuration - run: | - echo "## Upgrade & Rollback Test Configuration" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - if [[ -n "${{ inputs.image_tag }}" ]]; then - echo "- **Mode**: Using provided image tag" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: \`${{ inputs.image_tag }}\`" >> $GITHUB_STEP_SUMMARY - else - echo "- **Mode**: Using locally built images" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: \`${{ env.IMAGE_TAG }}\`" >> $GITHUB_STEP_SUMMARY - fi - echo "- **Architecture**: \`${{ matrix.architecture }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Released Database Baseline**: \`${{ env.RELEASED_DATABASE_VERSION }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Old Extension Image**: \`${{ env.DOCUMENTDB_OLD_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **New Extension Image**: \`${{ env.DOCUMENTDB_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Combined Image**: \`${{ env.DOCUMENTDB_COMBINED_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Old Gateway Image**: \`${{ env.GATEWAY_OLD_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **New Gateway Image**: \`${{ env.GATEWAY_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY - - # TODO: Remove this step once release versions > 0.1.3 - - name: Determine initial 
DocumentDB image - run: | - echo "=== Determining DocumentDB image for initial deployment ===" - - # Add the public DocumentDB Helm repository - helm repo add documentdb https://documentdb.github.io/documentdb-kubernetes-operator 2>/dev/null || true - helm repo update - - # Resolve the released chart version - CHART_VERSION="${{ env.RELEASED_CHART_VERSION }}" - if [[ "$CHART_VERSION" == "latest" ]]; then - RESOLVED_VERSION=$(helm search repo documentdb/documentdb-operator -o json | jq -r '.[0].version' 2>/dev/null || echo "") - if [[ -z "$RESOLVED_VERSION" || "$RESOLVED_VERSION" == "null" ]]; then - echo "⚠️ Failed to resolve chart version from Helm repo, defaulting to threshold" - RESOLVED_VERSION="0.1.3" - fi - else - RESOLVED_VERSION="$CHART_VERSION" - fi - echo "Resolved released chart version: $RESOLVED_VERSION" - - # Determine image mode based on release version - # Versions <= 0.1.3 use combined image (no ImageVolume support) - # Versions > 0.1.3 use extension image (ImageVolume mode) - THRESHOLD="0.1.3" - # Strip any pre-release suffix (e.g., 0.1.3-rc1 → 0.1.3) for clean semver comparison - CLEAN_VERSION=$(echo "$RESOLVED_VERSION" | sed 's/-.*//') - if [[ "$(printf '%s\n' "$THRESHOLD" "$CLEAN_VERSION" | sort -V | head -n1)" == "$CLEAN_VERSION" ]]; then - echo "Released version $RESOLVED_VERSION <= $THRESHOLD → combined image required" - USE_COMBINED=true - else - echo "Released version $RESOLVED_VERSION > $THRESHOLD → extension image supported" - USE_COMBINED=false - fi - - # Persist USE_COMBINED for later steps - # TODO: Remove once we deprecate combined mode - echo "USE_COMBINED=$USE_COMBINED" >> $GITHUB_ENV - - # Set the initial image based on determination - COMBINED_IMAGE="${{ env.DOCUMENTDB_COMBINED_IMAGE }}" - EXTENSION_IMAGE="${{ env.DOCUMENTDB_OLD_IMAGE }}" - if [[ "$USE_COMBINED" == "true" ]]; then - echo "DOCUMENTDB_INITIAL_IMAGE=$COMBINED_IMAGE" >> $GITHUB_ENV - # In combined mode, the gateway is part of the combined image - echo 
"GATEWAY_OLD_IMAGE=$COMBINED_IMAGE" >> $GITHUB_ENV - echo "✓ Using combined image for initial deployment: $COMBINED_IMAGE" - else - echo "DOCUMENTDB_INITIAL_IMAGE=$EXTENSION_IMAGE" >> $GITHUB_ENV - echo "✓ Using extension image for initial deployment: $EXTENSION_IMAGE" - fi - - - name: Setup test environment - uses: ./.github/actions/setup-test-environment - with: - test-type: 'e2e' - architecture: ${{ matrix.architecture }} - runner: ${{ matrix.runner }} - test-scenario-name: ${{ matrix.test_scenario_name }} - node-count: '${{ matrix.node_count }}' - instances-per-node: '${{ matrix.instances_per_node }}' - cert-manager-namespace: ${{ env.CERT_MANAGER_NS }} - operator-namespace: ${{ env.OPERATOR_NS }} - db-namespace: ${{ env.DB_NS }} - db-cluster-name: ${{ env.DB_NAME }} - db-username: ${{ env.DB_USERNAME }} - db-password: ${{ env.DB_PASSWORD }} - db-port: ${{ env.DB_PORT }} - image-tag: ${{ env.IMAGE_TAG }} - documentdb-image-tag: ${{ env.EXT_IMAGE_TAG }} - chart-version: ${{ env.CHART_VERSION }} - documentdb-image: ${{ env.DOCUMENTDB_INITIAL_IMAGE }} - gateway-image: ${{ env.GATEWAY_OLD_IMAGE }} - use-external-images: ${{ github.event_name != 'pull_request' && inputs.image_tag != '' && inputs.image_tag != null }} - released-chart-version: ${{ env.RELEASED_CHART_VERSION }} - github-token: ${{ secrets.GITHUB_TOKEN }} - repository-owner: ${{ github.repository_owner }} - - - name: Setup port forwarding for data seeding - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Seed test data before upgrade - run: | - echo "=== Data Persistence: Writing seed data before upgrade ===" - mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval ' - db = db.getSiblingDB("upgrade_test_db"); - 
db.test_collection.insertOne({ _id: "upgrade_marker", step: "pre-upgrade", timestamp: new Date().toISOString() }); - db.test_collection.insertOne({ _id: "persistence_check", data: "this_must_survive_rollback", count: 42 }); - var count = db.test_collection.countDocuments(); - print("✓ Seed data written: " + count + " documents"); - assert(count === 2, "Expected 2 documents but found " + count); - ' - echo "✓ Seed data written successfully on old version" - - - name: Cleanup port forwarding after data seeding - if: always() - run: | - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - rm -f /tmp/pf_output.log - - - name: "Step 1: Operator Control Plane Upgrade (released → built)" - run: | - echo "=== Step 1: Operator Control Plane Upgrade ===" - echo "Upgrading operator from released chart to locally built version on ${{ matrix.architecture }}..." - - ARCH="${{ matrix.architecture }}" - - # --- Baseline from Released Operator --- - echo "" - echo "--- Baseline (Released Operator) ---" - echo "Helm release info:" - helm list -n $OPERATOR_NS - - RELEASED_OPERATOR_IMAGE=$(kubectl get deployment documentdb-operator -n $OPERATOR_NS -o jsonpath='{.spec.template.spec.containers[0].image}') - echo "Released operator image: $RELEASED_OPERATOR_IMAGE" - - # Record DB pod state before operator upgrade - echo "" - echo "DB pods before operator upgrade:" - kubectl get pods -n $DB_NS -l cnpg.io/cluster=$DB_NAME -o wide - PRE_UPGRADE_UIDS=$(kubectl get pods -n $DB_NS -l cnpg.io/cluster=$DB_NAME -o jsonpath='{.items[*].metadata.uid}') - echo "Pod UIDs: $PRE_UPGRADE_UIDS" - - # --- Prepare Built Chart --- - echo "" - echo "--- Preparing Built Chart ---" - CHART_ARTIFACT_DIR="./artifacts/build-helm-chart-${ARCH}" - EXPECTED_CHART_FILE="$CHART_ARTIFACT_DIR/documentdb-chart-${{ env.CHART_VERSION }}-${ARCH}.tgz" - - if [ ! 
-f "$EXPECTED_CHART_FILE" ]; then - echo "❌ Built Helm chart not found: $EXPECTED_CHART_FILE" - ls -la "$CHART_ARTIFACT_DIR/" || echo "Chart artifact directory not found" - exit 1 - fi - - echo "Extracting built chart: $EXPECTED_CHART_FILE" - rm -rf ./documentdb-chart - tar -xzf "$EXPECTED_CHART_FILE" - - echo "Built chart version:" - cat ./documentdb-chart/Chart.yaml | grep -E "^(version|appVersion):" - - # --- Apply CRDs before Helm Upgrade --- - # Helm does not upgrade CRDs on `helm upgrade` (only on `helm install`). - # We must apply them separately so new fields (e.g. spec.schemaVersion) are - # recognised by the API server and passed to the validating webhook. - echo "" - echo "--- Applying CRDs from new chart ---" - kubectl apply --server-side --force-conflicts -f ./documentdb-chart/crds/ - echo "CRDs applied." - - # --- Perform Helm Upgrade --- - echo "" - echo "--- Performing Helm Upgrade ---" - LOCAL_IMAGE_TAG="${{ env.IMAGE_TAG }}-${ARCH}" - echo "Upgrading with image tag: $LOCAL_IMAGE_TAG" - - helm upgrade documentdb-operator ./documentdb-chart \ - --namespace $OPERATOR_NS \ - --set documentDbVersion="$LOCAL_IMAGE_TAG" \ - --set image.documentdbk8soperator.tag="$LOCAL_IMAGE_TAG" \ - --set image.documentdbk8soperator.pullPolicy=IfNotPresent \ - --set image.sidecarinjector.tag="$LOCAL_IMAGE_TAG" \ - --set image.sidecarinjector.pullPolicy=IfNotPresent \ - --wait --timeout=15m - - echo "Helm upgrade completed. 
Release info:" - helm list -n $OPERATOR_NS - - # --- Verify Upgraded Operator --- - echo "" - echo "--- Verifying Upgraded Operator ---" - kubectl wait --for=condition=Available deployment/documentdb-operator -n $OPERATOR_NS --timeout=300s - - UPGRADED_OPERATOR_IMAGE=$(kubectl get deployment documentdb-operator -n $OPERATOR_NS -o jsonpath='{.spec.template.spec.containers[0].image}') - echo "Upgraded operator image: $UPGRADED_OPERATOR_IMAGE" - - if [[ "$UPGRADED_OPERATOR_IMAGE" == "$RELEASED_OPERATOR_IMAGE" ]]; then - echo "❌ Operator image did not change after upgrade" - exit 1 - fi - echo "✓ Operator image changed: $RELEASED_OPERATOR_IMAGE → $UPGRADED_OPERATOR_IMAGE" - - # --- Verify DB Pod Stability --- - echo "" - echo "--- Verifying DB Pod Stability ---" - kubectl get pods -n $DB_NS -l cnpg.io/cluster=$DB_NAME -o wide - POST_UPGRADE_UIDS=$(kubectl get pods -n $DB_NS -l cnpg.io/cluster=$DB_NAME -o jsonpath='{.items[*].metadata.uid}') - echo "Pod UIDs after upgrade: $POST_UPGRADE_UIDS" - - if [[ "$PRE_UPGRADE_UIDS" == "$POST_UPGRADE_UIDS" ]]; then - echo "✓ DB pod UIDs unchanged — operator upgrade did not restart DB pods" - else - echo "⚠️ DB pod UIDs changed — pods may have been restarted during operator upgrade" - echo " Before: $PRE_UPGRADE_UIDS" - echo " After: $POST_UPGRADE_UIDS" - fi - - # --- Verify Cluster Health --- - echo "" - echo "--- Verifying Cluster Health ---" - timeout 300 bash -c ' - while true; do - DB_STATUS=$(kubectl get documentdb "$1" -n "$2" -o jsonpath="{.status.status}" 2>/dev/null) - CLUSTER_STATUS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.phase}" 2>/dev/null) - echo "DocumentDB status: $DB_STATUS, CNPG phase: $CLUSTER_STATUS" - if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then - echo "✓ Cluster is healthy after operator upgrade" - break - fi - sleep 10 - done - ' -- "$DB_NAME" "$DB_NS" - - echo "" - echo "✅ Step 1 passed: Operator control plane upgraded 
successfully" - echo " Operator: $RELEASED_OPERATOR_IMAGE → $UPGRADED_OPERATOR_IMAGE" - - - name: Setup port forwarding for operator upgrade verification - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Verify data persistence after operator upgrade - run: | - echo "=== Data Persistence: Verifying after operator upgrade ===" - mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval ' - db = db.getSiblingDB("upgrade_test_db"); - var count = db.test_collection.countDocuments(); - assert(count === 2, "Expected 2 documents but found " + count + " after operator upgrade"); - print("✓ All " + count + " documents persisted through operator upgrade"); - ' - echo "✓ Data persistence verified after operator upgrade" - - - name: Cleanup port forwarding after operator upgrade verification - if: always() - run: | - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - rm -f /tmp/pf_output.log - - # ============================================================ - # TODO: Remove the following 4 steps once released version > 0.1.3 - # When the released operator uses combined mode, the cluster must be - # recreated under the upgraded operator to switch to ImageVolume mode. - # ============================================================ - - - name: "Recreate cluster for ImageVolume mode (combined → extension)" - if: env.USE_COMBINED == 'true' - run: | - echo "=== Recreating cluster: combined mode → ImageVolume mode ===" - echo "The released operator deployed in combined mode. After operator upgrade," - echo "we must recreate the cluster so the new operator deploys it in ImageVolume mode." 
- - # Delete the combined-mode cluster - echo "" - echo "Deleting combined-mode cluster..." - kubectl delete documentdb $DB_NAME -n $DB_NS --wait=false - - echo "Waiting for DocumentDB to be deleted..." - timeout 300 bash -c ' - while true; do - db_exists=$(kubectl -n "$1" get documentdb "$2" --ignore-not-found -o name) - if [[ -z "$db_exists" ]]; then - echo "✓ DocumentDB deleted successfully." - break - fi - echo "DocumentDB still exists. Waiting..." - sleep 10 - done - ' -- "$DB_NS" "$DB_NAME" - - echo "Waiting for cluster pods to be cleaned up..." - timeout 120 bash -c ' - while true; do - pod_count=$(kubectl get pods -n "$1" -l cnpg.io/cluster="$2" --no-headers 2>/dev/null | wc -l) - if [[ "$pod_count" -eq 0 ]]; then - echo "✓ All cluster pods cleaned up." - break - fi - echo "Still $pod_count pods remaining. Waiting..." - sleep 5 - done - ' -- "$DB_NS" "$DB_NAME" - - echo "Cleaning up old PVCs..." - kubectl delete pvc -n $DB_NS -l cnpg.io/cluster=$DB_NAME --wait=true --timeout=60s || true - - # Create a fresh cluster with extension image under the upgraded operator - OLD_EXTENSION="${{ env.DOCUMENTDB_OLD_IMAGE }}" - OLD_GATEWAY="${{ env.GATEWAY_OLD_IMAGE }}" - echo "" - echo "Creating new cluster with ImageVolume mode..." 
- echo " Extension image: $OLD_EXTENSION" - echo " Gateway image: $OLD_GATEWAY" - cat </dev/null) - CLUSTER_STATUS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.phase}" 2>/dev/null) - echo "DocumentDB status: $DB_STATUS, CNPG phase: $CLUSTER_STATUS" - if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then - echo "✓ Recreated cluster is healthy" - break - fi - sleep 10 - done - ' -- "$DB_NAME" "$DB_NS" - - # Update DOCUMENTDB_INITIAL_IMAGE so Step 2 baseline check uses the correct image - echo "DOCUMENTDB_INITIAL_IMAGE=$OLD_EXTENSION" >> $GITHUB_ENV - echo "" - echo "✅ Cluster recreated in ImageVolume mode" - echo " DOCUMENTDB_INITIAL_IMAGE updated to: $OLD_EXTENSION" - - - name: Setup port forwarding for re-seeding after recreation - if: env.USE_COMBINED == 'true' - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Re-seed test data after cluster recreation - if: env.USE_COMBINED == 'true' - run: | - echo "=== Re-seeding test data after cluster recreation ===" - mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval ' - db = db.getSiblingDB("upgrade_test_db"); - db.test_collection.insertOne({ _id: "upgrade_marker", step: "pre-upgrade", timestamp: new Date().toISOString() }); - db.test_collection.insertOne({ _id: "persistence_check", data: "this_must_survive_rollback", count: 42 }); - var count = db.test_collection.countDocuments(); - print("✓ Seed data written: " + count + " documents"); - assert(count === 2, "Expected 2 documents but found " + count); - ' - echo "✓ Seed data re-written after cluster recreation" - - - name: Cleanup port forwarding after re-seeding - if: always() && env.USE_COMBINED == 'true' - run: 
| - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - rm -f /tmp/pf_output.log - - # ============================================================ - # END TODO: Remove the above 4 steps once released version > 0.1.3 - # ============================================================ - - - name: "Step 2: Upgrade Both Extension and Gateway Images" - run: | - echo "=== Step 2: Upgrade Both Extension and Gateway Images ===" - echo "Testing simultaneous extension + gateway upgrade on ${{ matrix.architecture }}..." - - OLD_EXTENSION="${{ env.DOCUMENTDB_OLD_IMAGE }}" - NEW_EXTENSION="${{ env.DOCUMENTDB_IMAGE }}" - OLD_GATEWAY="${{ env.GATEWAY_OLD_IMAGE }}" - NEW_GATEWAY="${{ env.GATEWAY_IMAGE }}" - - # Verify baseline: cluster deployed with old images - CURRENT_EXTENSION=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') - echo "Current extension image: $CURRENT_EXTENSION" - if [[ "$CURRENT_EXTENSION" != "$OLD_EXTENSION" ]]; then - echo "❌ Expected old extension image $OLD_EXTENSION but found $CURRENT_EXTENSION" - exit 1 - fi - - CURRENT_GATEWAY=$(kubectl get cluster $DB_NAME -n $DB_NS -o jsonpath='{.spec.plugins[0].parameters.gatewayImage}') - echo "Current gateway image: $CURRENT_GATEWAY" - if [[ "$CURRENT_GATEWAY" != "$OLD_GATEWAY" ]]; then - echo "❌ Expected old gateway image $OLD_GATEWAY but found $CURRENT_GATEWAY" - exit 1 - fi - echo "✓ Cluster deployed with old images" - - # Record and verify version before upgrade - VERSION_BEFORE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - echo "DocumentDB schema version before upgrade: $VERSION_BEFORE" - - if [[ -z "$VERSION_BEFORE" ]]; then - echo "❌ status.schemaVersion is empty before upgrade" - exit 1 - fi - echo "✓ DocumentDB schema version is populated before upgrade" - - # Patch both images simultaneously - echo "" - echo "Upgrading both images..." 
- echo " Extension: $OLD_EXTENSION → $NEW_EXTENSION" - echo " Gateway: $OLD_GATEWAY → $NEW_GATEWAY" - kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' \ - -p "{\"spec\":{\"documentDBImage\":\"$NEW_EXTENSION\",\"gatewayImage\":\"$NEW_GATEWAY\"}}" - - echo "Waiting for cluster to be healthy with new images..." - timeout 600 bash -c ' - while true; do - DB_STATUS=$(kubectl get documentdb "$1" -n "$2" -o jsonpath="{.status.status}" 2>/dev/null) - CLUSTER_STATUS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.phase}" 2>/dev/null) - SCHEMA_VERSION=$(kubectl get documentdb "$1" -n "$2" -o jsonpath="{.status.schemaVersion}" 2>/dev/null || echo "N/A") - echo "DocumentDB status: $DB_STATUS, CNPG phase: $CLUSTER_STATUS, schemaVersion: $SCHEMA_VERSION" - if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then - HEALTHY_PODS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.instancesStatus.healthy}" 2>/dev/null | jq length 2>/dev/null || echo "0") - if [[ "$HEALTHY_PODS" -ge "1" ]]; then - # Verify pods are actually running the new extension image - # With ImageVolume (K8s >= 1.35), the extension image is mounted as a volume, not an init container - POD_IMAGES=$(kubectl get pods -n "$2" -l cnpg.io/cluster="$1" -o jsonpath="{.items[*].spec.volumes[*].image.reference}" 2>/dev/null) - if echo "$POD_IMAGES" | grep -q "$3"; then - echo "✓ Cluster healthy with $HEALTHY_PODS pods running new images" - break - else - echo "Pods not yet running new extension image, waiting..." 
- fi - fi - fi - sleep 10 - done - ' -- "$DB_NAME" "$DB_NS" "$NEW_EXTENSION" - - # Verify extension image - FINAL_EXTENSION=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') - if [[ "$FINAL_EXTENSION" != "$NEW_EXTENSION" ]]; then - echo "❌ Extension image not applied: expected $NEW_EXTENSION, got $FINAL_EXTENSION" - exit 1 - fi - echo "✓ Extension image upgraded to $NEW_EXTENSION" - - # Verify gateway image in CNPG cluster - FINAL_GATEWAY=$(kubectl get cluster $DB_NAME -n $DB_NS -o jsonpath='{.spec.plugins[0].parameters.gatewayImage}') - if [[ "$FINAL_GATEWAY" != "$NEW_GATEWAY" ]]; then - echo "❌ Gateway image not applied: expected $NEW_GATEWAY, got $FINAL_GATEWAY" - exit 1 - fi - echo "✓ Gateway image upgraded to $NEW_GATEWAY" - - # Verify DocumentDB schema version unchanged (two-phase default: schema stays at old version) - VERSION_AFTER=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - echo "DocumentDB schema version after upgrade: $VERSION_AFTER" - if [[ -z "$VERSION_AFTER" ]]; then - echo "❌ status.schemaVersion is empty after upgrade" - exit 1 - fi - if [[ "$VERSION_AFTER" != "$VERSION_BEFORE" ]]; then - echo "❌ Schema version changed from $VERSION_BEFORE to $VERSION_AFTER — expected unchanged (two-phase default)" - exit 1 - fi - echo "✓ Schema version unchanged after binary upgrade: $VERSION_AFTER (two-phase default validated)" - - # Verify status fields - echo "" - echo "=== Status Field Verification ===" - STATUS_DB_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBImage}') - echo "status.documentDBImage: $STATUS_DB_IMAGE" - if [[ "$STATUS_DB_IMAGE" == "$NEW_EXTENSION" ]]; then - echo "✓ status.documentDBImage matches new extension image" - else - echo "⚠️ status.documentDBImage ($STATUS_DB_IMAGE) does not match expected ($NEW_EXTENSION)" - fi - - STATUS_GATEWAY_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.gatewayImage}') - echo 
"status.gatewayImage: $STATUS_GATEWAY_IMAGE" - if [[ "$STATUS_GATEWAY_IMAGE" == "$NEW_GATEWAY" ]]; then - echo "✓ status.gatewayImage matches new gateway image" - else - echo "⚠️ status.gatewayImage ($STATUS_GATEWAY_IMAGE) does not match expected ($NEW_GATEWAY)" - fi - - echo "" - echo "✅ Step 2 passed: Both images upgraded successfully" - echo " Extension: $OLD_EXTENSION → $NEW_EXTENSION" - echo " Gateway: $OLD_GATEWAY → $NEW_GATEWAY" - - - name: Setup port forwarding for upgrade verification - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Verify data persistence after upgrade - run: | - echo "=== Data Persistence: Verifying after upgrade ===" - mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval ' - db = db.getSiblingDB("upgrade_test_db"); - var count = db.test_collection.countDocuments(); - assert(count === 2, "Expected 2 documents but found " + count + " after upgrade"); - print("✓ All " + count + " documents persisted through upgrade"); - ' - echo "✓ Data persistence verified after upgrade" - - - name: Cleanup port forwarding after upgrade verification - if: always() - run: | - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - rm -f /tmp/pf_output.log - - - name: "Step 3: Rollback Extension Image (gateway stays at new version)" - run: | - echo "=== Step 3: Rollback Extension Image ===" - echo "Rolling back extension image while keeping gateway at new version..." 
- - OLD_EXTENSION="${{ env.DOCUMENTDB_OLD_IMAGE }}" - NEW_EXTENSION="${{ env.DOCUMENTDB_IMAGE }}" - NEW_GATEWAY="${{ env.GATEWAY_IMAGE }}" - - # Record state before rollback - VERSION_BEFORE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - echo "DocumentDB schema version before extension rollback: $VERSION_BEFORE" - - EVENTS_BEFORE=$(kubectl get events -n $DB_NS --field-selector reason=ExtensionRollback --no-headers 2>/dev/null | wc -l || echo "0") - echo "ExtensionRollback events before: $EVENTS_BEFORE" - - # Rollback only extension image - echo "" - echo "Patching spec.documentDBImage: $NEW_EXTENSION → $OLD_EXTENSION" - kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' \ - -p "{\"spec\":{\"documentDBImage\":\"$OLD_EXTENSION\"}}" - - echo "Waiting for cluster to stabilize after extension rollback..." - timeout 600 bash -c ' - while true; do - DB_STATUS=$(kubectl get documentdb "$1" -n "$2" -o jsonpath="{.status.status}" 2>/dev/null) - CLUSTER_STATUS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.phase}" 2>/dev/null) - echo "DocumentDB status: $DB_STATUS, CNPG phase: $CLUSTER_STATUS" - if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then - HEALTHY_PODS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.instancesStatus.healthy}" 2>/dev/null | jq length 2>/dev/null || echo "0") - if [[ "$HEALTHY_PODS" -ge "1" ]]; then - # Verify pods are running the rolled-back extension image - # With ImageVolume (K8s >= 1.35), the extension image is mounted as a volume, not an init container - POD_IMAGES=$(kubectl get pods -n "$2" -l cnpg.io/cluster="$1" -o jsonpath="{.items[*].spec.volumes[*].image.reference}" 2>/dev/null) - if echo "$POD_IMAGES" | grep -q "$3"; then - echo "✓ Cluster healthy with $HEALTHY_PODS pods running rolled-back extension image" - break - else - echo "Pods not yet running rolled-back extension image, waiting..." 
- fi - fi - fi - sleep 10 - done - ' -- "$DB_NAME" "$DB_NS" "$OLD_EXTENSION" - - echo "" - echo "=== Extension Rollback Verification ===" - - # Verify extension image rolled back - CURRENT_EXTENSION=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') - if [[ "$CURRENT_EXTENSION" == "$OLD_EXTENSION" ]]; then - echo "✓ spec.documentDBImage correctly rolled back to $OLD_EXTENSION" - else - echo "❌ spec.documentDBImage should be $OLD_EXTENSION but is $CURRENT_EXTENSION" - exit 1 - fi - - # Verify schema version preserved (ALTER EXTENSION UPDATE skipped) - VERSION_AFTER=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - echo "DocumentDB schema version before rollback: $VERSION_BEFORE" - echo "DocumentDB schema version after rollback: $VERSION_AFTER" - if [[ "$VERSION_AFTER" == "$VERSION_BEFORE" ]]; then - echo "✓ Schema version preserved — ALTER EXTENSION UPDATE correctly skipped" - else - echo "⚠️ Schema version changed from $VERSION_BEFORE to $VERSION_AFTER" - fi - - # Verify ExtensionRollback warning event (poll up to 60s instead of hardcoded sleep) - echo "Waiting for ExtensionRollback event..." 
- EVENTS_AFTER=$EVENTS_BEFORE - for i in $(seq 1 12); do - EVENTS_AFTER=$(kubectl get events -n $DB_NS --field-selector reason=ExtensionRollback --no-headers 2>/dev/null | wc -l || echo "0") - if [[ "$EVENTS_AFTER" -gt "$EVENTS_BEFORE" ]]; then - break - fi - sleep 5 - done - echo "ExtensionRollback events after: $EVENTS_AFTER" - if [[ "$EVENTS_AFTER" -gt "$EVENTS_BEFORE" ]]; then - echo "✓ ExtensionRollback warning event detected" - kubectl get events -n $DB_NS --field-selector reason=ExtensionRollback - else - echo "⚠️ No new ExtensionRollback event detected within 60s" - kubectl get events -n $DB_NS --sort-by='.lastTimestamp' | tail -20 - fi - - # Verify gateway image UNCHANGED at new version - CURRENT_GATEWAY=$(kubectl get cluster $DB_NAME -n $DB_NS -o jsonpath='{.spec.plugins[0].parameters.gatewayImage}') - echo "" - echo "Gateway image after extension rollback: $CURRENT_GATEWAY" - if [[ "$CURRENT_GATEWAY" == "$NEW_GATEWAY" ]]; then - echo "✓ Gateway image unchanged at $NEW_GATEWAY (extension rollback did not affect gateway)" - else - echo "❌ Gateway image changed unexpectedly: expected $NEW_GATEWAY, got $CURRENT_GATEWAY" - exit 1 - fi - - # Verify status fields - STATUS_DB_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBImage}') - echo "status.documentDBImage: $STATUS_DB_IMAGE" - if [[ "$STATUS_DB_IMAGE" == "$OLD_EXTENSION" ]]; then - echo "✓ status.documentDBImage reflects rolled-back extension" - else - echo "⚠️ status.documentDBImage ($STATUS_DB_IMAGE) does not match $OLD_EXTENSION" - fi - - STATUS_GATEWAY_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.gatewayImage}') - echo "status.gatewayImage: $STATUS_GATEWAY_IMAGE" - if [[ "$STATUS_GATEWAY_IMAGE" == "$NEW_GATEWAY" ]]; then - echo "✓ status.gatewayImage still at new gateway version" - else - echo "⚠️ status.gatewayImage ($STATUS_GATEWAY_IMAGE) does not match $NEW_GATEWAY" - fi - - echo "" - echo "✅ Step 3 passed: Extension rolled back, gateway 
unchanged" - echo " Extension: $NEW_EXTENSION → $OLD_EXTENSION (rolled back)" - echo " Gateway: $NEW_GATEWAY (unchanged)" - - - name: Setup port forwarding for extension rollback verification - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Verify data persistence after extension rollback - run: | - echo "=== Data Persistence: Verifying after extension rollback ===" - mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval ' - db = db.getSiblingDB("upgrade_test_db"); - var count = db.test_collection.countDocuments(); - assert(count === 2, "Expected 2 documents but found " + count + " after extension rollback"); - print("✓ All " + count + " documents persisted through extension rollback"); - ' - echo "✓ Data persistence verified after extension rollback" - - - name: Cleanup port forwarding after extension rollback verification - if: always() - run: | - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - rm -f /tmp/pf_output.log - - - name: "Step 4: Rollback Gateway Image (extension stays at old version)" - run: | - echo "=== Step 4: Rollback Gateway Image ===" - echo "Rolling back gateway image while keeping extension at old version..." 
- - OLD_EXTENSION="${{ env.DOCUMENTDB_OLD_IMAGE }}" - OLD_GATEWAY="${{ env.GATEWAY_OLD_IMAGE }}" - NEW_GATEWAY="${{ env.GATEWAY_IMAGE }}" - - # Record state before gateway rollback - VERSION_BEFORE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - echo "DocumentDB schema version before gateway rollback: $VERSION_BEFORE" - - # Rollback only gateway image - echo "" - echo "Patching spec.gatewayImage: $NEW_GATEWAY → $OLD_GATEWAY" - kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' \ - -p "{\"spec\":{\"gatewayImage\":\"$OLD_GATEWAY\"}}" - - echo "Waiting for cluster to stabilize after gateway rollback..." - timeout 600 bash -c ' - while true; do - DB_STATUS=$(kubectl get documentdb "$1" -n "$2" -o jsonpath="{.status.status}" 2>/dev/null) - CLUSTER_STATUS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.phase}" 2>/dev/null) - echo "DocumentDB status: $DB_STATUS, CNPG phase: $CLUSTER_STATUS" - if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then - HEALTHY_PODS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.instancesStatus.healthy}" 2>/dev/null | jq length 2>/dev/null || echo "0") - if [[ "$HEALTHY_PODS" -ge "1" ]]; then - # Verify gateway plugin parameter reflects the rolled-back image - CURRENT_GW_PARAM=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.spec.plugins[0].parameters.gatewayImage}" 2>/dev/null) - if [[ "$CURRENT_GW_PARAM" == "$3" ]]; then - echo "✓ Cluster healthy with $HEALTHY_PODS pods and gateway image rolled back" - break - else - echo "Gateway image not yet rolled back in cluster spec, waiting..." 
- fi - fi - fi - sleep 10 - done - ' -- "$DB_NAME" "$DB_NS" "$OLD_GATEWAY" - - echo "" - echo "=== Gateway Rollback Verification ===" - - # Verify gateway image rolled back in CNPG cluster - CURRENT_GATEWAY=$(kubectl get cluster $DB_NAME -n $DB_NS -o jsonpath='{.spec.plugins[0].parameters.gatewayImage}') - if [[ "$CURRENT_GATEWAY" == "$OLD_GATEWAY" ]]; then - echo "✓ Gateway image rolled back to $OLD_GATEWAY" - else - echo "❌ Gateway image should be $OLD_GATEWAY but is $CURRENT_GATEWAY" - exit 1 - fi - - # Verify extension image UNCHANGED at old version - CURRENT_EXTENSION=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') - echo "Extension image after gateway rollback: $CURRENT_EXTENSION" - if [[ "$CURRENT_EXTENSION" == "$OLD_EXTENSION" ]]; then - echo "✓ Extension image unchanged at $OLD_EXTENSION (gateway rollback did not affect extension)" - else - echo "❌ Extension image changed unexpectedly: expected $OLD_EXTENSION, got $CURRENT_EXTENSION" - exit 1 - fi - - # Verify schema version unchanged (gateway is stateless, no schema impact) - VERSION_AFTER=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - echo "DocumentDB schema version before gateway rollback: $VERSION_BEFORE" - echo "DocumentDB schema version after gateway rollback: $VERSION_AFTER" - if [[ "$VERSION_AFTER" == "$VERSION_BEFORE" ]]; then - echo "✓ Schema version unchanged — gateway rollback has no schema impact" - else - echo "⚠️ Schema version changed unexpectedly from $VERSION_BEFORE to $VERSION_AFTER" - fi - - # Verify status fields - STATUS_DB_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.documentDBImage}') - echo "status.documentDBImage: $STATUS_DB_IMAGE" - if [[ "$STATUS_DB_IMAGE" == "$OLD_EXTENSION" ]]; then - echo "✓ status.documentDBImage still at old extension" - else - echo "⚠️ status.documentDBImage ($STATUS_DB_IMAGE) does not match $OLD_EXTENSION" - fi - - STATUS_GATEWAY_IMAGE=$(kubectl get 
documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.gatewayImage}') - echo "status.gatewayImage: $STATUS_GATEWAY_IMAGE" - if [[ "$STATUS_GATEWAY_IMAGE" == "$OLD_GATEWAY" ]]; then - echo "✓ status.gatewayImage reflects rolled-back gateway" - else - echo "⚠️ status.gatewayImage ($STATUS_GATEWAY_IMAGE) does not match $OLD_GATEWAY" - fi - - echo "" - echo "✅ Step 4 passed: Gateway rolled back, extension unchanged" - echo " Extension: $OLD_EXTENSION (unchanged)" - echo " Gateway: $NEW_GATEWAY → $OLD_GATEWAY (rolled back)" - - - name: Setup port forwarding for gateway rollback verification - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Verify data persistence after gateway rollback - run: | - echo "=== Data Persistence: Verifying after gateway rollback ===" - mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval ' - db = db.getSiblingDB("upgrade_test_db"); - var count = db.test_collection.countDocuments(); - assert(count === 2, "Expected 2 documents but found " + count + " after gateway rollback"); - print("✓ All " + count + " documents persisted through full upgrade/rollback cycle"); - ' - echo "✓ Data persistence verified after gateway rollback" - - - name: Cleanup port forwarding after gateway rollback verification - if: always() - run: | - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - rm -f /tmp/pf_output.log - - # ============================================================ - # Steps 5-8: Two-phase schema upgrade and webhook validation - # ============================================================ - - - name: "Step 5: Re-upgrade binary (setup for schema tests)" - run: | - echo "=== Step 5: Re-upgrade 
Binary ===" - echo "Re-upgrading extension and gateway images to new version for schema tests..." - - NEW_EXTENSION="${{ env.DOCUMENTDB_IMAGE }}" - NEW_GATEWAY="${{ env.GATEWAY_IMAGE }}" - - # Patch both images back to new version - echo "Patching images to new versions..." - echo " Extension: → $NEW_EXTENSION" - echo " Gateway: → $NEW_GATEWAY" - kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' \ - -p "{\"spec\":{\"documentDBImage\":\"$NEW_EXTENSION\",\"gatewayImage\":\"$NEW_GATEWAY\"}}" - - echo "Waiting for cluster to be healthy with new images..." - timeout 600 bash -c ' - while true; do - DB_STATUS=$(kubectl get documentdb "$1" -n "$2" -o jsonpath="{.status.status}" 2>/dev/null) - CLUSTER_STATUS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.phase}" 2>/dev/null) - echo "DocumentDB status: $DB_STATUS, CNPG phase: $CLUSTER_STATUS" - if [[ "$DB_STATUS" == "Cluster in healthy state" && "$CLUSTER_STATUS" == "Cluster in healthy state" ]]; then - HEALTHY_PODS=$(kubectl get cluster "$1" -n "$2" -o jsonpath="{.status.instancesStatus.healthy}" 2>/dev/null | jq length 2>/dev/null || echo "0") - if [[ "$HEALTHY_PODS" -ge "1" ]]; then - POD_IMAGES=$(kubectl get pods -n "$2" -l cnpg.io/cluster="$1" -o jsonpath="{.items[*].spec.volumes[*].image.reference}" 2>/dev/null) - if echo "$POD_IMAGES" | grep -q "$3"; then - echo "✓ Cluster healthy with $HEALTHY_PODS pods running new images" - break - else - echo "Pods not yet running new extension image, waiting..." 
- fi - fi - fi - sleep 10 - done - ' -- "$DB_NAME" "$DB_NS" "$NEW_EXTENSION" - - # Verify schema version is still at baseline - VERSION_CURRENT=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - echo "Schema version after re-upgrade: $VERSION_CURRENT" - echo "SCHEMA_BASELINE=$VERSION_CURRENT" >> $GITHUB_ENV - - echo "" - echo "✅ Step 5 passed: Binary re-upgraded for schema tests" - echo " Extension: $NEW_EXTENSION" - echo " Gateway: $NEW_GATEWAY" - echo " Schema: $VERSION_CURRENT (baseline)" - - - name: Setup port forwarding for re-upgrade verification - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Verify data persistence after re-upgrade - run: | - echo "=== Data Persistence: Verifying after re-upgrade ===" - mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval ' - db = db.getSiblingDB("upgrade_test_db"); - var count = db.test_collection.countDocuments(); - assert(count === 2, "Expected 2 documents but found " + count + " after re-upgrade"); - print("✓ All " + count + " documents persisted through re-upgrade"); - ' - echo "✓ Data persistence verified after re-upgrade" - - - name: Cleanup port forwarding after re-upgrade verification - if: always() - run: | - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - rm -f /tmp/pf_output.log - - - name: "Step 6: Schema Finalization (two-phase commit)" - run: | - echo "=== Step 6: Schema Finalization ===" - echo "Setting spec.schemaVersion to finalize the schema migration..." 
- - NEW_EXTENSION="${{ env.DOCUMENTDB_IMAGE }}" - SCHEMA_BASELINE="${{ env.SCHEMA_BASELINE }}" - - # Determine the new schema version from the new extension image tag - # Strip any architecture suffix (e.g., "0.112.0-amd64" → "0.112.0") - RAW_TAG=$(echo "$NEW_EXTENSION" | sed 's/.*://') - NEW_SCHEMA_VERSION=$(echo "$RAW_TAG" | grep -oP '^\d+\.\d+\.\d+') - if [[ -z "$NEW_SCHEMA_VERSION" ]]; then - echo "✗ Could not extract semver from image tag: $RAW_TAG" - exit 1 - fi - echo "Baseline schema version: $SCHEMA_BASELINE" - echo "Target schema version: $NEW_SCHEMA_VERSION" - - # Ensure we're actually testing a version upgrade, not a no-op - if [[ "$SCHEMA_BASELINE" == "$NEW_SCHEMA_VERSION" ]]; then - echo "❌ Baseline and target schema versions are identical ($SCHEMA_BASELINE)." - echo " The upgrade test requires different versions to validate the two-phase commit." - exit 1 - fi - - # Set spec.schemaVersion to trigger ALTER EXTENSION UPDATE - kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' \ - -p "{\"spec\":{\"schemaVersion\":\"$NEW_SCHEMA_VERSION\"}}" - - echo "Waiting for schema version to update..." 
- timeout 300 bash -c ' - while true; do - STATUS_SCHEMA=$(kubectl get documentdb "$1" -n "$2" -o jsonpath="{.status.schemaVersion}" 2>/dev/null) - DB_STATUS=$(kubectl get documentdb "$1" -n "$2" -o jsonpath="{.status.status}" 2>/dev/null) - echo "status.schemaVersion: $STATUS_SCHEMA, status: $DB_STATUS" - if [[ "$STATUS_SCHEMA" == "$3" && "$DB_STATUS" == "Cluster in healthy state" ]]; then - echo "✓ Schema version updated to $STATUS_SCHEMA" - break - fi - sleep 10 - done - ' -- "$DB_NAME" "$DB_NS" "$NEW_SCHEMA_VERSION" - - # Verify schema version changed - FINAL_SCHEMA=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - if [[ "$FINAL_SCHEMA" == "$NEW_SCHEMA_VERSION" ]]; then - echo "✓ Schema finalized: $SCHEMA_BASELINE → $FINAL_SCHEMA" - else - echo "❌ Schema version should be $NEW_SCHEMA_VERSION but is $FINAL_SCHEMA" - exit 1 - fi - - echo "NEW_SCHEMA_VERSION=$NEW_SCHEMA_VERSION" >> $GITHUB_ENV - echo "" - echo "✅ Step 6 passed: Schema finalized to $NEW_SCHEMA_VERSION" - - - name: Setup port forwarding for schema finalization verification - uses: ./.github/actions/setup-port-forwarding - with: - namespace: ${{ env.DB_NS }} - cluster-name: ${{ env.DB_NAME }} - port: ${{ env.DB_PORT }} - architecture: ${{ matrix.architecture }} - test-type: 'comprehensive' - - - name: Verify data persistence after schema finalization - run: | - echo "=== Data Persistence: Verifying after schema finalization ===" - mongosh 127.0.0.1:$DB_PORT \ - -u $DB_USERNAME \ - -p $DB_PASSWORD \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --eval ' - db = db.getSiblingDB("upgrade_test_db"); - var count = db.test_collection.countDocuments(); - assert(count === 2, "Expected 2 documents but found " + count + " after schema finalization"); - print("✓ All " + count + " documents persisted through schema finalization"); - ' - echo "✓ Data persistence verified after schema finalization" - - - name: Cleanup port forwarding 
after schema finalization verification - if: always() - run: | - if [ -f /tmp/pf_pid ]; then - PF_PID=$(cat /tmp/pf_pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/pf_pid - fi - rm -f /tmp/pf_output.log - - - name: "Step 7: Webhook — Reject Rollback Below Schema" - run: | - echo "=== Step 7: Webhook — Reject Rollback Below Schema ===" - echo "Attempting to roll back documentDBImage below status.schemaVersion..." - - CURRENT_SCHEMA="${{ env.NEW_SCHEMA_VERSION }}" - # Use a synthetic image reference with a version guaranteed to be below - # the finalized schema. The webhook extracts the semver from the tag and - # rejects the patch before any pod changes, so the image needn't exist. - ROLLBACK_IMAGE="ghcr.io/${{ github.repository_owner }}/documentdb-kubernetes-operator/documentdb:0.1.0-rollback-test" - - echo "Current schema version: $CURRENT_SCHEMA" - echo "Attempting rollback to: $ROLLBACK_IMAGE (synthetic lower version)" - - # This SHOULD fail — the webhook must reject rollback below schema version. - # Patch both documentDBImage and schemaVersion to exercise both validation paths. - PATCH_OUTPUT=$(kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' \ - -p "{\"spec\":{\"documentDBImage\":\"$ROLLBACK_IMAGE\",\"schemaVersion\":\"$CURRENT_SCHEMA\"}}" 2>&1) && { - echo "❌ Webhook did NOT reject the rollback — patch succeeded unexpectedly" - echo "Output: $PATCH_OUTPUT" - exit 1 - } - - echo "Patch rejected (expected). 
Output:" - echo "$PATCH_OUTPUT" - - # Verify the error message mentions rollback blocking - if echo "$PATCH_OUTPUT" | grep -qi "rollback blocked\|older than installed schema"; then - echo "✓ Webhook correctly rejected rollback with expected error message" - else - echo "⚠️ Patch was rejected but error message doesn't match expected pattern" - echo " (Still passing — the important thing is the rejection)" - fi - - # Verify cluster state is unchanged - CURRENT_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') - NEW_EXTENSION="${{ env.DOCUMENTDB_IMAGE }}" - if [[ "$CURRENT_IMAGE" == "$NEW_EXTENSION" ]]; then - echo "✓ Cluster state unchanged — documentDBImage still at $CURRENT_IMAGE" - else - echo "❌ documentDBImage changed unexpectedly to $CURRENT_IMAGE" - exit 1 - fi - - echo "" - echo "✅ Step 7 passed: Webhook correctly blocked rollback below schema version" - - - name: "Step 8: Webhook — Reject Schema Exceeds Binary" - run: | - echo "=== Step 8: Webhook — Reject Schema Exceeds Binary ===" - echo "Attempting to set schemaVersion higher than binary version..." - - # Use an artificially high version that exceeds any binary - INVALID_SCHEMA="99.999.0" - CURRENT_IMAGE=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.spec.documentDBImage}') - echo "Attempting schemaVersion: $INVALID_SCHEMA" - echo "Current documentDBImage: $CURRENT_IMAGE" - - # This SHOULD fail — the webhook must reject schema > binary. - # Patch both schemaVersion and documentDBImage to exercise both validation paths. - PATCH_OUTPUT=$(kubectl patch documentdb $DB_NAME -n $DB_NS --type='merge' \ - -p "{\"spec\":{\"schemaVersion\":\"$INVALID_SCHEMA\",\"documentDBImage\":\"$CURRENT_IMAGE\"}}" 2>&1) && { - echo "❌ Webhook did NOT reject the invalid schema version — patch succeeded unexpectedly" - echo "Output: $PATCH_OUTPUT" - exit 1 - } - - echo "Patch rejected (expected). 
Output:" - echo "$PATCH_OUTPUT" - - # Verify the error message mentions schema exceeding binary - if echo "$PATCH_OUTPUT" | grep -qi "exceeds.*binary"; then - echo "✓ Webhook correctly rejected schema version with expected error message" - else - echo "⚠️ Patch was rejected but error message doesn't match expected pattern" - echo " (Still passing — the important thing is the rejection)" - fi - - # Verify schema version is unchanged - CURRENT_SCHEMA=$(kubectl get documentdb $DB_NAME -n $DB_NS -o jsonpath='{.status.schemaVersion}') - EXPECTED_SCHEMA="${{ env.NEW_SCHEMA_VERSION }}" - if [[ "$CURRENT_SCHEMA" == "$EXPECTED_SCHEMA" ]]; then - echo "✓ Schema version unchanged: $CURRENT_SCHEMA" - else - echo "❌ Schema version changed unexpectedly to $CURRENT_SCHEMA" - exit 1 - fi - - echo "" - echo "✅ Step 8 passed: Webhook correctly blocked schema version exceeding binary" - - - name: Collect logs on failure - if: failure() - uses: ./.github/actions/collect-logs - with: - architecture: ${{ matrix.architecture }} - operator-namespace: ${{ env.OPERATOR_NS }} - db-namespace: ${{ env.DB_NS }} - db-name: ${{ env.DB_NAME }} - - - name: Test completion summary - if: always() - run: | - echo "## Upgrade & Rollback Test Summary for ${{ matrix.architecture }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "- **Architecture**: ${{ matrix.architecture }}" >> $GITHUB_STEP_SUMMARY - echo "- **Initial Image**: ${{ env.DOCUMENTDB_INITIAL_IMAGE }}" >> $GITHUB_STEP_SUMMARY - echo "- **Old Extension Image**: ${{ env.DOCUMENTDB_OLD_IMAGE }}" >> $GITHUB_STEP_SUMMARY - echo "- **New Extension Image**: ${{ env.DOCUMENTDB_IMAGE }}" >> $GITHUB_STEP_SUMMARY - echo "- **Old Gateway Image**: ${{ env.GATEWAY_OLD_IMAGE }}" >> $GITHUB_STEP_SUMMARY - echo "- **New Gateway Image**: ${{ env.GATEWAY_IMAGE }}" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: ${{ env.IMAGE_TAG }}" >> $GITHUB_STEP_SUMMARY - echo "- **Chart Version**: ${{ env.CHART_VERSION }}" >> $GITHUB_STEP_SUMMARY - echo 
"- **Released Chart Version**: ${{ env.RELEASED_CHART_VERSION }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Test Steps:" >> $GITHUB_STEP_SUMMARY - echo "- Step 1: Operator control plane upgrade (released → built)" >> $GITHUB_STEP_SUMMARY - echo "- Step 2: Upgrade both extension and gateway images" >> $GITHUB_STEP_SUMMARY - echo "- Step 3: Rollback extension image" >> $GITHUB_STEP_SUMMARY - echo "- Step 4: Rollback gateway image" >> $GITHUB_STEP_SUMMARY - echo "- Step 5: Re-upgrade binary (setup for schema tests)" >> $GITHUB_STEP_SUMMARY - echo "- Step 6: Schema finalization (two-phase commit)" >> $GITHUB_STEP_SUMMARY - echo "- Step 7: Webhook — reject rollback below schema" >> $GITHUB_STEP_SUMMARY - echo "- Step 8: Webhook — reject schema exceeds binary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - if [[ "${{ job.status }}" == "success" ]]; then - echo "- **Status**: ✅ PASSED" >> $GITHUB_STEP_SUMMARY - else - echo "- **Status**: ❌ FAILED" >> $GITHUB_STEP_SUMMARY - fi - - test-summary: - name: Upgrade & Rollback Test Summary - runs-on: ubuntu-latest - if: always() - needs: [build, upgrade-and-rollback-test] - steps: - - name: Generate overall test summary - run: | - echo "## Upgrade & Rollback Test Results Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Test Configuration:" >> $GITHUB_STEP_SUMMARY - echo "- **Build Step**: ${{ inputs.image_tag && 'Skipped (using external images)' || 'Executed' }}" >> $GITHUB_STEP_SUMMARY - echo "- **Image Tag**: ${{ inputs.image_tag || 'Built from source' }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Job Results:" >> $GITHUB_STEP_SUMMARY - echo "- **Build**: ${{ needs.build.result }}" >> $GITHUB_STEP_SUMMARY - echo "- **Upgrade & Rollback Tests**: ${{ needs.upgrade-and-rollback-test.result }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - if [[ "${{ needs.upgrade-and-rollback-test.result }}" == "success" ]]; then - 
echo "### Overall Status: ✅ ALL TESTS PASSED" >> $GITHUB_STEP_SUMMARY - else - echo "### Overall Status: ❌ SOME TESTS FAILED" >> $GITHUB_STEP_SUMMARY - echo "Check individual job results above for details." >> $GITHUB_STEP_SUMMARY - fi diff --git a/AGENTS.md b/AGENTS.md index 2f81b08b..1516952c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -371,6 +371,39 @@ Types: - Mock external dependencies appropriately - Ensure tests are idempotent and isolated +### Running E2E tests + +The end-to-end suite lives in [`test/e2e/`](test/e2e/) as a separate Go module +and replaces the legacy `test-integration.yml`, `test-E2E.yml`, +`test-backup-and-restore.yml`, and `test-upgrade-and-rollback.yml` workflows +(and their bash / JavaScript / Python glue). It is a Go / Ginkgo v2 / Gomega +suite that drives the operator end-to-end and speaks the Mongo wire protocol +via `go.mongodb.org/mongo-driver/v2`. + +**Prereqs:** kind + the DocumentDB operator already installed in the target +cluster. In CI, the `.github/actions/setup-test-environment` composite action +handles cluster creation and operator install (via `make deploy`). Locally, +`operator/src/scripts/development/deploy.sh` is the equivalent entry point. + +**Running:** + +```bash +cd test/e2e +ginkgo -r --label-filter=smoke ./tests/... # smoke +ginkgo -r --label-filter=lifecycle ./tests/... # single area +TEST_DEPTH=4 ginkgo -r --procs=4 ./tests/... # full sweep (Lowest depth) +``` + +Labels are defined in `test/e2e/labels.go` (areas: `lifecycle`, `scale`, +`data`, `performance`, `backup`, `recovery`, `tls`, `feature-gates`, +`exposure`, `status`, `upgrade`; plus cross-cutting `smoke`/`basic`/ +`destructive`/`disruptive`/`slow` and capability `needs-*` labels). Depth is +controlled by `TEST_DEPTH` (0=Highest … 4=Lowest, default 2=Medium). 
+ +See [`test/e2e/README.md`](test/e2e/README.md) for the full env-var table +(including `E2E_RUN_ID` and the `E2E_UPGRADE_*` upgrade-suite variables), +helper-package index, troubleshooting, and CNPG dependency policy. + ### Code Review For thorough code reviews, reference the code review agent: diff --git a/CHANGELOG.md b/CHANGELOG.md index b52b8f9f..92a27c8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,18 @@ ### Breaking Changes - **Validating webhook added**: A new `ValidatingWebhookConfiguration` enforces that `spec.schemaVersion` never exceeds the binary version and blocks `spec.documentDBVersion` rollbacks below the committed schema version. This requires [cert-manager](https://cert-manager.io/) to be installed in the cluster (it is already a prerequisite for the sidecar injector). Existing clusters upgrading to this release will have the webhook activated automatically via `helm upgrade`. +### Testing infrastructure +- **Unified E2E test suite ([#346](https://github.com/documentdb/documentdb-kubernetes-operator/pull/346))**: The four legacy end-to-end workflows (`test-integration.yml`, `test-E2E.yml`, `test-backup-and-restore.yml`, `test-upgrade-and-rollback.yml`) and their bash / JavaScript (mongosh) / Python (pymongo) glue have been replaced by a single Go / Ginkgo v2 / Gomega suite under `test/e2e/`. Specs are organised by CRD operation (lifecycle, scale, data, performance, backup, tls, feature gates, exposure, status, upgrade), reuse CloudNative-PG's `tests/utils` packages as a library, and speak the Mongo wire protocol via `go.mongodb.org/mongo-driver/v2`. + +### Breaking changes for contributors +- **Local E2E invocation changed.** Tests are now run via `ginkgo` against an already-provisioned cluster, not via `npm test` / bash scripts. Typical invocation: + ```bash + cd test/e2e + ginkgo -r --label-filter=smoke ./tests/... + ``` + Label selection replaces per-workflow entry points; depth is controlled by `TEST_DEPTH` (0=Highest … 4=Lowest). 
See [`test/e2e/README.md`](test/e2e/README.md) for prereqs, the full env-var table (including `E2E_RUN_ID` and the `E2E_UPGRADE_*` upgrade-suite variables), and troubleshooting. +- **Design rationale** for the migration — scope, fixture tiers, parallelism model, CNPG reuse strategy — is documented in [`docs/designs/e2e-test-suite.md`](docs/designs/e2e-test-suite.md). + ## [0.2.0] - 2026-03-25 ### Major Features diff --git a/operator/src/scripts/test-scripts/comprehensive_mongosh_tests.js b/operator/src/scripts/test-scripts/comprehensive_mongosh_tests.js deleted file mode 100644 index c4d4bb33..00000000 --- a/operator/src/scripts/test-scripts/comprehensive_mongosh_tests.js +++ /dev/null @@ -1,497 +0,0 @@ -// Comprehensive DocumentDB test suite with validation -print("=== Starting Comprehensive DocumentDB Tests with Validation ==="); - -// Validation helper function -function validate(condition, message) { - if (!condition) { - print("DEBUG: Validation failed for: " + message); - print("DEBUG: Condition was:", condition); - throw new Error("VALIDATION FAILED: " + message); - } - print("✓ " + message); -} - -// Helper function to handle Long objects returned by some MongoDB drivers -function getLongValue(val) { - if (typeof val === 'object' && val !== null && 'low' in val) { - return val.low; // Extract the actual number from Long object - } - return val; -} - -// Test 1: Basic Connection and Database Operations -print("\n--- Test 1: Basic Database Operations ---"); -db = db.getSiblingDB('testdb'); - -// Verify database connection -print("DEBUG: Current database:", db.getName()); -print("DEBUG: Database connection test:", db.runCommand({ping: 1})); - -// Test collection creation -db.createCollection("users"); -db.createCollection("products"); -db.createCollection("orders"); - -// Validate collections were created -var collections = db.getCollectionNames(); -validate(collections.includes("users"), "Users collection created"); -validate(collections.includes("products"), 
"Products collection created"); -validate(collections.includes("orders"), "Orders collection created"); - -// Insert sample data -var users = [ - { _id: 1, name: "John Doe", email: "john@example.com", age: 30, city: "New York" }, - { _id: 2, name: "Jane Smith", email: "jane@example.com", age: 25, city: "San Francisco" }, - { _id: 3, name: "Bob Johnson", email: "bob@example.com", age: 35, city: "Chicago" }, - { _id: 4, name: "Alice Brown", email: "alice@example.com", age: 28, city: "Seattle" } -]; - -var products = [ - { _id: 1, name: "Laptop", price: 999.99, category: "Electronics", stock: 50 }, - { _id: 2, name: "Phone", price: 699.99, category: "Electronics", stock: 100 }, - { _id: 3, name: "Book", price: 19.99, category: "Education", stock: 200 }, - { _id: 4, name: "Desk", price: 299.99, category: "Furniture", stock: 25 } -]; - -var orders = [ - { _id: 1, userId: 1, productId: 1, quantity: 1, total: 999.99, date: new Date() }, - { _id: 2, userId: 2, productId: 2, quantity: 2, total: 1399.98, date: new Date() }, - { _id: 3, userId: 3, productId: 3, quantity: 3, total: 59.97, date: new Date() } -]; - -var userResult = db.users.insertMany(users); -var productResult = db.products.insertMany(products); -var orderResult = db.orders.insertMany(orders); - -// Debug the insert results -print("DEBUG: userResult:", JSON.stringify(userResult)); -print("DEBUG: productResult:", JSON.stringify(productResult)); -print("DEBUG: orderResult:", JSON.stringify(orderResult)); - -// Helper function to get insertedIds count (handles both array and object formats) -function getInsertedCount(result) { - if (result.insertedIds) { - if (Array.isArray(result.insertedIds)) { - return result.insertedIds.length; - } else if (typeof result.insertedIds === 'object') { - return Object.keys(result.insertedIds).length; - } - } - return 0; -} - -// Validate insertions -validate(userResult.acknowledged === true, "User insertion was acknowledged"); -validate(getInsertedCount(userResult) === 4, 
"Inserted exactly 4 users"); -validate(productResult.acknowledged === true, "Product insertion was acknowledged"); -validate(getInsertedCount(productResult) === 4, "Inserted exactly 4 products"); -validate(orderResult.acknowledged === true, "Order insertion was acknowledged"); -validate(getInsertedCount(orderResult) === 3, "Inserted exactly 3 orders"); - -print("Inserted", getInsertedCount(userResult), "users"); -print("Inserted", getInsertedCount(productResult), "products"); -print("Inserted", getInsertedCount(orderResult), "orders"); - -// Verify the documents actually exist in the database -var actualUserCount = db.users.countDocuments(); -var actualProductCount = db.products.countDocuments(); -var actualOrderCount = db.orders.countDocuments(); - -print("DEBUG: Actual document counts - Users:", actualUserCount, "Products:", actualProductCount, "Orders:", actualOrderCount); -validate(actualUserCount === 4, "Database contains exactly 4 users"); -validate(actualProductCount === 4, "Database contains exactly 4 products"); -validate(actualOrderCount === 3, "Database contains exactly 3 orders"); - -// Verify specific users exist -var johnExists = db.users.findOne({ name: "John Doe" }); -var janeExists = db.users.findOne({ name: "Jane Smith" }); -print("DEBUG: John Doe exists:", johnExists !== null); -print("DEBUG: Jane Smith exists:", janeExists !== null); -validate(johnExists !== null, "John Doe document exists after insertion"); -validate(janeExists !== null, "Jane Smith document exists after insertion"); - -// Test 2: Query Operations -print("\n--- Test 2: Query Operations ---"); - -// Simple queries with validation -var youngUsers = db.users.find({ age: { $lt: 30 } }).toArray(); -validate(youngUsers.length === 2, "Found exactly 2 users under 30 (Jane: 25, Alice: 28)"); -validate(youngUsers.some(u => u.name === "Jane Smith"), "Jane Smith found in young users"); -validate(youngUsers.some(u => u.name === "Alice Brown"), "Alice Brown found in young users"); - -var 
expensiveProducts = db.products.find({ price: { $gt: 500 } }).toArray(); -validate(expensiveProducts.length === 2, "Found exactly 2 expensive products (Laptop, Phone)"); -validate(expensiveProducts.some(p => p.name === "Laptop"), "Laptop found in expensive products"); -validate(expensiveProducts.some(p => p.name === "Phone"), "Phone found in expensive products"); - -// Complex queries with sorting -var sortedUsers = db.users.find().sort({ age: -1 }).toArray(); -validate(sortedUsers.length === 4, "Sorted query returned all 4 users"); -validate(sortedUsers[0].name === "Bob Johnson" && sortedUsers[0].age === 35, "First user is Bob (35)"); -validate(sortedUsers[1].name === "John Doe" && sortedUsers[1].age === 30, "Second user is John (30)"); -validate(sortedUsers[2].name === "Alice Brown" && sortedUsers[2].age === 28, "Third user is Alice (28)"); -validate(sortedUsers[3].name === "Jane Smith" && sortedUsers[3].age === 25, "Fourth user is Jane (25)"); - -print("Users sorted by age (desc):", sortedUsers.map(u => u.name + " (" + u.age + ")")); - -// Test 3: Aggregation Pipeline -print("\n--- Test 3: Aggregation Operations ---"); - -// Average age with validation -var avgAge = db.users.aggregate([ - { $group: { _id: null, avgAge: { $avg: "$age" }, count: { $sum: 1 } } } -]).toArray(); - -var expectedAvgAge = (30 + 25 + 35 + 28) / 4; // 29.5 -validate(avgAge.length === 1, "Aggregation returned exactly 1 result"); -validate(Math.abs(avgAge[0].avgAge - expectedAvgAge) < 0.01, "Average age is correct: " + expectedAvgAge); -validate(avgAge[0].count === 4, "Count is correct: 4 users"); - -print("Average user age:", avgAge[0].avgAge, "from", avgAge[0].count, "users"); - -// Group by city with validation -var cityGroups = db.users.aggregate([ - { $group: { _id: "$city", count: { $sum: 1 }, avgAge: { $avg: "$age" } } }, - { $sort: { count: -1 } } -]).toArray(); - -validate(cityGroups.length === 4, "Grouped by 4 different cities"); -var cities = cityGroups.map(g => g._id); 
-validate(cities.includes("New York"), "New York city group found"); -validate(cities.includes("San Francisco"), "San Francisco city group found"); -validate(cities.includes("Chicago"), "Chicago city group found"); -validate(cities.includes("Seattle"), "Seattle city group found"); - -print("Users by city:", cityGroups); - -// Product statistics with validation -var productStats = db.products.aggregate([ - { $group: { - _id: "$category", - count: { $sum: 1 }, - avgPrice: { $avg: "$price" }, - totalStock: { $sum: "$stock" } - }}, - { $sort: { avgPrice: -1 } } -]).toArray(); - -validate(productStats.length === 3, "Grouped by 3 categories"); -var electronicsStats = productStats.find(s => s._id === "Electronics"); -validate(electronicsStats && electronicsStats.count === 2, "Electronics category has 2 products"); -validate(electronicsStats && electronicsStats.totalStock === 150, "Electronics total stock is 150"); - -print("Product statistics by category:", productStats); - -// Test 4: Update Operations -print("\n--- Test 4: Update Operations ---"); - -// Small delay to ensure inserts are fully committed -print("DEBUG: Waiting for inserts to be committed..."); -sleep(1000); // 1 second delay - -// First, verify the user exists before attempting update -var johnBefore = db.users.findOne({ name: "John Doe" }); -print("DEBUG: John Doe before update:", JSON.stringify(johnBefore)); -validate(johnBefore !== null, "John Doe document exists before update"); -validate(johnBefore.name === "John Doe", "John Doe has correct name"); -validate(johnBefore.age === 30, "John Doe has initial age of 30"); - -// Update single document with validation -var updateResult = db.users.updateOne( - { name: "John Doe" }, - { $set: { age: 31, lastUpdated: new Date() } } -); - -print("DEBUG: updateResult:", JSON.stringify(updateResult)); - -var matchedCount = getLongValue(updateResult.matchedCount); -var modifiedCount = getLongValue(updateResult.modifiedCount); - -print("DEBUG: Extracted counts - 
matched:", matchedCount, "modified:", modifiedCount); - -// If first update fails, try with exact field matching -if (matchedCount !== 1) { - print("DEBUG: First update failed, trying exact match..."); - var allUsers = db.users.find().toArray(); - print("DEBUG: All users in database:", JSON.stringify(allUsers)); - - // Try to find John with different criteria - var johnVariants = [ - db.users.findOne({ name: "John Doe" }), - db.users.findOne({ _id: 1 }), - db.users.findOne({ email: "john@example.com" }) - ]; - print("DEBUG: John search variants:", JSON.stringify(johnVariants)); - - // Try update by _id instead - updateResult = db.users.updateOne( - { _id: 1 }, - { $set: { age: 31, lastUpdated: new Date() } } - ); - print("DEBUG: updateResult by _id:", JSON.stringify(updateResult)); - - matchedCount = getLongValue(updateResult.matchedCount); - modifiedCount = getLongValue(updateResult.modifiedCount); -} - -validate(matchedCount === 1, "Update matched exactly 1 document"); -validate(modifiedCount === 1, "Update modified exactly 1 document"); - -// Verify the update -var updatedJohn = db.users.findOne({ name: "John Doe" }); -validate(updatedJohn.age === 31, "John's age updated to 31"); -validate(updatedJohn.lastUpdated !== undefined, "John has lastUpdated field"); - -print("Updated", modifiedCount, "user document"); - -// Update multiple documents with validation -var electronicsBeforeUpdate = db.products.find({ category: "Electronics" }).toArray(); -print("DEBUG: Electronics products before bulk update:", JSON.stringify(electronicsBeforeUpdate)); -validate(electronicsBeforeUpdate.length === 2, "Found exactly 2 Electronics products before update"); - -var bulkUpdate = db.products.updateMany( - { category: "Electronics" }, - { $inc: { stock: -5 }, $set: { lastSold: new Date() } } -); - -print("DEBUG: bulkUpdate result:", JSON.stringify(bulkUpdate)); - -var bulkMatchedCount = getLongValue(bulkUpdate.matchedCount); -var bulkModifiedCount = 
getLongValue(bulkUpdate.modifiedCount); - -print("DEBUG: Extracted bulk counts - matched:", bulkMatchedCount, "modified:", bulkModifiedCount); -validate(bulkMatchedCount === 2, "Bulk update matched 2 Electronics products"); -validate(bulkModifiedCount === 2, "Bulk update modified 2 products"); - -// Verify bulk update -var updatedElectronics = db.products.find({ category: "Electronics" }).toArray(); -validate(updatedElectronics.every(p => p.lastSold !== undefined), "All electronics have lastSold field"); -var laptop = updatedElectronics.find(p => p.name === "Laptop"); -var phone = updatedElectronics.find(p => p.name === "Phone"); -validate(laptop.stock === 45, "Laptop stock reduced to 45"); -validate(phone.stock === 95, "Phone stock reduced to 95"); - -print("Updated", bulkModifiedCount, "product documents"); - -// Upsert operation with validation -var existingUser = db.users.findOne({ email: "new@example.com" }); -print("DEBUG: Existing user with new@example.com:", JSON.stringify(existingUser)); - -var upsertResult = db.users.updateOne( - { email: "new@example.com" }, - { $set: { name: "New User", age: 22, city: "Boston" } }, - { upsert: true } -); - -print("DEBUG: upsertResult:", JSON.stringify(upsertResult)); - -var upsertMatchedCount = getLongValue(upsertResult.matchedCount); -var upsertModifiedCount = getLongValue(upsertResult.modifiedCount); -var upsertedCount = getLongValue(upsertResult.upsertedCount); - -print("DEBUG: Extracted upsert counts - matched:", upsertMatchedCount, "modified:", upsertModifiedCount, "upserted:", upsertedCount); -validate(upsertMatchedCount === 0, "Upsert matched 0 existing documents"); -validate(upsertModifiedCount === 0, "Upsert modified 0 existing documents"); -validate(upsertedCount === 1, "Upsert created 1 new document"); - -// Verify upsert -var newUser = db.users.findOne({ email: "new@example.com" }); -validate(newUser && newUser.name === "New User", "New user created with correct name"); -validate(newUser && newUser.age === 
22, "New user has correct age"); - -print("Upsert operation - matched:", upsertMatchedCount, "modified:", upsertModifiedCount, "upserted:", upsertedCount); - -// Test 5: Text Search -print("\n--- Test 5: Text Search ---"); - -// Simple text search without text index -var laptopProducts = db.products.find({ name: /laptop/i }).toArray(); -validate(laptopProducts.length === 1, "Text search found exactly 1 laptop"); -validate(laptopProducts[0].name === "Laptop", "Found product is the Laptop"); - -print("Text search for 'laptop' found:", laptopProducts.length, "products"); - -// Test 6: Array Operations -print("\n--- Test 6: Array Operations ---"); - -// Count users before adding hobbies array -var userCountBefore = db.users.countDocuments(); -print("DEBUG: User count before adding hobbies:", userCountBefore); - -// Add array field to users -var arrayUpdateResult = db.users.updateMany( - {}, - { $set: { hobbies: [] } } -); -print("DEBUG: arrayUpdateResult:", JSON.stringify(arrayUpdateResult)); - -var arrayMatchedCount = getLongValue(arrayUpdateResult.matchedCount); -var arrayModifiedCount = getLongValue(arrayUpdateResult.modifiedCount); - -print("DEBUG: Extracted array counts - matched:", arrayMatchedCount, "modified:", arrayModifiedCount); -validate(arrayMatchedCount === userCountBefore, "Array update matched all " + userCountBefore + " users"); -validate(arrayModifiedCount === userCountBefore, "Added hobbies array to all " + userCountBefore + " users"); - -// Verify hobbies field was added -var usersWithHobbiesField = db.users.find({ hobbies: { $exists: true } }).toArray(); -validate(usersWithHobbiesField.length === userCountBefore, "All users now have hobbies field"); - -// Update with array operations -var johnBeforeHobbies = db.users.findOne({ name: "John Doe" }); -print("DEBUG: John before adding hobbies:", JSON.stringify(johnBeforeHobbies)); -validate(johnBeforeHobbies !== null, "John Doe exists before adding hobbies"); 
-validate(Array.isArray(johnBeforeHobbies.hobbies), "John has hobbies array field"); - -var johnHobbiesResult = db.users.updateOne( - { name: "John Doe" }, - { $push: { hobbies: { $each: ["reading", "gaming", "cooking"] } } } -); -print("DEBUG: johnHobbiesResult:", JSON.stringify(johnHobbiesResult)); - -var johnHobbiesMatched = getLongValue(johnHobbiesResult.matchedCount); -var johnHobbiesModified = getLongValue(johnHobbiesResult.modifiedCount); - -validate(johnHobbiesMatched === 1, "John hobbies update matched 1 document"); -validate(johnHobbiesModified === 1, "Added hobbies to John"); - -var janeHobbiesResult = db.users.updateOne( - { name: "Jane Smith" }, - { $push: { hobbies: { $each: ["traveling", "photography"] } } } -); -print("DEBUG: janeHobbiesResult:", JSON.stringify(janeHobbiesResult)); - -var janeHobbiesMatched = getLongValue(janeHobbiesResult.matchedCount); -var janeHobbiesModified = getLongValue(janeHobbiesResult.modifiedCount); - -validate(janeHobbiesMatched === 1, "Jane hobbies update matched 1 document"); -validate(janeHobbiesModified === 1, "Added hobbies to Jane"); - -var usersWithHobbies = db.users.find({ hobbies: { $exists: true, $ne: [] } }).toArray(); -validate(usersWithHobbies.length === 2, "Found exactly 2 users with hobbies"); - -// Array query operations -var readingUsers = db.users.find({ hobbies: "reading" }).toArray(); -validate(readingUsers.length === 1, "Found exactly 1 user who likes reading"); -validate(readingUsers[0].name === "John Doe", "John Doe likes reading"); - -print("Users with hobbies:", usersWithHobbies.length); -print("Users who like reading:", readingUsers.length); - -// Test 7: Date Operations -print("\n--- Test 7: Date Operations ---"); - -var today = new Date(); -var yesterday = new Date(today.getTime() - 24 * 60 * 60 * 1000); - -var recentOrders = db.orders.find({ date: { $gte: yesterday } }).toArray(); -validate(recentOrders.length === 3, "All 3 orders are recent (created today)"); - -// Date aggregation -var 
dailyStats = db.orders.aggregate([ - { $group: { - _id: { $dateToString: { format: "%Y-%m-%d", date: "$date" } }, - totalOrders: { $sum: 1 }, - totalAmount: { $sum: "$total" } - }} -]).toArray(); - -validate(dailyStats.length === 1, "Orders grouped into 1 day"); -validate(dailyStats[0].totalOrders === 3, "Total orders for today is 3"); -var expectedTotal = 999.99 + 1399.98 + 59.97; -validate(Math.abs(dailyStats[0].totalAmount - expectedTotal) < 0.01, "Total amount is correct"); - -print("Recent orders:", recentOrders.length); -print("Daily order statistics:", dailyStats); - -// Test 8: Batch Operations -print("\n--- Test 8: Batch Operations ---"); - -// Debug: Check current products before bulk ops -var allProducts = db.products.find().toArray(); -print("DEBUG: All products before bulk ops:", JSON.stringify(allProducts)); - -var electronicsProducts = db.products.find({ category: "Electronics" }).toArray(); -var cheapProducts = db.products.find({ price: { $lt: 100 } }).toArray(); - -print("DEBUG: Electronics products:", electronicsProducts.length); -print("DEBUG: Products < $100:", cheapProducts.length); -print("DEBUG: Expected total matches:", electronicsProducts.length + cheapProducts.length); - -var bulkOps = db.products.initializeUnorderedBulkOp(); -bulkOps.find({ category: "Electronics" }).update({ $inc: { views: 1 } }); -bulkOps.find({ price: { $lt: 100 } }).update({ $set: { featured: true } }); -bulkOps.insert({ name: "New Product", price: 49.99, category: "Test", stock: 10 }); - -var bulkResult = bulkOps.execute(); - -print("DEBUG: Bulk result:", JSON.stringify(bulkResult)); - -// Handle different property names between MongoDB and DocumentDB -var nMatched = bulkResult.nMatched || bulkResult.matchedCount || 0; -var nModified = bulkResult.nModified || bulkResult.modifiedCount || 0; -var nInserted = bulkResult.nInserted || bulkResult.insertedCount || 0; - -print("DEBUG: nMatched:", nMatched, "nModified:", nModified, "nInserted:", nInserted); - -// Use more 
flexible validation based on actual data -var expectedMatches = electronicsProducts.length + cheapProducts.length; -validate(nMatched >= expectedMatches - 1, "Bulk operations matched at least " + (expectedMatches - 1) + " documents"); // Allow for slight variance -validate(nModified >= expectedMatches - 1, "Bulk operations modified at least " + (expectedMatches - 1) + " documents"); -validate(nInserted === 1, "Bulk operations inserted 1 document"); - -// Verify bulk operations -var electronicsWithViews = db.products.find({ category: "Electronics", views: { $exists: true } }).toArray(); -validate(electronicsWithViews.length === 2, "Both electronics products have views field"); - -var featuredProducts = db.products.find({ featured: true }).toArray(); -validate(featuredProducts.length >= 1, "At least 1 product is featured"); // Book should be featured - -var newProduct = db.products.findOne({ name: "New Product" }); -validate(newProduct !== null, "New product was inserted"); -validate(newProduct.price === 49.99, "New product has correct price"); - -print("Bulk operation results - matched:", nMatched, "modified:", nModified, "inserted:", nInserted); - -// Test 9: Final Verification -print("\n--- Test 9: Final Data Verification ---"); - -var totalUsers = db.users.countDocuments(); -var totalProducts = db.products.countDocuments(); -var totalOrders = db.orders.countDocuments(); - -print("DEBUG: Final counts - Users:", totalUsers, "Products:", totalProducts, "Orders:", totalOrders); - -// Use dynamic validation based on actual counts (4 original + 1 upserted = 5) -var expectedUsers = 5; // 4 original + 1 upserted -var expectedProducts = 5; // 4 original + 1 bulk inserted -var expectedOrders = 3; // 3 original - -validate(totalUsers === expectedUsers, "Final user count is " + expectedUsers + " (4 original + 1 upserted)"); -validate(totalProducts === expectedProducts, "Final product count is " + expectedProducts + " (4 original + 1 bulk inserted)"); -validate(totalOrders 
=== expectedOrders, "Final order count is " + expectedOrders); - -print("Final counts - Users:", totalUsers, "Products:", totalProducts, "Orders:", totalOrders); - -// Test data consistency -var allUsersHaveHobbies = db.users.find({ hobbies: { $exists: false } }).toArray(); -validate(allUsersHaveHobbies.length === 0, "All users have hobbies field"); - -var johnFinal = db.users.findOne({ name: "John Doe" }); -print("DEBUG: John final state:", JSON.stringify(johnFinal)); -validate(johnFinal !== null, "John Doe document exists at end"); -validate(johnFinal.age === 31, "John's age is still 31"); -validate(johnFinal.hobbies && johnFinal.hobbies.includes("reading"), "John still has reading hobby"); - -// Clean up test data -print("\n--- Cleanup ---"); -db.users.drop(); -db.products.drop(); -db.orders.drop(); - -// Verify cleanup -var remainingCollections = db.getCollectionNames(); -validate(!remainingCollections.includes("users"), "Users collection dropped"); -validate(!remainingCollections.includes("products"), "Products collection dropped"); -validate(!remainingCollections.includes("orders"), "Orders collection dropped"); - -print("\n=== All Tests Completed Successfully with Validation! 
==="); diff --git a/operator/src/scripts/test-scripts/mongo-python-data-pusher.py b/operator/src/scripts/test-scripts/mongo-python-data-pusher.py deleted file mode 100644 index 35f53917..00000000 --- a/operator/src/scripts/test-scripts/mongo-python-data-pusher.py +++ /dev/null @@ -1,41 +0,0 @@ -from pymongo import MongoClient -from pprint import pprint -import ssl - -# Connection parameters -host = "127.0.0.1" # Use localhost for local testing or replace with the actual load balancer endpoint -port = 10260 -username = "default_user" -password = "Admin100" # Default is Admin100 -auth_db = "admin" # Default auth source unless otherwise needed - -# Connect with TLS and skip cert validation -client = MongoClient( - host, - port, - username=username, - password=password, - authSource=auth_db, - authMechanism="SCRAM-SHA-256", - tls=True, - tlsAllowInvalidCertificates=True -) - -# Use the database -club_db = client["soccer_league"] - -# Insert a soccer club document -insert_result = club_db.clubs.insert_one({ - "name": "Manchester United", - "country": "England", - "founded": 1878, - "stadium": "Old Trafford", - "league": "Premier League", - "titles": ["Premier League", "FA Cup", "Champions League"] -}) - -print(f"Inserted soccer club document ID: {insert_result.inserted_id}") - -# Find all soccer clubs -for doc in club_db.clubs.find(): - pprint(doc) diff --git a/operator/src/scripts/test-scripts/performance_test.js b/operator/src/scripts/test-scripts/performance_test.js deleted file mode 100644 index 94474493..00000000 --- a/operator/src/scripts/test-scripts/performance_test.js +++ /dev/null @@ -1,222 +0,0 @@ -// Performance Test Suite with Validation -print("=== Performance Test Suite with Validation ==="); - -// Validation helper function -function validate(condition, message) { - if (!condition) { - print("DEBUG: Performance validation failed for: " + message); - print("DEBUG: Condition was:", condition); - throw new Error("PERFORMANCE VALIDATION FAILED: " + message); 
- } - print("✓ " + message); -} - -// Helper function to handle Long objects returned by some MongoDB drivers -function getLongValue(val) { - if (typeof val === 'object' && val !== null && 'low' in val) { - return val.low; // Extract the actual number from Long object - } - return val; -} - -db = db.getSiblingDB('perftest'); - -// Large dataset insertion test -print("\n--- Large Dataset Insertion Test ---"); -var startTime = new Date(); -var docs = []; -for (let i = 0; i < 1000; i++) { - docs.push({ - id: i, - name: "User " + i, - email: "user" + i + "@example.com", - data: "This is sample data for user " + i, - timestamp: new Date(), - metadata: { - source: "performance_test", - batch: Math.floor(i / 100), - random: Math.random() - } - }); -} - -validate(docs.length === 1000, "Created exactly 1000 test documents"); - -var insertStart = new Date(); -var result = db.perfcollection.insertMany(docs); -var insertEnd = new Date(); - -// Debug the insert result -print("DEBUG: performance insertMany result:", JSON.stringify(result)); - -// Helper function to get insertedIds count (handles both array and object formats) -function getInsertedCount(result) { - if (result.insertedIds) { - if (Array.isArray(result.insertedIds)) { - return result.insertedIds.length; - } else if (typeof result.insertedIds === 'object') { - return Object.keys(result.insertedIds).length; - } - } - return 0; -} - -var insertTime = insertEnd - insertStart; -validate(result.acknowledged === true, "Insertion was acknowledged"); -validate(getInsertedCount(result) === 1000, "Inserted exactly 1000 documents"); -validate(insertTime < 10000, "Insertion completed within 10 seconds (took " + insertTime + "ms)"); - -print("Inserted", getInsertedCount(result), "documents in", insertTime, "ms"); - -// Query performance test -print("\n--- Query Performance Test ---"); - -var queryStart = new Date(); -var count = db.perfcollection.countDocuments(); -var queryEnd = new Date(); - -var countTime = queryEnd - 
queryStart; -validate(count === 1000, "Count query returned correct result: 1000"); -validate(countTime < 5000, "Count query completed within 5 seconds (took " + countTime + "ms)"); - -print("Count query took", countTime, "ms, result:", count); - -// Range query performance test -print("\n--- Range Query Performance Test ---"); - -var queryStart2 = new Date(); -var rangeResults = db.perfcollection.find({ id: { $gte: 500 } }).toArray(); -var queryEnd2 = new Date(); - -var rangeTime = queryEnd2 - queryStart2; -validate(rangeResults.length === 500, "Range query returned exactly 500 documents"); -validate(rangeTime < 5000, "Range query completed within 5 seconds (took " + rangeTime + "ms)"); - -// Validate range query results -var minId = Math.min(...rangeResults.map(r => r.id)); -var maxId = Math.max(...rangeResults.map(r => r.id)); -validate(minId === 500, "Minimum ID in range results is 500"); -validate(maxId === 999, "Maximum ID in range results is 999"); - -print("Range query found", rangeResults.length, "documents in", rangeTime, "ms"); - -// Aggregation performance -print("\n--- Aggregation Performance Test ---"); - -var aggStart = new Date(); -var aggResult = db.perfcollection.aggregate([ - { $match: { id: { $gte: 100 } } }, - { $group: { _id: "$metadata.batch", count: { $sum: 1 }, avgId: { $avg: "$id" } } }, - { $sort: { _id: 1 } } -]).toArray(); -var aggEnd = new Date(); - -var aggTime = aggEnd - aggStart; -validate(aggResult.length === 9, "Aggregation returned 9 batches (batches 1-9)"); // 100-999 = batches 1-9 -validate(aggTime < 5000, "Aggregation completed within 5 seconds (took " + aggTime + "ms)"); - -// Validate aggregation results -var totalDocs = aggResult.reduce((sum, batch) => sum + batch.count, 0); -validate(totalDocs === 900, "Aggregation processed exactly 900 documents (id >= 100)"); - -// Check specific batch -var batch5 = aggResult.find(r => r._id === 5); -validate(batch5 && batch5.count === 100, "Batch 5 has exactly 100 documents"); 
-validate(batch5 && Math.abs(batch5.avgId - 549.5) < 0.1, "Batch 5 average ID is correct (~549.5)"); - -print("Aggregation processed", aggResult.length, "groups in", aggTime, "ms"); - -// Test sorting performance -print("\n--- Sorting Performance Test ---"); - -var sortStart = new Date(); -var sortedResults = db.perfcollection.find({ id: { $lt: 100 } }).sort({ id: -1 }).toArray(); -var sortEnd = new Date(); - -var sortTime = sortEnd - sortStart; -validate(sortedResults.length === 100, "Sort query returned exactly 100 documents"); -validate(sortTime < 3000, "Sort query completed within 3 seconds (took " + sortTime + "ms)"); - -// Validate sorting -validate(sortedResults[0].id === 99, "First document has ID 99 (descending sort)"); -validate(sortedResults[99].id === 0, "Last document has ID 0 (descending sort)"); - -for (let i = 0; i < sortedResults.length - 1; i++) { - validate(sortedResults[i].id > sortedResults[i + 1].id, "Documents are sorted in descending order"); -} - -print("Sort query processed", sortedResults.length, "documents in", sortTime, "ms"); - -// Test update performance -print("\n--- Update Performance Test ---"); - -var updateStart = new Date(); -var updateResult = db.perfcollection.updateMany( - { "metadata.batch": { $in: [0, 1, 2] } }, - { $set: { updated: true, updateTime: new Date() } } -); -var updateEnd = new Date(); - -var updateTime = updateEnd - updateStart; - -var perfUpdateMatchedCount = getLongValue(updateResult.matchedCount); -var perfUpdateModifiedCount = getLongValue(updateResult.modifiedCount); - -validate(perfUpdateMatchedCount === 300, "Update matched exactly 300 documents (3 batches × 100)"); -validate(perfUpdateModifiedCount === 300, "Update modified exactly 300 documents"); -validate(updateTime < 3000, "Update completed within 3 seconds (took " + updateTime + "ms)"); - -// Verify updates -var updatedDocs = db.perfcollection.find({ updated: true }).toArray(); -validate(updatedDocs.length === 300, "Found exactly 300 updated 
documents"); -validate(updatedDocs.every(doc => doc.updateTime !== undefined), "All updated docs have updateTime"); - -print("Update modified", perfUpdateModifiedCount, "documents in", updateTime, "ms"); - -// Test delete performance -print("\n--- Delete Performance Test ---"); - -var deleteStart = new Date(); -var deleteResult = db.perfcollection.deleteMany({ id: { $gte: 950 } }); -var deleteEnd = new Date(); - -var deleteTime = deleteEnd - deleteStart; - -var perfDeletedCount = getLongValue(deleteResult.deletedCount); - -validate(perfDeletedCount === 50, "Deleted exactly 50 documents (IDs 950-999)"); -validate(deleteTime < 2000, "Delete completed within 2 seconds (took " + deleteTime + "ms)"); - -// Verify deletions -var remainingCount = db.perfcollection.countDocuments(); -validate(remainingCount === 950, "Exactly 950 documents remain after deletion"); - -var deletedDocs = db.perfcollection.find({ id: { $gte: 950 } }).toArray(); -validate(deletedDocs.length === 0, "No documents with ID >= 950 remain"); - -print("Delete removed", perfDeletedCount, "documents in", deleteTime, "ms"); - -// Overall performance summary -print("\n--- Performance Summary ---"); -var totalTime = new Date() - startTime; -validate(totalTime < 30000, "All performance tests completed within 30 seconds (took " + totalTime + "ms)"); - -print("Total performance test time:", totalTime, "ms"); -print("Insert rate:", Math.round(1000 / (insertTime / 1000)), "docs/sec"); -print("Query rate:", Math.round(1000 / (countTime / 1000)), "queries/sec"); -print("Update rate:", Math.round(300 / (updateTime / 1000)), "updates/sec"); -print("Delete rate:", Math.round(50 / (deleteTime / 1000)), "deletes/sec"); - -// Cleanup with validation -var dropStart = new Date(); -db.perfcollection.drop(); -var dropEnd = new Date(); - -var dropTime = dropEnd - dropStart; -validate(dropTime < 2000, "Collection drop completed within 2 seconds (took " + dropTime + "ms)"); - -// Verify cleanup -var collections = 
db.getCollectionNames(); -validate(!collections.includes("perfcollection"), "Performance collection was dropped"); - -print("\n=== Performance Tests Completed Successfully with Validation! ==="); diff --git a/operator/src/scripts/test-scripts/test-mongodb-connection.sh b/operator/src/scripts/test-scripts/test-mongodb-connection.sh deleted file mode 100644 index 47bf75b7..00000000 --- a/operator/src/scripts/test-scripts/test-mongodb-connection.sh +++ /dev/null @@ -1,435 +0,0 @@ -#!/bin/bash - -# MongoDB Connection Test Script -# Tests MongoDB connection using mongosh with comprehensive validation - -set -e - -# Default values -ARCHITECTURE="" -NAMESPACE="" -CLUSTER_NAME="" -POD_NAME="" -PORT="" -USERNAME="" -PASSWORD="" -TEST_TYPE="comprehensive" - -# Function to display usage -usage() { - echo "Usage: $0 [OPTIONS]" - echo "Options:" - echo " --architecture ARCH Target architecture for logging" - echo " --namespace NS Kubernetes namespace" - echo " --cluster-name NAME DocumentDB cluster name" - echo " --pod-name NAME Pod name (optional, defaults to CLUSTER_NAME-1)" - echo " --port PORT Port to forward and connect to" - echo " --username USER MongoDB username" - echo " --password PASS MongoDB password" - echo " --test-type TYPE Test type (basic, comprehensive)" - echo " --help Show this help" - exit 1 -} - -# Parse command line arguments -while [[ $# -gt 0 ]]; do - case $1 in - --architecture) - ARCHITECTURE="$2" - shift 2 - ;; - --namespace) - NAMESPACE="$2" - shift 2 - ;; - --cluster-name) - CLUSTER_NAME="$2" - shift 2 - ;; - --pod-name) - POD_NAME="$2" - shift 2 - ;; - --port) - PORT="$2" - shift 2 - ;; - --username) - USERNAME="$2" - shift 2 - ;; - --password) - PASSWORD="$2" - shift 2 - ;; - --test-type) - TEST_TYPE="$2" - shift 2 - ;; - --help) - usage - ;; - *) - echo "Unknown option: $1" - usage - ;; - esac -done - -# Validate required parameters -if [[ -z "$ARCHITECTURE" || -z "$NAMESPACE" || -z "$CLUSTER_NAME" || -z "$PORT" || -z "$USERNAME" || -z 
"$PASSWORD" ]]; then - echo "Error: Missing required parameters" - usage -fi - -# Set default pod name if not provided -if [[ -z "$POD_NAME" ]]; then - POD_NAME="${CLUSTER_NAME}-1" -fi - -echo "Testing connection with mongosh on $ARCHITECTURE architecture..." -echo "Using pod: $POD_NAME" -echo "Port: $PORT" -echo "Test type: $TEST_TYPE" - -# Function to setup port forwarding with retry logic -setup_port_forward() { - local max_attempts=3 - local attempt=1 - - while [ $attempt -le $max_attempts ]; do - echo "Port forward setup attempt $attempt/$max_attempts..." - - # Start port-forward in background - kubectl port-forward pod/$POD_NAME $PORT:$PORT -n $NAMESPACE > /tmp/mongosh_pf.log 2>&1 & - PF_PID=$! - echo $PF_PID > /tmp/mongosh_pf.pid - - # Wait for port-forward to establish - echo "Waiting for port-forward to establish..." - sleep 10 - - # Check if port-forward process is still running - if ! kill -0 $PF_PID 2>/dev/null; then - echo "❌ Port-forward process died (attempt $attempt)" - if [ -f /tmp/mongosh_pf.log ]; then - echo "Port-forward output:" - cat /tmp/mongosh_pf.log - fi - ((attempt++)) - sleep 5 - continue - fi - - # Test connection - echo "Testing port-forward connection..." - timeout 60 bash -c " - until nc -z 127.0.0.1 $PORT; do - echo 'Waiting for port-forward to be ready...' - sleep 2 - done - " && { - echo "✓ Port-forward established successfully" - return 0 - } - - echo "❌ Port-forward connection test failed (attempt $attempt)" - kill $PF_PID 2>/dev/null || true - ((attempt++)) - sleep 5 - done - - echo "❌ Failed to establish port-forward after $max_attempts attempts" - return 1 -} - -# Function to cleanup port forwarding -cleanup_port_forward() { - if [ -f /tmp/mongosh_pf.pid ]; then - PF_PID=$(cat /tmp/mongosh_pf.pid) - kill $PF_PID 2>/dev/null || true - rm -f /tmp/mongosh_pf.pid - fi - rm -f /tmp/mongosh_pf.log -} - -# Setup port forwarding -if ! 
setup_port_forward; then - echo "❌ Failed to setup port forwarding" - exit 1 -fi - -echo "Port-forward is ready, creating mongosh test script..." - -# Create comprehensive test script -cat > /tmp/test_mongosh.js << 'MONGOSH_SCRIPT' -// Comprehensive MongoDB Connection Test Script -print("=== Starting MongoDB Connection Test ==="); -print("Connected to DocumentDB!"); - -// Switch to test database -db = db.getSiblingDB('mongosh_test_db'); -print("Using database: mongosh_test_db"); - -// Test 1: Basic Connection and Database Operations -print("\n=== Test 1: Basic Connection and Database Operations ==="); - -// Drop collection if it exists (cleanup from previous runs) -db.test_collection.drop(); - -// Create collection and insert test data -print("Creating collection and inserting test data..."); -db.createCollection("test_collection"); - -var testData = [ - { name: "Alice", age: 30, department: "Engineering", salary: 75000 }, - { name: "Bob", age: 25, department: "Marketing", salary: 55000 }, - { name: "Charlie", age: 35, department: "Sales", salary: 65000 }, - { name: "Diana", age: 28, department: "Engineering", salary: 70000 }, - { name: "Eve", age: 32, department: "Marketing", salary: 60000 } -]; - -var insertResult = db.test_collection.insertMany(testData); -print("Inserted documents:", Object.keys(insertResult.insertedIds).length); - -// Validate insertion -var insertedCount = Object.keys(insertResult.insertedIds).length; -if (insertedCount !== 5) { - throw new Error("Expected 5 inserted documents, got " + insertedCount); -} -print("✓ Insertion validation passed"); - -// Test 2: Query Operations -print("\n=== Test 2: Query Operations ==="); - -// Count documents -var totalDocs = db.test_collection.countDocuments({}); -print("Total documents:", totalDocs); -if (totalDocs !== 5) { - throw new Error("Expected 5 total documents, found " + totalDocs); -} -print("✓ Document count validation passed"); - -// Query with filters -var engineers = db.test_collection.find({ 
department: "Engineering" }).toArray(); -print("Engineers found:", engineers.length); -if (engineers.length !== 2) { - throw new Error("Expected 2 engineers, found " + engineers.length); -} -print("✓ Department filter validation passed"); - -// Range query -var youngEmployees = db.test_collection.find({ age: { $lt: 30 } }).toArray(); -print("Employees under 30:", youngEmployees.length); -if (youngEmployees.length !== 2) { - throw new Error("Expected 2 employees under 30, found " + youngEmployees.length); -} -print("✓ Range query validation passed"); - -// Test 3: Aggregation Operations -print("\n=== Test 3: Aggregation Operations ==="); - -// Average age calculation -var avgAgeResult = db.test_collection.aggregate([ - { $group: { _id: null, avgAge: { $avg: "$age" }, count: { $sum: 1 } } } -]).toArray(); - -var avgAge = avgAgeResult[0].avgAge; -var expectedAvgAge = (30 + 25 + 35 + 28 + 32) / 5; // 30 -print("Average age:", avgAge, "Expected:", expectedAvgAge); - -if (Math.abs(avgAge - expectedAvgAge) > 0.01) { - throw new Error("Expected average age " + expectedAvgAge + ", got " + avgAge); -} -print("✓ Aggregation validation passed"); - -// Group by department -var deptStats = db.test_collection.aggregate([ - { $group: { - _id: "$department", - count: { $sum: 1 }, - avgSalary: { $avg: "$salary" }, - maxSalary: { $max: "$salary" } - }}, - { $sort: { _id: 1 } } -]).toArray(); - -print("Department statistics:", JSON.stringify(deptStats)); -if (deptStats.length !== 3) { - throw new Error("Expected 3 departments, found " + deptStats.length); -} -print("✓ Department grouping validation passed"); - -// Test 4: Update Operations -print("\n=== Test 4: Update Operations ==="); - -// Helper function to handle Long objects -function getLongValue(val) { - if (typeof val === 'object' && val !== null && 'low' in val) { - return val.low; // Extract the actual number from Long object - } - return val; -} - -// Update single document -var updateResult = db.test_collection.updateOne( 
- { name: "Alice" }, - { $set: { title: "Senior Engineer", lastModified: new Date() } } -); - -var modifiedCount = getLongValue(updateResult.modifiedCount); -var matchedCount = getLongValue(updateResult.matchedCount); - -print("Update result - Modified:", modifiedCount, "Matched:", matchedCount); -if (modifiedCount !== 1 || matchedCount !== 1) { - throw new Error("Expected 1 modified and 1 matched document, got modified=" + modifiedCount + ", matched=" + matchedCount); -} -print("✓ Single update validation passed"); - -// Verify update content -var aliceUpdated = db.test_collection.findOne({ name: "Alice" }); -if (!aliceUpdated.title || aliceUpdated.title !== "Senior Engineer") { - throw new Error("Alice title update validation failed: " + JSON.stringify(aliceUpdated)); -} -print("✓ Update content validation passed"); - -// Bulk update -var bulkUpdateResult = db.test_collection.updateMany( - { salary: { $lt: 60000 } }, - { $inc: { salary: 5000 }, $set: { salaryAdjusted: true } } -); - -var bulkModifiedCount = getLongValue(bulkUpdateResult.modifiedCount); -print("Bulk update result - Modified:", bulkModifiedCount); -if (bulkModifiedCount !== 1) { // Only Bob should match - throw new Error("Expected 1 document to be updated in bulk operation, got " + bulkModifiedCount); -} -print("✓ Bulk update validation passed"); - -// Test 5: Sorting and Limiting -print("\n=== Test 5: Sorting and Limiting Operations ==="); - -// Sort by age ascending -var sortedByAge = db.test_collection.find().sort({ age: 1 }).toArray(); -var ages = sortedByAge.map(doc => doc.age); -print("Ages in ascending order:", ages); - -// Verify sorting -for (var i = 1; i < ages.length; i++) { - if (ages[i] < ages[i-1]) { - throw new Error("Sorting validation failed: ages not in ascending order"); - } -} -print("✓ Sorting validation passed"); - -// Test limit and skip -var limitedResults = db.test_collection.find().sort({ age: 1 }).limit(2).toArray(); -if (limitedResults.length !== 2) { - throw new 
Error("Expected 2 documents with limit, got " + limitedResults.length); -} -print("✓ Limit operation validation passed"); - -var skippedResults = db.test_collection.find().sort({ age: 1 }).skip(2).limit(2).toArray(); -if (skippedResults.length !== 2) { - throw new Error("Expected 2 documents with skip+limit, got " + skippedResults.length); -} -print("✓ Skip operation validation passed"); - -// Test 7: Complex Aggregation Pipeline -print("\n=== Test 7: Complex Aggregation Pipeline ==="); - -var complexPipeline = [ - { $match: { age: { $gte: 25 } } }, - { $group: { - _id: "$department", - avgAge: { $avg: "$age" }, - totalSalary: { $sum: "$salary" }, - employees: { $push: "$name" } - }}, - { $project: { - department: "$_id", - avgAge: { $round: ["$avgAge", 1] }, - totalSalary: 1, - employeeCount: { $size: "$employees" }, - employees: 1 - }}, - { $sort: { totalSalary: -1 } } -]; - -var complexResult = db.test_collection.aggregate(complexPipeline).toArray(); -print("Complex aggregation result:", JSON.stringify(complexResult, null, 2)); - -if (complexResult.length === 0) { - throw new Error("Complex aggregation returned no results"); -} -print("✓ Complex aggregation validation passed"); - -// Test 8: Delete Operations -print("\n=== Test 8: Delete Operations ==="); - -// Insert a temporary document for deletion test -var tempInsert = db.test_collection.insertOne({ name: "Temp", age: 99, department: "Temp", temporary: true }); -print("Temporary document inserted:", tempInsert.insertedId); - -// Delete the temporary document -var deleteResult = db.test_collection.deleteOne({ temporary: true }); -print("Delete result - Deleted count:", deleteResult.deletedCount); - -if (deleteResult.deletedCount !== 1) { - throw new Error("Expected 1 document to be deleted, got " + deleteResult.deletedCount); -} -print("✓ Delete operation validation passed"); - -// Verify document was deleted -var tempDoc = db.test_collection.findOne({ temporary: true }); -if (tempDoc !== null) { - throw new 
Error("Temporary document was not properly deleted"); -} -print("✓ Delete verification passed"); - -// Final validation - ensure we still have our original data -var finalCount = db.test_collection.countDocuments({}); -if (finalCount !== 5) { - throw new Error("Expected 5 documents after cleanup, found " + finalCount); -} -print("✓ Final document count validation passed"); - -// Test Summary -print("\n=== Test Summary ==="); -print("✓ All mongosh tests completed successfully!"); -print("✓ Basic connection: PASSED"); -print("✓ Query operations: PASSED"); -print("✓ Aggregation operations: PASSED"); -print("✓ Update operations: PASSED"); -print("✓ Sorting and limiting: PASSED"); -print("✓ Complex aggregation: PASSED"); -print("✓ Delete operations: PASSED"); -print("✓ Data integrity: VERIFIED"); - -print("\nMongoDB connection test completed successfully!"); -MONGOSH_SCRIPT - -echo "Running mongosh validation tests..." - -# Run the comprehensive test script -if mongosh 127.0.0.1:$PORT \ - -u "$USERNAME" \ - -p "$PASSWORD" \ - --authenticationMechanism SCRAM-SHA-256 \ - --tls \ - --tlsAllowInvalidCertificates \ - --file /tmp/test_mongosh.js; then - echo "✓ Mongosh validation tests completed successfully on $ARCHITECTURE" -else - echo "❌ Mongosh validation tests failed on $ARCHITECTURE" - echo "=== Port-forward logs ===" - cat /tmp/mongosh_pf.log 2>/dev/null || echo "No port-forward logs available" - cleanup_port_forward - exit 1 -fi - -# Cleanup -cleanup_port_forward -rm -f /tmp/test_mongosh.js - -echo "✓ MongoDB connection test completed successfully on $ARCHITECTURE" diff --git a/operator/src/scripts/test-scripts/test-python-pymongo.sh b/operator/src/scripts/test-scripts/test-python-pymongo.sh deleted file mode 100755 index cb10d003..00000000 --- a/operator/src/scripts/test-scripts/test-python-pymongo.sh +++ /dev/null @@ -1,317 +0,0 @@ -#!/bin/bash - -# Python PyMongo Integration Test Script -# Tests MongoDB connection using PyMongo with comprehensive validation - -set 
-e - -# Default values -ARCHITECTURE="" -NAMESPACE="" -CLUSTER_NAME="" -POD_NAME="" -PORT="" -USERNAME="" -PASSWORD="" - -# Function to display usage -usage() { - echo "Usage: $0 [OPTIONS]" - echo "Options:" - echo " --architecture ARCH Target architecture for logging" - echo " --namespace NS Kubernetes namespace" - echo " --cluster-name NAME DocumentDB cluster name" - echo " --pod-name NAME Pod name (optional, defaults to CLUSTER_NAME-1)" - echo " --port PORT Port to forward and connect to" - echo " --username USER MongoDB username" - echo " --password PASS MongoDB password" - echo " --help Show this help" - exit 1 -} - -# Parse command line arguments -while [[ $# -gt 0 ]]; do - case $1 in - --architecture) - ARCHITECTURE="$2" - shift 2 - ;; - --namespace) - NAMESPACE="$2" - shift 2 - ;; - --cluster-name) - CLUSTER_NAME="$2" - shift 2 - ;; - --pod-name) - POD_NAME="$2" - shift 2 - ;; - --port) - PORT="$2" - shift 2 - ;; - --username) - USERNAME="$2" - shift 2 - ;; - --password) - PASSWORD="$2" - shift 2 - ;; - --help) - usage - ;; - *) - echo "Unknown option: $1" - usage - ;; - esac -done - -# Validate required parameters -if [[ -z "$ARCHITECTURE" || -z "$NAMESPACE" || -z "$CLUSTER_NAME" || -z "$PORT" || -z "$USERNAME" || -z "$PASSWORD" ]]; then - echo "Error: Missing required parameters" - usage -fi - -# Set default pod name if not provided -if [[ -z "$POD_NAME" ]]; then - POD_NAME="${CLUSTER_NAME}-1" -fi - -echo "Testing with Python PyMongo client on $ARCHITECTURE architecture..." -echo "Using pod: $POD_NAME" -echo "Port: $PORT" - -# Function to setup port forwarding with retry logic -setup_port_forward() { - local max_attempts=3 - local attempt=1 - - while [ $attempt -le $max_attempts ]; do - echo "Attempt $attempt: Setting up port forwarding to pod $POD_NAME in namespace $NAMESPACE..." - - # Start port forward in background - kubectl port-forward "pod/$POD_NAME" "$PORT:$PORT" -n "$NAMESPACE" & - PF_PID=$! 
- - # Give it some time to start - sleep 5 - - # Check if port forward is working by testing the connection - if timeout 30 bash -c "until nc -z 127.0.0.1 $PORT; do echo 'Waiting for port-forward...'; sleep 2; done"; then - echo "✓ Port forwarding established successfully on attempt $attempt" - return 0 - else - echo "❌ Port forwarding failed on attempt $attempt" - kill $PF_PID 2>/dev/null || true - sleep 2 - fi - - ((attempt++)) - done - - echo "❌ Failed to establish port forwarding after $max_attempts attempts" - return 1 -} - -# Function to cleanup port forwarding -cleanup_port_forward() { - if [[ -n "$PF_PID" ]]; then - echo "Cleaning up port forwarding (PID: $PF_PID)..." - kill $PF_PID 2>/dev/null || true - wait $PF_PID 2>/dev/null || true - PF_PID="" - fi -} - -# Set up cleanup trap -trap cleanup_port_forward EXIT - -# Install Python dependencies -echo "Installing Python dependencies..." -pip install pymongo - -# Setup port forwarding -if ! setup_port_forward; then - echo "Failed to setup port forwarding" - exit 1 -fi - -# Test connection and ensure port-forward is ready -echo "Verifying port-forward is ready..." -timeout 60 bash -c " -until nc -z 127.0.0.1 $PORT; do - echo 'Waiting for port-forward to be ready...' - sleep 2 -done -" - -echo "Port-forward is ready, running Python tests..." - -# Get the directory where this script is located -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Run the existing Python test script and validate it completes successfully -echo "Running existing Python test script on $ARCHITECTURE..." 
-echo "Using Python script: $SCRIPT_DIR/mongo-python-data-pusher.py" -if python3 "$SCRIPT_DIR/mongo-python-data-pusher.py"; then - echo "✓ Existing Python test script completed successfully on $ARCHITECTURE" -else - echo "❌ Existing Python test script failed on $ARCHITECTURE" - exit 1 -fi - -# Create and run comprehensive additional Python tests -cat > additional_test.py << EOF -from pymongo import MongoClient -import ssl -import sys - -def validate_test(condition, message): - if not condition: - print(f"❌ VALIDATION FAILED: {message}") - sys.exit(1) - print(f"✓ {message}") - -# Connection parameters -client = MongoClient( - "127.0.0.1", - $PORT, - username="$USERNAME", - password="$PASSWORD", - authSource="admin", - authMechanism="SCRAM-SHA-256", - tls=True, - tlsAllowInvalidCertificates=True -) - -# Test database operations -test_db = client["integration_test"] - -# Test collection operations -collection = test_db["test_collection"] - -# Clear any existing data -collection.drop() - -# Insert test data and validate -docs = [ - {"type": "integration_test", "value": i, "status": "active"} - for i in range(10) -] -result = collection.insert_many(docs) -print(f"Inserted {len(result.inserted_ids)} documents") - -# Validate insertion -validate_test(len(result.inserted_ids) == 10, "Inserted exactly 10 documents") -validate_test(all(isinstance(id, object) for id in result.inserted_ids), "All inserted IDs are valid ObjectIds") - -# Test queries and validate results -count = collection.count_documents({"status": "active"}) -print(f"Found {count} active documents") -validate_test(count == 10, "Found exactly 10 active documents") - -# Test specific value queries -value_5_docs = list(collection.find({"value": 5})) -validate_test(len(value_5_docs) == 1, "Found exactly 1 document with value 5") -validate_test(value_5_docs[0]["value"] == 5, "Document with value 5 has correct value") -validate_test(value_5_docs[0]["status"] == "active", "Document with value 5 has correct status") 
-validate_test(value_5_docs[0]["type"] == "integration_test", "Document with value 5 has correct type") - -# Test range queries -high_value_docs = list(collection.find({"value": {"\$gte": 7}})) -validate_test(len(high_value_docs) == 3, "Found exactly 3 documents with value >= 7") -expected_values = {7, 8, 9} -found_values = {doc["value"] for doc in high_value_docs} -validate_test(found_values == expected_values, f"High value documents have correct values: {found_values}") - -# Test aggregation and validate results -pipeline = [ - {"\$match": {"status": "active"}}, - {"\$group": {"_id": "\$status", "total": {"\$sum": "\$value"}, "count": {"\$sum": 1}}} -] -agg_result = list(collection.aggregate(pipeline)) -print(f"Aggregation result: {agg_result}") - -validate_test(len(agg_result) == 1, "Aggregation returned exactly 1 group") -validate_test(agg_result[0]["_id"] == "active", "Aggregation grouped by 'active' status") -expected_total = sum(range(10)) # 0+1+2+...+9 = 45 -validate_test(agg_result[0]["total"] == expected_total, f"Aggregation total is correct: {expected_total}") -validate_test(agg_result[0]["count"] == 10, "Aggregation count is correct: 10") - -# Test update operations -update_result = collection.update_many( - {"value": {"\$lt": 5}}, - {"\$set": {"status": "updated"}} -) -validate_test(update_result.modified_count == 5, f"Updated exactly 5 documents (got {update_result.modified_count})") - -# Validate update results -updated_docs = list(collection.find({"status": "updated"})) -validate_test(len(updated_docs) == 5, "Found exactly 5 updated documents") -updated_values = {doc["value"] for doc in updated_docs} -expected_updated_values = {0, 1, 2, 3, 4} -validate_test(updated_values == expected_updated_values, f"Updated documents have correct values: {updated_values}") - -# Test that non-updated documents are unchanged -active_docs = list(collection.find({"status": "active"})) -validate_test(len(active_docs) == 5, "Found exactly 5 still-active documents") 
-active_values = {doc["value"] for doc in active_docs} -expected_active_values = {5, 6, 7, 8, 9} -validate_test(active_values == expected_active_values, f"Active documents have correct values: {active_values}") - -# Test sorting -sorted_docs = list(collection.find().sort("value", -1)) # Descending order -validate_test(len(sorted_docs) == 10, "Sorted query returned all 10 documents") -sorted_values = [doc["value"] for doc in sorted_docs] -expected_sorted = list(range(9, -1, -1)) # [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] -validate_test(sorted_values == expected_sorted, f"Documents sorted correctly: {sorted_values}") - -# Test complex aggregation with multiple stages -complex_pipeline = [ - {"\$match": {"value": {"\$gte": 3}}}, - {"\$group": {"_id": "\$status", "avg_value": {"\$avg": "\$value"}, "max_value": {"\$max": "\$value"}}}, - {"\$sort": {"_id": 1}} -] -complex_result = list(collection.aggregate(complex_pipeline)) -print(f"Complex aggregation result: {complex_result}") - -# Validate complex aggregation -validate_test(len(complex_result) == 2, "Complex aggregation returned 2 groups (active and updated)") - -# Find the results for each status -active_result = next((r for r in complex_result if r["_id"] == "active"), None) -updated_result = next((r for r in complex_result if r["_id"] == "updated"), None) - -validate_test(active_result is not None, "Found active group in complex aggregation") -validate_test(updated_result is not None, "Found updated group in complex aggregation") - -# For active status: values 5,6,7,8,9 -> avg = 7, max = 9 -validate_test(abs(active_result["avg_value"] - 7.0) < 0.001, f"Active group avg_value is correct: {active_result['avg_value']}") -validate_test(active_result["max_value"] == 9, f"Active group max_value is correct: {active_result['max_value']}") - -# For updated status: values 3,4 (only those >= 3) -> avg = 3.5, max = 4 -validate_test(abs(updated_result["avg_value"] - 3.5) < 0.001, f"Updated group avg_value is correct: 
{updated_result['avg_value']}") -validate_test(updated_result["max_value"] == 4, f"Updated group max_value is correct: {updated_result['max_value']}") - -print("All Python integration tests passed with validation!") -print(f"Test completed successfully on architecture: {sys.platform}") - -client.close() -EOF - -echo "Running Python validation tests on $ARCHITECTURE..." -if python3 additional_test.py; then - echo "✓ Python validation tests completed successfully on $ARCHITECTURE" -else - echo "❌ Python validation tests failed on $ARCHITECTURE" - exit 1 -fi - -# Cleanup temporary test file -rm -f additional_test.py - -echo "✅ All Python PyMongo tests completed successfully!" diff --git a/test/e2e/README.md b/test/e2e/README.md new file mode 100644 index 00000000..5c4365f6 --- /dev/null +++ b/test/e2e/README.md @@ -0,0 +1,262 @@ +# DocumentDB Operator E2E Test Suite + +## What this is + +A unified Go / Ginkgo v2 / Gomega end-to-end test suite that drives the +DocumentDB Kubernetes Operator against a real cluster. It replaces the four +legacy GitHub Actions workflows (`test-integration.yml`, `test-E2E.yml`, +`test-backup-and-restore.yml`, `test-upgrade-and-rollback.yml`) and their +bash / JavaScript (mongosh) / Python (pymongo) glue with a single Go module +at `test/e2e/`. Specs are organised by CRD operation (lifecycle, scale, data, +performance, backup, tls, feature gates, exposure, status, upgrade), reuse +CloudNative-PG's `tests/utils` packages as a library, and speak the Mongo +wire protocol via `go.mongodb.org/mongo-driver/v2`. Design rationale and +scope: [`docs/designs/e2e-test-suite.md`](../../docs/designs/e2e-test-suite.md). 
+ +## Prereqs + +| Tool | Version | Notes | +|---|---|---| +| Go | 1.25.x (match `test/e2e/go.mod` — currently `go 1.25.8`) | Separate module from the operator | +| Docker | any recent | Required for kind | +| kind | any recent | Local Kubernetes | +| kubectl | matching cluster | | +| helm | 3.x | Operator install | +| `ginkgo` CLI | v2 | `go install github.com/onsi/ginkgo/v2/ginkgo@latest` | + +The suite itself installs no cluster components — it expects an already-running +cluster with the operator deployed. Backup specs additionally need the CSI +snapshot CRDs; TLS cert-manager specs need cert-manager. Both gate with a +runtime probe and `Skip()` rather than fail when the dependency is missing. + +## Quick start + +From the repository root: + +```bash +# 1. Build images + bring up a kind cluster + install the operator + CRDs. +# The script in scripts/development/deploy.sh drives `make deploy` and the +# same composite action (.github/actions/setup-test-environment) CI uses. +cd operator/src +DEPLOY=true DEPLOY_CLUSTER=true ./scripts/development/deploy.sh +cd - + +# 2. Run the smoke label against that cluster. +cd test/e2e +ginkgo -r --label-filter=smoke ./tests/... +``` + +Run a single area: + +```bash +ginkgo -r --label-filter=lifecycle ./tests/... 
+ginkgo -r --label-filter='data && level:low' ./tests/data +``` + +## Layout + +``` +test/e2e/ +├── go.mod, go.sum # separate module; pins CNPG test utils +├── suite.go # SetupSuite / TeardownSuite; env + run-id wiring +├── suite_test.go # SynchronizedBeforeSuite entry point +├── labels.go # Ginkgo label constants (area + cross-cutting) +├── levels.go # TEST_DEPTH → Level gate (CurrentLevel, SkipUnlessLevel) +├── runid.go # E2E_RUN_ID resolver (stable per-process id) +├── manifests/ +│ ├── base/ # documentdb.yaml.template — the base CR +│ ├── mixins/ # composable overlays (tls_*, exposure_*, storage_*, …) +│ └── backup/ # backup / scheduled_backup / recovery CR templates +├── pkg/e2eutils/ # helper packages imported by every area suite +└── tests/ # one Go package per functional area + ├── lifecycle/ scale/ data/ performance/ status/ + ├── backup/ tls/ feature_gates/ exposure/ upgrade/ +``` + +## Labels & depth + +Labels live in [`labels.go`](labels.go) and are attached either to the area +suite's top-level `Describe` (area labels) or to individual specs (cross-cutting +and capability labels). + +| Group | Labels | +|---|---| +| Area | `lifecycle`, `scale`, `data`, `performance`, `backup`, `recovery`, `tls`, `feature-gates`, `exposure`, `status`, `upgrade` | +| Cross-cutting | `smoke`, `basic`, `destructive`, `disruptive`, `slow` | +| Capability | `needs-cert-manager`, `needs-metallb`, `needs-csi-snapshots`, `needs-csi-resize` | +| Depth | `level:lowest`, `level:low`, `level:medium`, `level:high`, `level:highest` | + +**Depth gate.** `TEST_DEPTH` takes an integer 0–4 mapping to +`Highest` (0), `High`, `Medium`, `Low`, `Lowest` (4). Default is `Medium` (2) +— the authoritative gate is `e2e.SkipUnlessLevel(e2e.Medium)`, which reads +`TEST_DEPTH` at runtime and `Skip()`s when the configured depth is shallower. +The `level:*` labels are informational duplicates for Ginkgo's `--label-filter`. 
+(CNPG v1.28.1 does not currently export a `tests/utils/levels` package;
+[`levels.go`](levels.go) is our local implementation and will be replaced
+with a thin re-export if upstream adds one.)
+
+Examples:
+
+```bash
+# Fast smoke — typically Highest depth
+TEST_DEPTH=0 ginkgo -r --label-filter=smoke ./tests/...
+
+# Full backup area at default depth, skipping clusters without CSI snapshots
+ginkgo -r --label-filter='backup && !needs-csi-snapshots' ./tests/backup
+
+# Nightly: everything
+TEST_DEPTH=4 ginkgo -r --procs=4 ./tests/...
+
+# Upgrade suite (disruptive — runs serial, owns its own operator install)
+E2E_UPGRADE=1 E2E_UPGRADE_PREVIOUS_CHART=… \
+  ginkgo --procs=1 --label-filter=upgrade ./tests/upgrade
+```
+
+## Environment variables
+
+| Variable | Default | Purpose |
+|---|---|---|
+| `TEST_DEPTH` | `2` (Medium) | Depth gate; 0=Highest … 4=Lowest |
+| `E2E_RUN_ID` | auto-generated | Stable id stamped onto shared fixtures + cluster-scoped objects. Set this in CI so parallel Ginkgo binaries share fixtures; leave **unset locally** — an auto-generated id is safer for ad-hoc runs |
+| `E2E_ARTIFACTS_DIR` | `./_artifacts/<run-id>/proc-<proc>/` | Override the JUnit / log dump directory |
+| `DOCUMENTDB_IMAGE` | chart default | Overrides the extension image used by fresh fixtures |
+| `GATEWAY_IMAGE` | chart default | Overrides the gateway image used by fresh fixtures |
+| `E2E_STORAGE_CLASS` | cluster default | StorageClass for fresh fixtures |
+| `E2E_STORAGE_SIZE` | `1Gi` | PVC size for fresh fixtures |
+| `GINKGO_PARALLEL_PROCESS` | set by Ginkgo | Consumed; do not set manually |
+| `POSTGRES_IMG` | dummy stub | Set by `testenv` to satisfy CNPG's `TestingEnvironment`; do not override |
+
+**Upgrade area (gated behind `E2E_UPGRADE=1`):**
+
+| Variable | Purpose |
+|---|---|
+| `E2E_UPGRADE` | Must be `1` or every spec in `tests/upgrade/` Skips |
+| `E2E_UPGRADE_PREVIOUS_CHART` | OCI or path ref for the "old" operator chart |
+| `E2E_UPGRADE_PREVIOUS_VERSION` | Chart 
version string for the old chart |
+| `E2E_UPGRADE_CURRENT_CHART` | Chart ref for the "new" (built-from-tree) chart |
+| `E2E_UPGRADE_CURRENT_VERSION` | Optional — defaults to chart's own version |
+| `E2E_UPGRADE_RELEASE` | Helm release name |
+| `E2E_UPGRADE_OPERATOR_NS` | Operator namespace |
+| `E2E_UPGRADE_OLD_DOCUMENTDB_IMAGE` | Extension image used before upgrade |
+| `E2E_UPGRADE_NEW_DOCUMENTDB_IMAGE` | Extension image used after upgrade |
+
+> A note on `E2E_KEEP_CLUSTERS`: the design doc discusses a keep-clusters
+> flag, but no such knob is honored by the current suite code. Skip-on-prereq
+> is the intended mechanism; to inspect a cluster after a failing spec, pass
+> `--fail-fast` and manually defer cluster teardown outside the suite.
+
+**Missing prereqs are `Skip()`, not `Fail()`.** Backup specs probe the
+`VolumeSnapshot`/`VolumeSnapshotClass` CRDs at runtime (`Skip` when absent),
+and `tls/tls_certmanager_test.go` probes the `cert-manager.io/v1` API group
+the same way. The capability labels (`needs-csi-snapshots`, `needs-cert-manager`,
+`needs-metallb`, `needs-csi-resize`) let you filter these out up front if
+you already know your environment.
+
+## Adding a new test
+
+**Adding a spec to an existing area.** Create a new `*_test.go` in
+`tests/<area>/`, import the area suite's label, attach the right depth
+label, and use the suite's shared fixture rather than a fresh cluster when
+the spec is read-only:
+
+```go
+var _ = Describe("my new behavior", Label(e2e.DataLabel), e2e.MediumLevelLabel, func() {
+    It("does the thing", func(sctx SpecContext) {
+        e2e.SkipUnlessLevel(e2e.Medium)
+        // ... sharedROCluster is available via the area's BeforeAll
+    })
+})
+```
+
+**Adding a new area package.** Create `tests/<area>/`, add
+`<area>_suite_test.go` that calls `e2e.SetupSuite` / `e2e.TeardownSuite`,
+define an area label in `labels.go`, and attach it to the top-level
+`Describe`. 
Mirror an existing area — `tests/status/` is the smallest +reference for read-only areas; `tests/lifecycle/` for mutating ones. + +**Adding a new manifest mixin.** Drop a `.yaml.template` under +`manifests/mixins/` and pass its stem via `CreateOptions.Mixins` to +`documentdb.Create`. Note the merge semantics: `RenderCR` produces a +multi-document YAML stream (one doc per template) and `Create` deep-merges +them into a single DocumentDB object before applying — maps merge recursively, +**scalars and slices in later mixins overwrite earlier values**. The public +`RenderCR` still returns the raw multi-doc bytes (useful for artifact dumps +or manual `kubectl apply`). + +**Adding a new assertion.** Put the reusable verb in +`pkg/e2eutils/assertions/assertions.go`. Assertions return `func() error` +so callers can wrap them in `Eventually(...).Should(Succeed())`. + +## Helper packages (`pkg/e2eutils/`) + +| Package | Role | +|---|---| +| `testenv/` | Wraps CNPG's `environment.TestingEnvironment` with dummy `POSTGRES_IMG`; registers our `api/preview` scheme on the typed `client.Client`. | +| `documentdb/` | DocumentDB CR verbs: `RenderCR` (base + mixin envsubst), `Create` (multi-doc merge), `PatchSpec`, `WaitHealthy`, `Delete`, `List`. | +| `mongo/` | `go.mongodb.org/mongo-driver/v2` client builder, seed/probe/count helpers; owns the 10 s post-port-forward ping retry budget (`connectRetryTimeout`). | +| `portforward/` | Thin wrapper over CNPG's `forwardconnection` for the DocumentDB gateway port. | +| `assertions/` | Composable Gomega verbs (`AssertDocumentDBReady`, `AssertInstanceCount`, `AssertPrimaryUnchanged`, `AssertPVCCount`, `AssertTLSSecretReady`, `AssertServiceType`, `AssertConnectionStringMatches`). | +| `timeouts/` | DocumentDB-specific overrides layered on top of CNPG's `timeouts` map (`DocumentDBReady`, `DocumentDBUpgrade`, `InstanceScale`, `PVCResize`). 
|
+| `seed/` | Canonical datasets (`SmallDataset(10)`, `MediumDataset(1000)`, sort/agg fixtures) shared by data / performance / backup / upgrade specs. |
+| `fixtures/` | Session-scoped shared clusters (`shared_ro.go`, `shared_scale.go`) and lazy MinIO (`minio.go`). Honors `E2E_RUN_ID`, `DOCUMENTDB_IMAGE`, `GATEWAY_IMAGE`, `E2E_STORAGE_CLASS`, `E2E_STORAGE_SIZE`. |
+| `namespaces/` | Per-proc, run-id-scoped namespace naming (`e2e-<run-id>-<proc>`). |
+| `operatorhealth/` | Operator-pod UID + restart-count gate; flips a package sentinel on churn so subsequent non-`disruptive`/`upgrade` specs skip. |
+| `clusterprobe/` | Capability probes (CSI snapshot CRDs, cert-manager, StorageClass resize support) used by area `Skip*` helpers. |
+| `backup/` | Helpers for asserting `Backup` / `ScheduledBackup` CR state, snapshot readiness, and MinIO object inspection. |
+| `tlscerts/` | Self-signed + provided-mode certificate material builders used by `tests/tls/`. |
+| `helmop/` | Helm install/upgrade/uninstall for the upgrade suite (multi-phase operator lifecycle). |
+
+## CI
+
+The suite is driven by [`.github/workflows/test-e2e.yml`](../../.github/workflows/test-e2e.yml)
+(owned by the CI workflow migration; the file may not yet be present in
+every working tree — it is added as part of the Phase 3 rollout). The
+workflow fans out into nine label-grouped jobs:
+
+| Job | `--label-filter` | `--procs` |
+|---|---|---|
+| `smoke` | `smoke` | auto |
+| `lifecycle` | `lifecycle` | auto |
+| `scale` | `scale` | 2 |
+| `data` | `data` | auto |
+| `performance` | `performance` | 1 (dedicated runner) |
+| `backup` | `backup` | 2 |
+| `tls` | `tls` | auto |
+| `feature` | `feature-gates \|\| exposure \|\| status` | auto |
+| `upgrade` | `upgrade` | 1 |
+
+Each job runs `setup-test-environment` → `ginkgo -r --label-filter=…
+--junit-report=junit.xml ./tests/...` → upload JUnit + logs.
+`workflow_dispatch` exposes `label` and `depth` inputs for ad-hoc runs. 
+ +## Troubleshooting + +- **Port-forward / Mongo connect fails with "connection refused."** The + post-port-forward retry budget is 10 s at 100 ms backoff + (`mongo/connect.go`: `connectRetryTimeout` / `connectRetryBackoff`). If + you consistently exceed it, the gateway pod is probably not Ready — check + the DocumentDB CR status and the gateway container logs. +- **Backup specs all Skip.** Your cluster lacks the CSI snapshot CRDs + (`VolumeSnapshotClass`, `VolumeSnapshot`) or the configured StorageClass + isn't backed by a snapshot-capable CSI driver. `scripts/test-scripts/deploy-csi-driver.sh` + under `operator/src/` installs a hostpath CSI driver suitable for kind. +- **TLS cert-manager spec Skips.** `cert-manager.io/v1` isn't served; install + cert-manager (the `setup-test-environment` composite does this for you). +- **"E2E_RUN_ID was not set" warning in CI logs.** The suite auto-generates + a run id, but cross-binary fixture sharing relies on every Ginkgo invocation + in a CI job seeing the same value. Export `E2E_RUN_ID="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"` + (or similar) once at the top of the job. +- **Operator churn aborts subsequent specs.** `operatorhealth.Gate` snapshots + the operator pod's UID + restart count at suite start; any drift flips a + package sentinel and skips every subsequent non-`disruptive`/`upgrade` spec. + This is working as intended — investigate why the operator restarted. + +## CNPG dependency & pin policy + +The suite imports CloudNative-PG's `tests/utils/*` packages as a library +(Apache-2.0, compatible with our MIT). The version is pinned in +[`go.mod`](go.mod) — currently `github.com/cloudnative-pg/cloudnative-pg +v1.28.1`. `tests/utils/*` is exported (not `internal/`) but has no stability +contract; budget roughly half a day per CNPG version bump for compat fixes +in our wrappers (`testenv`, `operatorhealth`, `portforward`). Bumps should +be single-purpose PRs gated on the full suite. 
diff --git a/test/e2e/go.mod b/test/e2e/go.mod new file mode 100644 index 00000000..69611d4d --- /dev/null +++ b/test/e2e/go.mod @@ -0,0 +1,110 @@ +module github.com/documentdb/documentdb-operator/test/e2e + +go 1.25.8 + +require ( + github.com/cloudnative-pg/cloudnative-pg v1.28.1 + github.com/documentdb/documentdb-operator v0.0.0-00010101000000-000000000000 + github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0 + github.com/onsi/ginkgo/v2 v2.28.1 + github.com/onsi/gomega v1.39.1 + go.mongodb.org/mongo-driver/v2 v2.5.1 + k8s.io/api v0.35.0 + k8s.io/apimachinery v0.35.0 + k8s.io/client-go v0.35.0 + sigs.k8s.io/controller-runtime v0.22.4 + sigs.k8s.io/yaml v1.6.0 +) + +require ( + github.com/Masterminds/semver/v3 v3.4.0 // indirect + github.com/avast/retry-go/v5 v5.0.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cloudnative-pg/barman-cloud v0.4.1-0.20260108104508-ced266c145f5 // indirect + github.com/cloudnative-pg/cnpg-i v0.5.0 // indirect + github.com/cloudnative-pg/machinery v0.3.3 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.22.4 // indirect + github.com/go-openapi/jsonreference v0.21.4 // indirect + github.com/go-openapi/swag v0.25.4 // indirect + github.com/go-openapi/swag/cmdutils v0.25.4 // indirect + github.com/go-openapi/swag/conv v0.25.4 // indirect + github.com/go-openapi/swag/fileutils v0.25.4 // indirect + github.com/go-openapi/swag/jsonname v0.25.4 // indirect + github.com/go-openapi/swag/jsonutils v0.25.4 // indirect + github.com/go-openapi/swag/loading v0.25.4 // indirect + 
github.com/go-openapi/swag/mangling v0.25.4 // indirect + github.com/go-openapi/swag/netutils v0.25.4 // indirect + github.com/go-openapi/swag/stringutils v0.25.4 // indirect + github.com/go-openapi/swag/typeutils v0.25.4 // indirect + github.com/go-openapi/swag/yamlutils v0.25.4 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/gnostic-models v0.7.1 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 // indirect + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect + github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3 // indirect + github.com/jackc/puddle/v2 v2.2.2 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect + github.com/klauspost/compress v1.18.0 // indirect + github.com/lib/pq v1.12.0 // indirect + github.com/moby/spdystream v0.5.1 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.1 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.67.4 // indirect + github.com/prometheus/procfs v0.19.2 // indirect + github.com/robfig/cron v1.2.0 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/thoas/go-funk v0.9.3 // indirect + 
github.com/x448/float16 v0.8.4 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.2.0 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect + github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.1 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.47.0 // indirect + golang.org/x/mod v0.32.0 // indirect + golang.org/x/net v0.49.0 // indirect + golang.org/x/oauth2 v0.34.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/term v0.41.0 // indirect + golang.org/x/text v0.33.0 // indirect + golang.org/x/time v0.14.0 // indirect + golang.org/x/tools v0.41.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect + google.golang.org/grpc v1.79.3 // indirect + google.golang.org/protobuf v1.36.11 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + k8s.io/apiextensions-apiserver v0.35.0 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20251125145642-4e65d59e963e // indirect + k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482 // indirect +) + +replace github.com/documentdb/documentdb-operator => ../../operator/src diff --git a/test/e2e/go.sum b/test/e2e/go.sum new file mode 100644 index 00000000..94c5041d --- /dev/null +++ b/test/e2e/go.sum @@ -0,0 +1,303 @@ +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= 
+github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/avast/retry-go/v5 v5.0.0 h1:kf1Qc2UsTZ4qq8elDymqfbISvkyMuhgRxuJqX2NHP7k= +github.com/avast/retry-go/v5 v5.0.0/go.mod h1://d+usmKWio1agtZfS1H/ltTqwtIfBnRq9zEwjc3eH8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cloudnative-pg/barman-cloud v0.4.1-0.20260108104508-ced266c145f5 h1:wPB7VTNgTv6t9sl4QYOBakmVTqHnOdKUht7Q3aL+uns= +github.com/cloudnative-pg/barman-cloud v0.4.1-0.20260108104508-ced266c145f5/go.mod h1:qD0NtJOllNQbRB0MaleuHsZjFYaXtXfdg0HbFTbuHn0= +github.com/cloudnative-pg/cloudnative-pg v1.28.1 h1:HdOUWgFhta558uHfXeO/199qCApxaj5yi05x6nWNmgs= +github.com/cloudnative-pg/cloudnative-pg v1.28.1/go.mod h1:yhRa4GqJAjNd0tT9AiRgk1KdqLhMjo/JmGGoASRl2CU= +github.com/cloudnative-pg/cnpg-i v0.5.0 h1:/TOzpNT6cwNgrpftTtrnLKdoHgMwd+88vZgXjlVgXeE= +github.com/cloudnative-pg/cnpg-i v0.5.0/go.mod h1:7Gh4+UzhBpGhr4DreB1GN9wGYfvxwXCXZUyVt3zE/3I= +github.com/cloudnative-pg/machinery v0.3.3 h1:CaqXqLTJH9RrVv3R/YU0NmFaI/F18HLg2JfH3mQLcDk= +github.com/cloudnative-pg/machinery v0.3.3/go.mod h1:RYAYlVKBF5pH4mg+Q8wHjNDyENV9ajbkG41zOEf8DEs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
+github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= +github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= +github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= +github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= +github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk= +github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE= +github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= 
+github.com/go-openapi/jsonpointer v0.22.4 h1:dZtK82WlNpVLDW2jlA1YCiVJFVqkED1MegOUy9kR5T4= +github.com/go-openapi/jsonpointer v0.22.4/go.mod h1:elX9+UgznpFhgBuaMQ7iu4lvvX1nvNsesQ3oxmYTw80= +github.com/go-openapi/jsonreference v0.21.4 h1:24qaE2y9bx/q3uRK/qN+TDwbok1NhbSmGjjySRCHtC8= +github.com/go-openapi/jsonreference v0.21.4/go.mod h1:rIENPTjDbLpzQmQWCj5kKj3ZlmEh+EFVbz3RTUh30/4= +github.com/go-openapi/swag v0.25.4 h1:OyUPUFYDPDBMkqyxOTkqDYFnrhuhi9NR6QVUvIochMU= +github.com/go-openapi/swag v0.25.4/go.mod h1:zNfJ9WZABGHCFg2RnY0S4IOkAcVTzJ6z2Bi+Q4i6qFQ= +github.com/go-openapi/swag/cmdutils v0.25.4 h1:8rYhB5n6WawR192/BfUu2iVlxqVR9aRgGJP6WaBoW+4= +github.com/go-openapi/swag/cmdutils v0.25.4/go.mod h1:pdae/AFo6WxLl5L0rq87eRzVPm/XRHM3MoYgRMvG4A0= +github.com/go-openapi/swag/conv v0.25.4 h1:/Dd7p0LZXczgUcC/Ikm1+YqVzkEeCc9LnOWjfkpkfe4= +github.com/go-openapi/swag/conv v0.25.4/go.mod h1:3LXfie/lwoAv0NHoEuY1hjoFAYkvlqI/Bn5EQDD3PPU= +github.com/go-openapi/swag/fileutils v0.25.4 h1:2oI0XNW5y6UWZTC7vAxC8hmsK/tOkWXHJQH4lKjqw+Y= +github.com/go-openapi/swag/fileutils v0.25.4/go.mod h1:cdOT/PKbwcysVQ9Tpr0q20lQKH7MGhOEb6EwmHOirUk= +github.com/go-openapi/swag/jsonname v0.25.4 h1:bZH0+MsS03MbnwBXYhuTttMOqk+5KcQ9869Vye1bNHI= +github.com/go-openapi/swag/jsonname v0.25.4/go.mod h1:GPVEk9CWVhNvWhZgrnvRA6utbAltopbKwDu8mXNUMag= +github.com/go-openapi/swag/jsonutils v0.25.4 h1:VSchfbGhD4UTf4vCdR2F4TLBdLwHyUDTd1/q4i+jGZA= +github.com/go-openapi/swag/jsonutils v0.25.4/go.mod h1:7OYGXpvVFPn4PpaSdPHJBtF0iGnbEaTk8AvBkoWnaAY= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4 h1:IACsSvBhiNJwlDix7wq39SS2Fh7lUOCJRmx/4SN4sVo= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4/go.mod h1:Mt0Ost9l3cUzVv4OEZG+WSeoHwjWLnarzMePNDAOBiM= +github.com/go-openapi/swag/loading v0.25.4 h1:jN4MvLj0X6yhCDduRsxDDw1aHe+ZWoLjW+9ZQWIKn2s= +github.com/go-openapi/swag/loading v0.25.4/go.mod h1:rpUM1ZiyEP9+mNLIQUdMiD7dCETXvkkC30z53i+ftTE= +github.com/go-openapi/swag/mangling v0.25.4 
h1:2b9kBJk9JvPgxr36V23FxJLdwBrpijI26Bx5JH4Hp48= +github.com/go-openapi/swag/mangling v0.25.4/go.mod h1:6dxwu6QyORHpIIApsdZgb6wBk/DPU15MdyYj/ikn0Hg= +github.com/go-openapi/swag/netutils v0.25.4 h1:Gqe6K71bGRb3ZQLusdI8p/y1KLgV4M/k+/HzVSqT8H0= +github.com/go-openapi/swag/netutils v0.25.4/go.mod h1:m2W8dtdaoX7oj9rEttLyTeEFFEBvnAx9qHd5nJEBzYg= +github.com/go-openapi/swag/stringutils v0.25.4 h1:O6dU1Rd8bej4HPA3/CLPciNBBDwZj9HiEpdVsb8B5A8= +github.com/go-openapi/swag/stringutils v0.25.4/go.mod h1:GTsRvhJW5xM5gkgiFe0fV3PUlFm0dr8vki6/VSRaZK0= +github.com/go-openapi/swag/typeutils v0.25.4 h1:1/fbZOUN472NTc39zpa+YGHn3jzHWhv42wAJSN91wRw= +github.com/go-openapi/swag/typeutils v0.25.4/go.mod h1:Ou7g//Wx8tTLS9vG0UmzfCsjZjKhpjxayRKTHXf2pTE= +github.com/go-openapi/swag/yamlutils v0.25.4 h1:6jdaeSItEUb7ioS9lFoCZ65Cne1/RZtPBZ9A56h92Sw= +github.com/go-openapi/swag/yamlutils v0.25.4/go.mod h1:MNzq1ulQu+yd8Kl7wPOut/YHAAU/H6hL91fF+E2RFwc= +github.com/go-openapi/testify/enable/yaml/v2 v2.0.2 h1:0+Y41Pz1NkbTHz8NngxTuAXxEodtNSI1WG1c/m5Akw4= +github.com/go-openapi/testify/enable/yaml/v2 v2.0.2/go.mod h1:kme83333GCtJQHXQ8UKX3IBZu6z8T5Dvy5+CW3NLUUg= +github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls= +github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod 
h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.7.1 h1:SisTfuFKJSKM5CPZkffwi6coztzzeYUhc3v4yxLWH8c= +github.com/google/gnostic-models v0.7.1/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 h1:z2ogiKUYzX5Is6zr/vP9vJGqPwcdqsWjOt+V8J7+bTc= +github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= +github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3 h1:B+8ClL/kCQkRiU82d9xajRPKYMrB7E0MbtzWVi1K4ns= +github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3/go.mod h1:NbCUVmiS4foBGBHOYlCT25+YmGpJ32dZPi75pGEUpj4= +github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/joshdk/go-junit v1.0.0 h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE= 
+github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= +github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0 h1:bMqrb3UHgHbP+PW9VwiejfDJU1R0PpXVZNMdeH8WYKI= +github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0/go.mod h1:E3vdYxHj2C2q6qo8/Da4g7P+IcwqRZyy3gJBzYybV9Y= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/lib/pq v1.12.0 h1:mC1zeiNamwKBecjHarAr26c/+d8V5w/u4J0I/yASbJo= +github.com/lib/pq v1.12.0/go.mod h1:/p+8NSbOcwzAEI7wiMXFlgydTwcgTr3OSKMsD2BitpA= +github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= +github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= +github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= +github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A= +github.com/moby/spdystream v0.5.1 
h1:9sNYeYZUcci9R6/w7KDaFWEWeV4LStVG78Mpyq/Zm/Y= +github.com/moby/spdystream v0.5.1/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= +github.com/onsi/ginkgo/v2 v2.28.1 h1:S4hj+HbZp40fNKuLUQOYLDgZLwNUVn19N3Atb98NCyI= +github.com/onsi/ginkgo/v2 v2.28.1/go.mod h1:CLtbVInNckU3/+gC8LzkGUb9oF+e8W8TdUsxPwvdOgE= +github.com/onsi/gomega v1.39.1 h1:1IJLAad4zjPn2PsnhH70V4DKRFlrCzGBNrNaru+Vf28= +github.com/onsi/gomega v1.39.1/go.mod h1:hL6yVALoTOxeWudERyfppUcZXjMwIMLnuSfruD2lcfg= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 
h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.1 h1:wyKanf+IFdbIqbDNYGt+f1dabLErLWtBaxd0KaAx4aM= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.1/go.mod h1:WHiLZmOWVop/MoYvRD58LfnPeyE+dcITby/jQjg83Hw= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.67.4 h1:yR3NqWO1/UyO1w2PhUvXlGQs/PtFmoveVO0KZ4+Lvsc= +github.com/prometheus/common v0.67.4/go.mod h1:gP0fq6YjjNCLssJCQp0yk4M8W6ikLURwkdd/YKtTbyI= +github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws= +github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= +github.com/robfig/cron v1.2.0 h1:ZjScXvvxeQ63Dbyxy76Fj3AT3Ut0aKsyd2/tl3DTMuQ= +github.com/robfig/cron v1.2.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod 
h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= +github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.2.0 h1:bYKF2AEwG5rqd1BumT4gAnvwU/M9nBp2pTSxeZw7Wvs= +github.com/xdg-go/scram v1.2.0/go.mod h1:3dlrS0iBaWKYVt2ZfA4cj48umJZ+cAEbR6/SjLA88I8= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= +github.com/youmark/pkcs8 
v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.mongodb.org/mongo-driver/v2 v2.5.1 h1:j2U/Qp+wvueSpqitLCSZPT/+ZpVc1xzuwdHWwl7d8ro= +go.mongodb.org/mongo-driver/v2 v2.5.1/go.mod h1:yOI9kBsufol30iFsl1slpdq1I0eHPzybRWdyYUs8K/0= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= +go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= +go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= +go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= +go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= +go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= +go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= +go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc= +go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= 
+go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= +golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= +golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= +golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.41.0 
h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= +golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0= +gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE= +google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.35.0 h1:iBAU5LTyBI9vw3L5glmat1njFK34srdLmktWwLTprlY= +k8s.io/api v0.35.0/go.mod h1:AQ0SNTzm4ZAczM03QH42c7l3bih1TbAXYo0DkF8ktnA= +k8s.io/apiextensions-apiserver v0.35.0 h1:3xHk2rTOdWXXJM+RDQZJvdx0yEOgC0FgQ1PlJatA5T4= +k8s.io/apiextensions-apiserver v0.35.0/go.mod h1:E1Ahk9SADaLQ4qtzYFkwUqusXTcaV2uw3l14aqpL2LU= +k8s.io/apimachinery v0.35.0 h1:Z2L3IHvPVv/MJ7xRxHEtk6GoJElaAqDCCU0S6ncYok8= +k8s.io/apimachinery v0.35.0/go.mod h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns= +k8s.io/client-go v0.35.0 h1:IAW0ifFbfQQwQmga0UdoH0yvdqrbwMdq9vIFEhRpxBE= +k8s.io/client-go v0.35.0/go.mod h1:q2E5AAyqcbeLGPdoRB+Nxe3KYTfPce1Dnu1myQdqz9o= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20251125145642-4e65d59e963e h1:iW9ChlU0cU16w8MpVYjXk12dqQ4BPFBEgif+ap7/hqQ= +k8s.io/kube-openapi v0.0.0-20251125145642-4e65d59e963e/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 h1:AZYQSJemyQB5eRxqcPky+/7EdBj0xi3g0ZcxxJ7vbWU= +k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= +sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A= +sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482 h1:2WOzJpHUBVrrkDjU4KBT8n5LDcj824eX0I5UKcgeRUs= 
+sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/test/e2e/labels.go b/test/e2e/labels.go new file mode 100644 index 00000000..8ac02cba --- /dev/null +++ b/test/e2e/labels.go @@ -0,0 +1,59 @@ +// Package e2e contains the DocumentDB Kubernetes Operator end-to-end test +// suite. See docs/designs/e2e-test-suite.md for the full design. +package e2e + +import "github.com/onsi/ginkgo/v2" + +// Ginkgo label constants used to select subsets of the DocumentDB E2E test +// suite at invocation time. Each area suite in tests/<area>/ applies its +// matching area label to every spec it runs; cross-cutting labels +// (Smoke/Basic/Destructive/Disruptive/Slow and the NeedsXxx capability +// labels) are applied by individual specs. +// +// Keep these in sync with the design document. +const ( + // Area labels — one per test area (tests/<area>/). + LifecycleLabel = "lifecycle" + ScaleLabel = "scale" + DataLabel = "data" + PerformanceLabel = "performance" + BackupLabel = "backup" + RecoveryLabel = "recovery" + TLSLabel = "tls" + FeatureLabel = "feature-gates" + ExposureLabel = "exposure" + StatusLabel = "status" + UpgradeLabel = "upgrade" + + // Cross-cutting selectors. + SmokeLabel = "smoke" + BasicLabel = "basic" + DestructiveLabel = "destructive" + DisruptiveLabel = "disruptive" + SlowLabel = "slow" + + // Capability labels — environments that don't provide a prerequisite + // can filter these specs out. + NeedsCertManagerLabel = "needs-cert-manager" + NeedsMetalLBLabel = "needs-metallb" + NeedsCSISnapshotsLabel = "needs-csi-snapshots" + // NeedsCSIResizeLabel marks specs that require the cluster's + // StorageClass to support online PVC expansion (allowVolumeExpansion=true + // plus a resize-capable CSI driver). 
Environments that lack this + // capability should filter with `--label-filter='!needs-csi-resize'`. + NeedsCSIResizeLabel = "needs-csi-resize" +) + +// Level labels expose the depth tier of a spec to Ginkgo's label filter. +// Phase 2 specs should attach exactly one of these alongside the area +// label so invocations can select, e.g., all "level:low" specs with +// `--label-filter=level:low`. These labels are informational — the +// authoritative gate remains [SkipUnlessLevel], which reads TEST_DEPTH +// at runtime. +var ( + LowLevelLabel = ginkgo.Label("level:low") + MediumLevelLabel = ginkgo.Label("level:medium") + HighLevelLabel = ginkgo.Label("level:high") + HighestLevelLabel = ginkgo.Label("level:highest") + LowestLevelLabel = ginkgo.Label("level:lowest") +) diff --git a/test/e2e/levels.go b/test/e2e/levels.go new file mode 100644 index 00000000..6a1c4664 --- /dev/null +++ b/test/e2e/levels.go @@ -0,0 +1,109 @@ +package e2e + +import ( + "fmt" + "os" + "strconv" + + "github.com/onsi/ginkgo/v2" +) + +// Level represents a depth/intensity tier for a test. Specs can gate +// themselves on the currently configured level so that short CI runs +// execute only the most important specs while nightly/manual runs +// expand coverage. +// +// NOTE: CNPG does not currently expose a `tests/utils/levels` package +// in v1.28.1 (verified with `go doc`). If upstream adds one later, +// replace this file with a thin re-export. +type Level int + +const ( + // Highest runs only the most critical specs (fast smoke). + Highest Level = iota + // High adds the core area-suite coverage. + High + // Medium adds broader coverage for the area. This is the default + // per docs/designs/e2e-test-suite.md. + Medium + // Low adds long-running or edge-case scenarios. + Low + // Lowest runs everything, including slow/destructive corners. + Lowest +) + +// testDepthEnv is the environment variable consulted by CurrentLevel. +// Values are integers 0–4 mapping to Highest…Lowest. 
Invalid or unset +// values fall back to defaultLevel (Medium). +const testDepthEnv = "TEST_DEPTH" + +// defaultLevel is the depth applied when TEST_DEPTH is unset or +// invalid. Chosen to match the design document. +const defaultLevel = Medium + +// CurrentLevel reads TEST_DEPTH from the environment and returns the +// corresponding Level. Defaults to Medium when unset or invalid. +func CurrentLevel() Level { + raw, ok := os.LookupEnv(testDepthEnv) + if !ok { + return defaultLevel + } + v, err := strconv.Atoi(raw) + if err != nil { + return defaultLevel + } + switch Level(v) { + case Highest, High, Medium, Low, Lowest: + return Level(v) + default: + return defaultLevel + } +} + +// ShouldRun reports whether a spec declared at `required` should run +// given the currently configured level. A spec runs when the configured +// level is at least as deep as the spec's required level. +// +// Deprecated: Phase 2 specs should use [SkipUnlessLevel] instead — +// it is the single, uniform gate documented for area authors and it +// integrates with Ginkgo's reporting by invoking Skip rather than +// silently returning a bool. +func ShouldRun(required Level) bool { + return CurrentLevel() >= required +} + +// SkipUnlessLevel calls Ginkgo's Skip when the current depth level is +// shallower than min. Typical use from an `It`/`DescribeTable`: +// +// It("exercises the pool under sustained load", Label(e2e.SlowLabel), func() { +// e2e.SkipUnlessLevel(e2e.Low) +// ... +// }) +// +// SkipUnlessLevel is the only level-gating pattern Phase 2 test writers +// should use; prefer it over raw calls to [ShouldRun]. +func SkipUnlessLevel(min Level) { + if CurrentLevel() < min { + ginkgo.Skip(fmt.Sprintf("TEST_DEPTH=%d (%s) is shallower than required %s", + CurrentLevel(), levelName(CurrentLevel()), levelName(min))) + } +} + +// levelName returns a human-readable name for a Level for use in skip +// messages. 
+func levelName(l Level) string { + switch l { + case Highest: + return "Highest" + case High: + return "High" + case Medium: + return "Medium" + case Low: + return "Low" + case Lowest: + return "Lowest" + default: + return fmt.Sprintf("Level(%d)", int(l)) + } +} diff --git a/test/e2e/levels_test.go b/test/e2e/levels_test.go new file mode 100644 index 00000000..128fbe4f --- /dev/null +++ b/test/e2e/levels_test.go @@ -0,0 +1,88 @@ +package e2e + +import ( + "os" + "testing" +) + +func TestCurrentLevelDefault(t *testing.T) { + // t.Setenv with empty value still sets the variable; explicitly + // unset to exercise the "unset" branch. + orig, had := os.LookupEnv(testDepthEnv) + _ = os.Unsetenv(testDepthEnv) + t.Cleanup(func() { + if had { + _ = os.Setenv(testDepthEnv, orig) + } + }) + if got := CurrentLevel(); got != Medium { + t.Fatalf("default CurrentLevel = %v, want Medium", got) + } +} + +func TestCurrentLevelInvalidFallsBack(t *testing.T) { + t.Setenv(testDepthEnv, "not-an-int") + if got := CurrentLevel(); got != Medium { + t.Fatalf("invalid TEST_DEPTH CurrentLevel = %v, want Medium", got) + } + t.Setenv(testDepthEnv, "99") + if got := CurrentLevel(); got != Medium { + t.Fatalf("out-of-range TEST_DEPTH CurrentLevel = %v, want Medium", got) + } +} + +func TestCurrentLevelParses(t *testing.T) { + cases := []struct { + raw string + want Level + }{ + {"0", Highest}, + {"1", High}, + {"2", Medium}, + {"3", Low}, + {"4", Lowest}, + } + for _, c := range cases { + t.Setenv(testDepthEnv, c.raw) + if got := CurrentLevel(); got != c.want { + t.Errorf("CurrentLevel(%s) = %v, want %v", c.raw, got, c.want) + } + } +} + +func TestShouldRunRespectsOrdering(t *testing.T) { + t.Setenv(testDepthEnv, "2") // Medium + // Specs at Highest/High/Medium must run; Low/Lowest must not. 
+ for _, required := range []Level{Highest, High, Medium} { + if !ShouldRun(required) { + t.Errorf("at Medium, ShouldRun(%v) = false; want true", required) + } + } + for _, required := range []Level{Low, Lowest} { + if ShouldRun(required) { + t.Errorf("at Medium, ShouldRun(%v) = true; want false", required) + } + } +} + +func TestLevelName(t *testing.T) { + for _, c := range []struct { + l Level + want string + }{ + {Highest, "Highest"}, + {High, "High"}, + {Medium, "Medium"}, + {Low, "Low"}, + {Lowest, "Lowest"}, + } { + if got := levelName(c.l); got != c.want { + t.Errorf("levelName(%v) = %q, want %q", c.l, got, c.want) + } + } + if got := levelName(Level(42)); got == "" { + t.Error("levelName for unknown should not be empty") + } +} + +// (helpers removed — tests use os.Setenv/Unsetenv directly.) diff --git a/test/e2e/manifests/base/.keep b/test/e2e/manifests/base/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/manifests/base/documentdb.yaml.template b/test/e2e/manifests/base/documentdb.yaml.template new file mode 100644 index 00000000..ed0bc512 --- /dev/null +++ b/test/e2e/manifests/base/documentdb.yaml.template @@ -0,0 +1,18 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + nodeCount: 1 + instancesPerNode: ${INSTANCES} + documentDBImage: ${DOCUMENTDB_IMAGE} + gatewayImage: ${GATEWAY_IMAGE} + documentDbCredentialSecret: ${CREDENTIAL_SECRET} + resource: + storage: + pvcSize: ${STORAGE_SIZE} + storageClass: ${STORAGE_CLASS} + exposeViaService: + serviceType: ${EXPOSURE_TYPE} + logLevel: ${LOG_LEVEL} diff --git a/test/e2e/manifests/embed.go b/test/e2e/manifests/embed.go new file mode 100644 index 00000000..ce6cff1b --- /dev/null +++ b/test/e2e/manifests/embed.go @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// Package manifests embeds the shared DocumentDB CR templates used by +// the E2E suite. 
Exposing them as an embed.FS makes template rendering +// independent of the current working directory, so every per-area +// ginkgo binary can locate them without runtime.Caller tricks. +package manifests + +import "embed" + +// FS holds the base/, mixins/, and backup/ template trees. +// +//go:embed base/*.yaml.template mixins/*.yaml.template backup/*.yaml.template +var FS embed.FS diff --git a/test/e2e/manifests/mixins/.keep b/test/e2e/manifests/mixins/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/manifests/mixins/exposure_clusterip.yaml.template b/test/e2e/manifests/mixins/exposure_clusterip.yaml.template new file mode 100644 index 00000000..6b89c8d6 --- /dev/null +++ b/test/e2e/manifests/mixins/exposure_clusterip.yaml.template @@ -0,0 +1,8 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + exposeViaService: + serviceType: ClusterIP diff --git a/test/e2e/manifests/mixins/exposure_loadbalancer.yaml.template b/test/e2e/manifests/mixins/exposure_loadbalancer.yaml.template new file mode 100644 index 00000000..d2eea518 --- /dev/null +++ b/test/e2e/manifests/mixins/exposure_loadbalancer.yaml.template @@ -0,0 +1,8 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + exposeViaService: + serviceType: LoadBalancer diff --git a/test/e2e/manifests/mixins/feature_changestreams.yaml.template b/test/e2e/manifests/mixins/feature_changestreams.yaml.template new file mode 100644 index 00000000..d23de6d1 --- /dev/null +++ b/test/e2e/manifests/mixins/feature_changestreams.yaml.template @@ -0,0 +1,8 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + featureGates: + ChangeStreams: true diff --git a/test/e2e/manifests/mixins/reclaim_retain.yaml.template b/test/e2e/manifests/mixins/reclaim_retain.yaml.template new file mode 100644 index 00000000..d4542efe --- 
/dev/null +++ b/test/e2e/manifests/mixins/reclaim_retain.yaml.template @@ -0,0 +1,11 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + resource: + storage: + pvcSize: ${STORAGE_SIZE} + storageClass: ${STORAGE_CLASS} + persistentVolumeReclaimPolicy: Retain diff --git a/test/e2e/manifests/mixins/storage_custom.yaml.template b/test/e2e/manifests/mixins/storage_custom.yaml.template new file mode 100644 index 00000000..70eee4c9 --- /dev/null +++ b/test/e2e/manifests/mixins/storage_custom.yaml.template @@ -0,0 +1,10 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + resource: + storage: + pvcSize: ${STORAGE_SIZE} + storageClass: ${STORAGE_CLASS} diff --git a/test/e2e/manifests/mixins/tls_certmanager.yaml.template b/test/e2e/manifests/mixins/tls_certmanager.yaml.template new file mode 100644 index 00000000..ce9cb050 --- /dev/null +++ b/test/e2e/manifests/mixins/tls_certmanager.yaml.template @@ -0,0 +1,14 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + tls: + gateway: + mode: CertManager + certManager: + issuerRef: + name: ${ISSUER_NAME} + kind: ${ISSUER_KIND} + group: cert-manager.io diff --git a/test/e2e/manifests/mixins/tls_disabled.yaml.template b/test/e2e/manifests/mixins/tls_disabled.yaml.template new file mode 100644 index 00000000..4697ae61 --- /dev/null +++ b/test/e2e/manifests/mixins/tls_disabled.yaml.template @@ -0,0 +1,9 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + tls: + gateway: + mode: Disabled diff --git a/test/e2e/manifests/mixins/tls_provided.yaml.template b/test/e2e/manifests/mixins/tls_provided.yaml.template new file mode 100644 index 00000000..3b99c327 --- /dev/null +++ b/test/e2e/manifests/mixins/tls_provided.yaml.template @@ -0,0 +1,11 @@ +apiVersion: documentdb.io/preview 
+kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + tls: + gateway: + mode: Provided + provided: + secretName: ${TLS_SECRET_NAME} diff --git a/test/e2e/manifests/mixins/tls_selfsigned.yaml.template b/test/e2e/manifests/mixins/tls_selfsigned.yaml.template new file mode 100644 index 00000000..93a05b2b --- /dev/null +++ b/test/e2e/manifests/mixins/tls_selfsigned.yaml.template @@ -0,0 +1,9 @@ +apiVersion: documentdb.io/preview +kind: DocumentDB +metadata: + name: ${NAME} + namespace: ${NAMESPACE} +spec: + tls: + gateway: + mode: SelfSigned diff --git a/test/e2e/pkg/e2eutils/assertions/.keep b/test/e2e/pkg/e2eutils/assertions/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/pkg/e2eutils/assertions/assertions.go b/test/e2e/pkg/e2eutils/assertions/assertions.go new file mode 100644 index 00000000..5fd42f40 --- /dev/null +++ b/test/e2e/pkg/e2eutils/assertions/assertions.go @@ -0,0 +1,197 @@ +// Package assertions returns checker closures for use with Gomega's +// Eventually / Consistently. Each helper yields a `func() error` so it +// can be awaited with `Eventually(fn, timeout, poll).Should(Succeed())` +// without this package pulling in ginkgo or gomega itself. +package assertions + +import ( + "context" + "fmt" + "regexp" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + cnpgclusterutils "github.com/cloudnative-pg/cloudnative-pg/tests/utils/clusterutils" + preview "github.com/documentdb/documentdb-operator/api/preview" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/client" + + documentdbutil "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" +) + +// runningStatus aliases the canonical ReadyStatus constant exported by +// the documentdb helper package so all sibling helpers share a single +// source of truth for the "DocumentDB is healthy" sentinel. 
+const runningStatus = documentdbutil.ReadyStatus + +// clusterNameFor returns the CNPG Cluster name that backs the given +// DocumentDB. For single-cluster (non-replicated) deployments this +// matches the DocumentDB name; replicated clusters use +// `<name>-<suffix>` names but are out of scope here (see AssertPrimary* +// variants that accept an explicit cluster name). +func clusterNameFor(dd *preview.DocumentDB) string { + return dd.Name +} + +// getDocumentDB is a small helper shared by assertions that need to +// read a DocumentDB by key. +func getDocumentDB(ctx context.Context, c client.Client, key client.ObjectKey) (*preview.DocumentDB, error) { + dd := &preview.DocumentDB{} + if err := c.Get(ctx, key, dd); err != nil { + return nil, fmt.Errorf("get DocumentDB %s: %w", key, err) + } + return dd, nil +} + +// AssertDocumentDBReady returns a checker that succeeds when the +// DocumentDB identified by key reports Status.Status == runningStatus. +// Any other value (including "" for a freshly-created object) yields +// a non-nil error so Eventually will keep polling. +func AssertDocumentDBReady(ctx context.Context, c client.Client, key client.ObjectKey) func() error { + return func() error { + dd, err := getDocumentDB(ctx, c, key) + if err != nil { + return err + } + if dd.Status.Status != runningStatus { + return fmt.Errorf("DocumentDB %s status=%q, want %q", + key, dd.Status.Status, runningStatus) + } + return nil + } +} + +// AssertInstanceCount returns a checker that succeeds when the CNPG +// Cluster backing the DocumentDB reports Status.ReadyInstances == want. +// This is the canonical signal for "scale operation completed": the +// DocumentDB spec alone does not expose a live instance count. 
+func AssertInstanceCount(ctx context.Context, c client.Client, key client.ObjectKey, want int) func() error { + return func() error { + dd, err := getDocumentDB(ctx, c, key) + if err != nil { + return err + } + cluster := &cnpgv1.Cluster{} + ck := client.ObjectKey{Namespace: key.Namespace, Name: clusterNameFor(dd)} + if err := c.Get(ctx, ck, cluster); err != nil { + return fmt.Errorf("get CNPG Cluster %s: %w", ck, err) + } + if cluster.Status.ReadyInstances != want { + return fmt.Errorf("CNPG Cluster %s readyInstances=%d, want %d", + ck, cluster.Status.ReadyInstances, want) + } + return nil + } +} + +// AssertPrimaryUnchanged returns a checker that succeeds when the +// CNPG primary pod name still matches initialPrimary. It is intended +// for Consistently() checks during operations that must not trigger a +// failover (e.g. PVC resize). +func AssertPrimaryUnchanged(ctx context.Context, c client.Client, key client.ObjectKey, initialPrimary string) func() error { + return func() error { + dd, err := getDocumentDB(ctx, c, key) + if err != nil { + return err + } + pod, err := cnpgclusterutils.GetPrimary(ctx, c, key.Namespace, clusterNameFor(dd)) + if err != nil { + return fmt.Errorf("get primary for %s: %w", key, err) + } + if pod == nil || pod.Name == "" { + return fmt.Errorf("no primary pod found for %s", key) + } + if pod.Name != initialPrimary { + return fmt.Errorf("primary changed: want %s, got %s", initialPrimary, pod.Name) + } + return nil + } +} + +// AssertPVCCount returns a checker that succeeds when the count of +// PersistentVolumeClaims in ns matching labelSelector equals want. +// labelSelector follows the standard Kubernetes selector syntax and +// must parse cleanly or the checker returns an error on every call. 
+func AssertPVCCount(ctx context.Context, c client.Client, ns, labelSelector string, want int) func() error { + sel, selErr := labels.Parse(labelSelector) + return func() error { + if selErr != nil { + return fmt.Errorf("parse selector %q: %w", labelSelector, selErr) + } + pvcs := &corev1.PersistentVolumeClaimList{} + if err := c.List(ctx, pvcs, client.InNamespace(ns), client.MatchingLabelsSelector{Selector: sel}); err != nil { + return fmt.Errorf("list PVCs in %s: %w", ns, err) + } + if got := len(pvcs.Items); got != want { + return fmt.Errorf("PVC count in %s (%s): got %d, want %d", + ns, labelSelector, got, want) + } + return nil + } +} + +// AssertTLSSecretReady returns a checker that succeeds when the named +// secret exists in ns and contains non-empty tls.crt and tls.key +// entries (the canonical keys for a kubernetes.io/tls Secret). +func AssertTLSSecretReady(ctx context.Context, c client.Client, ns, secretName string) func() error { + return func() error { + s := &corev1.Secret{} + key := client.ObjectKey{Namespace: ns, Name: secretName} + if err := c.Get(ctx, key, s); err != nil { + if apierrors.IsNotFound(err) { + return fmt.Errorf("TLS secret %s not found", key) + } + return fmt.Errorf("get TLS secret %s: %w", key, err) + } + if len(s.Data[corev1.TLSCertKey]) == 0 { + return fmt.Errorf("TLS secret %s missing %s", key, corev1.TLSCertKey) + } + if len(s.Data[corev1.TLSPrivateKeyKey]) == 0 { + return fmt.Errorf("TLS secret %s missing %s", key, corev1.TLSPrivateKeyKey) + } + return nil + } +} + +// AssertServiceType returns a checker that succeeds when the named +// Service exists in ns and its spec.type equals want. 
+func AssertServiceType(ctx context.Context, c client.Client, ns, svcName string, want corev1.ServiceType) func() error { + return func() error { + svc := &corev1.Service{} + key := client.ObjectKey{Namespace: ns, Name: svcName} + if err := c.Get(ctx, key, svc); err != nil { + return fmt.Errorf("get Service %s: %w", key, err) + } + if svc.Spec.Type != want { + return fmt.Errorf("Service %s type=%s, want %s", key, svc.Spec.Type, want) + } + return nil + } +} + +// AssertConnectionStringMatches returns a checker that succeeds when +// the DocumentDB's Status.ConnectionString is non-empty and matches +// the supplied regular expression. Regex compilation errors surface on +// every invocation so bad test input fails fast in Eventually. +func AssertConnectionStringMatches(ctx context.Context, c client.Client, key client.ObjectKey, regex string) func() error { + re, reErr := regexp.Compile(regex) + return func() error { + if reErr != nil { + return fmt.Errorf("compile regex %q: %w", regex, reErr) + } + dd, err := getDocumentDB(ctx, c, key) + if err != nil { + return err + } + cs := dd.Status.ConnectionString + if cs == "" { + return fmt.Errorf("DocumentDB %s has empty connectionString", key) + } + if !re.MatchString(cs) { + return fmt.Errorf("DocumentDB %s connectionString %q does not match %q", + key, cs, regex) + } + return nil + } +} diff --git a/test/e2e/pkg/e2eutils/assertions/assertions_test.go b/test/e2e/pkg/e2eutils/assertions/assertions_test.go new file mode 100644 index 00000000..017bbd69 --- /dev/null +++ b/test/e2e/pkg/e2eutils/assertions/assertions_test.go @@ -0,0 +1,166 @@ +package assertions + +import ( + "context" + "strings" + "testing" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + preview "github.com/documentdb/documentdb-operator/api/preview" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + 
"sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func newScheme(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + if err := corev1.AddToScheme(s); err != nil { + t.Fatalf("corev1.AddToScheme: %v", err) + } + if err := preview.AddToScheme(s); err != nil { + t.Fatalf("preview.AddToScheme: %v", err) + } + if err := cnpgv1.AddToScheme(s); err != nil { + t.Fatalf("cnpgv1.AddToScheme: %v", err) + } + return s +} + +func TestAssertDocumentDBReady(t *testing.T) { + t.Parallel() + s := newScheme(t) + dd := &preview.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{Name: "db1", Namespace: "ns"}, + Status: preview.DocumentDBStatus{Status: "Cluster in healthy state"}, + } + notReady := &preview.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{Name: "db2", Namespace: "ns"}, + Status: preview.DocumentDBStatus{Status: "Setting up primary"}, + } + c := fake.NewClientBuilder().WithScheme(s).WithObjects(dd, notReady).Build() + + if err := AssertDocumentDBReady(context.Background(), c, client.ObjectKey{Namespace: "ns", Name: "db1"})(); err != nil { + t.Fatalf("expected ready, got err=%v", err) + } + if err := AssertDocumentDBReady(context.Background(), c, client.ObjectKey{Namespace: "ns", Name: "db2"})(); err == nil { + t.Fatalf("expected not-ready error") + } + if err := AssertDocumentDBReady(context.Background(), c, client.ObjectKey{Namespace: "ns", Name: "missing"})(); err == nil { + t.Fatalf("expected error for missing object") + } +} + +func TestAssertInstanceCount(t *testing.T) { + t.Parallel() + s := newScheme(t) + dd := &preview.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "ns"}} + cluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "ns"}, + Status: cnpgv1.ClusterStatus{ReadyInstances: 3}, + } + c := fake.NewClientBuilder().WithScheme(s).WithObjects(dd, cluster).Build() + key := client.ObjectKey{Namespace: "ns", Name: "db"} + + if err := AssertInstanceCount(context.Background(), c, key, 3)(); err != nil { + 
t.Fatalf("want ok, got %v", err) + } + if err := AssertInstanceCount(context.Background(), c, key, 2)(); err == nil { + t.Fatalf("want mismatch error") + } +} + +func TestAssertPVCCount(t *testing.T) { + t.Parallel() + s := newScheme(t) + pvcs := []client.Object{ + &corev1.PersistentVolumeClaim{ObjectMeta: metav1.ObjectMeta{ + Name: "p1", Namespace: "ns", Labels: map[string]string{"app": "dd"}}}, + &corev1.PersistentVolumeClaim{ObjectMeta: metav1.ObjectMeta{ + Name: "p2", Namespace: "ns", Labels: map[string]string{"app": "dd"}}}, + &corev1.PersistentVolumeClaim{ObjectMeta: metav1.ObjectMeta{ + Name: "p3", Namespace: "ns", Labels: map[string]string{"app": "other"}}}, + } + c := fake.NewClientBuilder().WithScheme(s).WithObjects(pvcs...).Build() + + if err := AssertPVCCount(context.Background(), c, "ns", "app=dd", 2)(); err != nil { + t.Fatalf("want ok, got %v", err) + } + if err := AssertPVCCount(context.Background(), c, "ns", "app=dd", 3)(); err == nil { + t.Fatalf("want mismatch error") + } + // Malformed selector surfaces on every call. 
+ if err := AssertPVCCount(context.Background(), c, "ns", "!!bad!!", 0)(); err == nil { + t.Fatalf("want parse error") + } +} + +func TestAssertTLSSecretReady(t *testing.T) { + t.Parallel() + s := newScheme(t) + good := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "g", Namespace: "ns"}, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{corev1.TLSCertKey: []byte("c"), corev1.TLSPrivateKeyKey: []byte("k")}, + } + missingKey := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "b", Namespace: "ns"}, + Data: map[string][]byte{corev1.TLSCertKey: []byte("c")}, + } + c := fake.NewClientBuilder().WithScheme(s).WithObjects(good, missingKey).Build() + if err := AssertTLSSecretReady(context.Background(), c, "ns", "g")(); err != nil { + t.Fatalf("good: %v", err) + } + if err := AssertTLSSecretReady(context.Background(), c, "ns", "b")(); err == nil { + t.Fatalf("want error for missing key") + } + err := AssertTLSSecretReady(context.Background(), c, "ns", "none")() + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Fatalf("want not-found error, got %v", err) + } +} + +func TestAssertServiceType(t *testing.T) { + t.Parallel() + s := newScheme(t) + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: "svc", Namespace: "ns"}, + Spec: corev1.ServiceSpec{Type: corev1.ServiceTypeLoadBalancer}, + } + c := fake.NewClientBuilder().WithScheme(s).WithObjects(svc).Build() + if err := AssertServiceType(context.Background(), c, "ns", "svc", corev1.ServiceTypeLoadBalancer)(); err != nil { + t.Fatalf("want ok, got %v", err) + } + if err := AssertServiceType(context.Background(), c, "ns", "svc", corev1.ServiceTypeClusterIP)(); err == nil { + t.Fatalf("want mismatch") + } +} + +func TestAssertConnectionStringMatches(t *testing.T) { + t.Parallel() + s := newScheme(t) + dd := &preview.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "ns"}, + Status: preview.DocumentDBStatus{ConnectionString: "mongodb://user:pw@svc:10260/?tls=true"}, + 
} + empty := &preview.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "empty", Namespace: "ns"}} + c := fake.NewClientBuilder().WithScheme(s).WithObjects(dd, empty).Build() + k := client.ObjectKey{Namespace: "ns", Name: "db"} + + if err := AssertConnectionStringMatches(context.Background(), c, k, `^mongodb://.*tls=true`)(); err != nil { + t.Fatalf("want ok, got %v", err) + } + if err := AssertConnectionStringMatches(context.Background(), c, k, `tls=false`)(); err == nil { + t.Fatalf("want mismatch") + } + if err := AssertConnectionStringMatches(context.Background(), c, + client.ObjectKey{Namespace: "ns", Name: "empty"}, `.*`)(); err == nil { + t.Fatalf("want empty-string error") + } + // Bad regex must surface. + if err := AssertConnectionStringMatches(context.Background(), c, k, `[unclosed`)(); err == nil { + t.Fatalf("want regex compile error") + } +} diff --git a/test/e2e/pkg/e2eutils/clusterprobe/clusterprobe.go b/test/e2e/pkg/e2eutils/clusterprobe/clusterprobe.go new file mode 100644 index 00000000..67275007 --- /dev/null +++ b/test/e2e/pkg/e2eutils/clusterprobe/clusterprobe.go @@ -0,0 +1,173 @@ +// Package clusterprobe supplies runtime capability checks for the +// DocumentDB E2E suite. Ginkgo label selectors (e.g. +// `e2e.NeedsCSISnapshotsLabel`) only gate invocation: when a caller +// forgets `--label-filter='!needs-csi-snapshots'` on a cluster that +// lacks CSI snapshot support, the spec still runs and produces +// confusing failures deep inside the Backup/Restore path. +// +// The probes below give each affected spec a deterministic pre-flight +// check that it can invoke from `BeforeEach` and fall through to a +// clear `Skip(...)` message when the capability is missing. They are +// intentionally framework-agnostic (plain errors, no Ginkgo/Gomega +// imports) so unit tests can exercise them with a controller-runtime +// fake client. 
+package clusterprobe + +import ( + "context" + "errors" + "fmt" + + snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + storagev1 "k8s.io/api/storage/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// DefaultStorageClassAnnotation is the annotation Kubernetes uses to +// flag a StorageClass as the cluster default. Present with value "true" +// (or the legacy beta annotation) on at most one StorageClass per +// cluster. +const DefaultStorageClassAnnotation = "storageclass.kubernetes.io/is-default-class" + +// legacyDefaultStorageClassAnnotation is the pre-GA annotation still +// honoured by some distributions (e.g. older OpenShift releases). +const legacyDefaultStorageClassAnnotation = "storageclass.beta.kubernetes.io/is-default-class" + +// isMissingKindErr folds the two distinct "kind is not available" +// errors a controller-runtime client can return when the underlying +// CRD is absent: the apimachinery no-match error returned by a real +// cluster whose discovery lacks the type, and the runtime +// not-registered error returned by a fake client whose scheme omits +// it. Callers use it to decide "probe says missing" vs. "probe should +// propagate the error". +func isMissingKindErr(err error) bool { + if err == nil { + return false + } + if meta.IsNoMatchError(err) { + return true + } + if runtime.IsNotRegisteredError(err) { + return true + } + return false +} + +// HasVolumeSnapshotCRD returns true when the cluster exposes the +// snapshot.storage.k8s.io/v1 VolumeSnapshot kind (i.e. the external +// snapshotter CRD is installed and its types are reachable through +// the supplied client). Other errors — RBAC denials, transient +// API-server failures — are returned to the caller as-is; the probe +// does not swallow them. 
+func HasVolumeSnapshotCRD(ctx context.Context, c client.Client) (bool, error) { + if c == nil { + return false, errors.New("clusterprobe.HasVolumeSnapshotCRD: client must not be nil") + } + var list snapshotv1.VolumeSnapshotList + if err := c.List(ctx, &list); err != nil { + if isMissingKindErr(err) { + return false, nil + } + return false, fmt.Errorf("list VolumeSnapshots: %w", err) + } + return true, nil +} + +// HasUsableSnapshotClass returns true when at least one +// VolumeSnapshotClass exists on the cluster. Callers that already +// confirmed the CRD via [HasVolumeSnapshotCRD] may still see this +// probe report false on clusters where the CRD is installed but no +// class is provisioned — a common state on stock kind nodes without +// the csi-hostpath driver add-on. +func HasUsableSnapshotClass(ctx context.Context, c client.Client) (bool, error) { + if c == nil { + return false, errors.New("clusterprobe.HasUsableSnapshotClass: client must not be nil") + } + var list snapshotv1.VolumeSnapshotClassList + if err := c.List(ctx, &list); err != nil { + if isMissingKindErr(err) { + return false, nil + } + return false, fmt.Errorf("list VolumeSnapshotClasses: %w", err) + } + return len(list.Items) > 0, nil +} + +// StorageClassAllowsExpansion returns true when the named StorageClass +// exists and has `allowVolumeExpansion=true`. When name is empty the +// probe looks up the cluster's default StorageClass (annotation +// storageclass.kubernetes.io/is-default-class=true, or its legacy +// beta variant). A nil AllowVolumeExpansion pointer on an otherwise +// valid StorageClass is reported as false — that is the Kubernetes +// API default meaning "expansion not allowed". +// +// Returns (false, nil) if the StorageClass (named or default) is not +// found; the caller typically translates that into a Skip() message. +// Returns (false, err) for any other API error. 
+func StorageClassAllowsExpansion(ctx context.Context, c client.Client, name string) (bool, error) { + if c == nil { + return false, errors.New("clusterprobe.StorageClassAllowsExpansion: client must not be nil") + } + sc, err := resolveStorageClass(ctx, c, name) + if err != nil { + return false, err + } + if sc == nil { + return false, nil + } + if sc.AllowVolumeExpansion == nil { + return false, nil + } + return *sc.AllowVolumeExpansion, nil +} + +// resolveStorageClass returns the StorageClass named by name, or when +// name is empty the cluster default. A missing StorageClass returns +// (nil, nil) so the caller can report it as an absent capability. +func resolveStorageClass(ctx context.Context, c client.Client, name string) (*storagev1.StorageClass, error) { + if name != "" { + sc := &storagev1.StorageClass{} + err := c.Get(ctx, client.ObjectKey{Name: name}, sc) + if apierrors.IsNotFound(err) { + return nil, nil + } + if isMissingKindErr(err) { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("get StorageClass %s: %w", name, err) + } + return sc, nil + } + var list storagev1.StorageClassList + if err := c.List(ctx, &list); err != nil { + if isMissingKindErr(err) { + return nil, nil + } + return nil, fmt.Errorf("list StorageClasses: %w", err) + } + for i := range list.Items { + sc := &list.Items[i] + if isDefaultStorageClass(sc) { + return sc, nil + } + } + return nil, nil +} + +// isDefaultStorageClass honours both the GA and legacy beta +// "is-default-class" annotations. 
+func isDefaultStorageClass(sc *storagev1.StorageClass) bool { + if sc == nil { + return false + } + for _, key := range []string{DefaultStorageClassAnnotation, legacyDefaultStorageClassAnnotation} { + if v, ok := sc.Annotations[key]; ok && v == "true" { + return true + } + } + return false +} diff --git a/test/e2e/pkg/e2eutils/clusterprobe/clusterprobe_test.go b/test/e2e/pkg/e2eutils/clusterprobe/clusterprobe_test.go new file mode 100644 index 00000000..24251f13 --- /dev/null +++ b/test/e2e/pkg/e2eutils/clusterprobe/clusterprobe_test.go @@ -0,0 +1,217 @@ +package clusterprobe + +import ( + "context" + "errors" + "testing" + + snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + storagev1 "k8s.io/api/storage/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func schemeWithSnapshots(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatalf("add clientgo scheme: %v", err) + } + if err := snapshotv1.AddToScheme(s); err != nil { + t.Fatalf("add snapshotv1 scheme: %v", err) + } + return s +} + +func schemeWithoutSnapshots(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatalf("add clientgo scheme: %v", err) + } + return s +} + +func TestHasVolumeSnapshotCRD(t *testing.T) { + t.Run("scheme lacks VolumeSnapshot returns false", func(t *testing.T) { + c := fake.NewClientBuilder().WithScheme(schemeWithoutSnapshots(t)).Build() + ok, err := HasVolumeSnapshotCRD(context.Background(), c) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if ok { + t.Fatalf("want false, got true") + } + }) + t.Run("scheme has VolumeSnapshot returns true", func(t *testing.T) { + c := 
fake.NewClientBuilder().WithScheme(schemeWithSnapshots(t)).Build() + ok, err := HasVolumeSnapshotCRD(context.Background(), c) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if !ok { + t.Fatalf("want true, got false") + } + }) + t.Run("nil client is an error", func(t *testing.T) { + _, err := HasVolumeSnapshotCRD(context.Background(), nil) + if err == nil { + t.Fatalf("want error, got nil") + } + }) +} + +func TestHasUsableSnapshotClass(t *testing.T) { + t.Run("CRD missing returns false", func(t *testing.T) { + c := fake.NewClientBuilder().WithScheme(schemeWithoutSnapshots(t)).Build() + ok, err := HasUsableSnapshotClass(context.Background(), c) + if err != nil || ok { + t.Fatalf("want (false, nil), got (%v, %v)", ok, err) + } + }) + t.Run("no classes returns false", func(t *testing.T) { + c := fake.NewClientBuilder().WithScheme(schemeWithSnapshots(t)).Build() + ok, err := HasUsableSnapshotClass(context.Background(), c) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if ok { + t.Fatalf("want false, got true") + } + }) + t.Run("at least one class returns true", func(t *testing.T) { + vsc := &snapshotv1.VolumeSnapshotClass{ + ObjectMeta: metav1.ObjectMeta{Name: "csi-hostpath-snapclass"}, + Driver: "hostpath.csi.k8s.io", + } + c := fake.NewClientBuilder(). + WithScheme(schemeWithSnapshots(t)). + WithObjects(vsc). 
+ Build() + ok, err := HasUsableSnapshotClass(context.Background(), c) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if !ok { + t.Fatalf("want true, got false") + } + }) +} + +func boolPtr(b bool) *bool { return &b } + +func TestStorageClassAllowsExpansion(t *testing.T) { + mk := func(name string, allow *bool, annotations map[string]string) *storagev1.StorageClass { + return &storagev1.StorageClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Annotations: annotations, + }, + Provisioner: "kubernetes.io/host-path", + AllowVolumeExpansion: allow, + } + } + + t.Run("named class with expansion true", func(t *testing.T) { + c := fake.NewClientBuilder(). + WithScheme(schemeWithSnapshots(t)). + WithObjects(mk("csi-hostpath-sc", boolPtr(true), nil)). + Build() + ok, err := StorageClassAllowsExpansion(context.Background(), c, "csi-hostpath-sc") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if !ok { + t.Fatalf("want true, got false") + } + }) + t.Run("named class with expansion nil (default false)", func(t *testing.T) { + c := fake.NewClientBuilder(). + WithScheme(schemeWithSnapshots(t)). + WithObjects(mk("standard", nil, nil)). + Build() + ok, err := StorageClassAllowsExpansion(context.Background(), c, "standard") + if err != nil || ok { + t.Fatalf("want (false, nil), got (%v, %v)", ok, err) + } + }) + t.Run("named class with expansion false", func(t *testing.T) { + c := fake.NewClientBuilder(). + WithScheme(schemeWithSnapshots(t)). + WithObjects(mk("standard", boolPtr(false), nil)). 
+ Build() + ok, _ := StorageClassAllowsExpansion(context.Background(), c, "standard") + if ok { + t.Fatalf("want false, got true") + } + }) + t.Run("missing named class returns false nil", func(t *testing.T) { + c := fake.NewClientBuilder().WithScheme(schemeWithSnapshots(t)).Build() + ok, err := StorageClassAllowsExpansion(context.Background(), c, "does-not-exist") + if err != nil || ok { + t.Fatalf("want (false, nil), got (%v, %v)", ok, err) + } + }) + t.Run("empty name resolves default via GA annotation", func(t *testing.T) { + c := fake.NewClientBuilder(). + WithScheme(schemeWithSnapshots(t)). + WithObjects( + mk("other", boolPtr(false), nil), + mk("standard", boolPtr(true), map[string]string{ + DefaultStorageClassAnnotation: "true", + }), + ). + Build() + ok, err := StorageClassAllowsExpansion(context.Background(), c, "") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if !ok { + t.Fatalf("want true for default class, got false") + } + }) + t.Run("empty name honours legacy beta default annotation", func(t *testing.T) { + c := fake.NewClientBuilder(). + WithScheme(schemeWithSnapshots(t)). + WithObjects( + mk("legacy", boolPtr(true), map[string]string{ + "storageclass.beta.kubernetes.io/is-default-class": "true", + }), + ). + Build() + ok, err := StorageClassAllowsExpansion(context.Background(), c, "") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if !ok { + t.Fatalf("want true for legacy default, got false") + } + }) + t.Run("empty name with no default class returns false", func(t *testing.T) { + c := fake.NewClientBuilder(). + WithScheme(schemeWithSnapshots(t)). + WithObjects(mk("other", boolPtr(true), nil)). 
+ Build() + ok, err := StorageClassAllowsExpansion(context.Background(), c, "") + if err != nil || ok { + t.Fatalf("want (false, nil), got (%v, %v)", ok, err) + } + }) + t.Run("nil client is an error", func(t *testing.T) { + _, err := StorageClassAllowsExpansion(context.Background(), nil, "anything") + if err == nil { + t.Fatalf("want error, got nil") + } + }) +} + +func TestIsMissingKindErrSmoke(t *testing.T) { + if isMissingKindErr(nil) { + t.Fatalf("nil err should not be missing") + } + if isMissingKindErr(errors.New("boom")) { + t.Fatalf("arbitrary error should not be missing") + } +} diff --git a/test/e2e/pkg/e2eutils/documentdb/.keep b/test/e2e/pkg/e2eutils/documentdb/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/pkg/e2eutils/documentdb/documentdb.go b/test/e2e/pkg/e2eutils/documentdb/documentdb.go new file mode 100644 index 00000000..47689c0b --- /dev/null +++ b/test/e2e/pkg/e2eutils/documentdb/documentdb.go @@ -0,0 +1,444 @@ +// Package documentdb provides CRUD and lifecycle helpers for the +// DocumentDB preview CR used by the E2E suite. +// +// The package is deliberately framework-agnostic: it returns plain +// errors rather than calling into Ginkgo/Gomega so unit tests can +// exercise it with a fake client. Suite code wraps these in +// gomega.Eventually where appropriate. +// +// Manifest rendering +// +// Create/RenderCR compose a YAML document from a base template plus +// zero or more mixins, concatenated with "---\n", then run the result +// through CNPG's envsubst helper for ${VAR} substitution. +// +// By default, templates are read from an embedded filesystem +// (test/e2e/manifests via the manifests package) so rendering is +// independent of the current working directory. Callers may pass a +// manifestsRoot to read from disk instead — useful for tests that want +// to point at a fixture tree. 
+package documentdb + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/cloudnative-pg/cloudnative-pg/tests/utils/envsubst" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" + e2emanifests "github.com/documentdb/documentdb-operator/test/e2e/manifests" +) + +// ManifestsFS is the filesystem RenderCR reads templates from when the +// caller does not pass an explicit manifestsRoot. Defaults to the +// embedded test/e2e/manifests tree; tests may override it to point at +// a fixture fs.FS (e.g. fstest.MapFS or os.DirFS). +var ManifestsFS fs.FS = e2emanifests.FS + +// baseSubdir and mixinSubdir are layout conventions: /base/.yaml.template +// and /mixins/.yaml.template respectively. +const ( + baseSubdir = "base" + mixinSubdir = "mixins" + templateExt = ".yaml.template" + yamlSeparator = "---\n" + + // DefaultWaitPoll is the polling interval for WaitHealthy/Delete. + DefaultWaitPoll = 2 * time.Second + + // ReadyStatus is the DocumentDBStatus.Status value the operator + // surfaces once the underlying CNPG cluster is healthy. It mirrors + // the CNPG Cluster status verbatim (see + // operator/src/api/preview/documentdb_types.go). Exposed as an + // exported constant so sibling packages (assertions, fixtures) + // share a single source of truth. + ReadyStatus = "Cluster in healthy state" +) + +// CreateOptions drives Create. Base names the file in manifests/base/, +// Mixins names files under manifests/mixins/. Vars are substituted by +// CNPG's envsubst; NAME and NAMESPACE are added automatically if absent. 
+type CreateOptions struct { + Base string + Mixins []string + Vars map[string]string + ManifestsRoot string // empty = embedded ManifestsFS +} + +// Create renders the CR and applies it via c.Create. The returned object +// is the in-cluster state after Create succeeds. +// +// When opts.Mixins is non-empty, RenderCR produces a multi-document YAML +// that would silently drop all but the first document under a naive +// yaml.Unmarshal. Create therefore deep-merges the rendered documents +// (override semantics: later mixins win) into a single map before +// converting to the typed DocumentDB object. The public RenderCR API +// still returns the raw multi-doc bytes, which are useful for artifact +// dumps and manual kubectl apply. +func Create(ctx context.Context, c client.Client, ns, name string, opts CreateOptions) (*previewv1.DocumentDB, error) { + raw, err := RenderCR(opts.Base, name, ns, opts.Mixins, opts.Vars, opts.ManifestsRoot) + if err != nil { + return nil, err + } + obj, err := decodeMergedDocumentDB(raw) + if err != nil { + return nil, err + } + if obj.Namespace == "" { + obj.Namespace = ns + } + if obj.Name == "" { + obj.Name = name + } + if err := c.Create(ctx, obj); err != nil { + return nil, fmt.Errorf("creating DocumentDB %s/%s: %w", ns, name, err) + } + return obj, nil +} + +// decodeMergedDocumentDB parses a multi-document YAML byte stream (as +// produced by RenderCR) and returns a single DocumentDB object whose +// fields reflect a deep-merge of every document in stream order. +// Maps are merged recursively; scalars and slices in later documents +// overwrite earlier values — the contract every mixin under +// manifests/mixins/ is written against. 
+func decodeMergedDocumentDB(raw []byte) (*previewv1.DocumentDB, error) { + docs, err := splitYAMLDocuments(raw) + if err != nil { + return nil, err + } + if len(docs) == 0 { + return nil, errors.New("decodeMergedDocumentDB: no YAML documents rendered") + } + merged := map[string]interface{}{} + for i, doc := range docs { + if len(bytes.TrimSpace(doc)) == 0 { + continue + } + var m map[string]interface{} + if err := yaml.Unmarshal(doc, &m); err != nil { + return nil, fmt.Errorf("unmarshaling YAML document %d: %w", i, err) + } + if m == nil { + continue + } + deepMerge(merged, m) + } + buf, err := yaml.Marshal(merged) + if err != nil { + return nil, fmt.Errorf("re-marshaling merged DocumentDB YAML: %w", err) + } + obj := &previewv1.DocumentDB{} + if err := yaml.Unmarshal(buf, obj); err != nil { + return nil, fmt.Errorf("unmarshaling merged DocumentDB YAML: %w", err) + } + return obj, nil +} + +// splitYAMLDocuments splits a raw YAML byte stream on the "\n---\n" +// document separator. A leading "---\n" is tolerated. +func splitYAMLDocuments(raw []byte) ([][]byte, error) { + // Normalise CRLF so the separator match is portable. + normalized := bytes.ReplaceAll(raw, []byte("\r\n"), []byte("\n")) + // Trim a leading separator if present. + normalized = bytes.TrimPrefix(normalized, []byte("---\n")) + return bytes.Split(normalized, []byte("\n---\n")), nil +} + +// deepMerge recursively merges src into dst with override semantics: +// when both sides hold a map[string]interface{} the merge recurses; +// otherwise the src value replaces dst's value. Nil src values are +// skipped so a mixin cannot unintentionally null out a base field just +// because YAML decoded the key as an explicit null. 
+func deepMerge(dst, src map[string]interface{}) { + for k, sv := range src { + if sv == nil { + continue + } + dv, ok := dst[k] + if !ok { + dst[k] = sv + continue + } + dm, dIsMap := dv.(map[string]interface{}) + sm, sIsMap := sv.(map[string]interface{}) + if dIsMap && sIsMap { + deepMerge(dm, sm) + dst[k] = dm + continue + } + dst[k] = sv + } +} + +// RenderCR reads the base template and mixin templates and returns the +// concatenated, variable-substituted YAML. NAME and NAMESPACE are +// injected into vars if not already present. +// +// When manifestsRoot is empty, templates are read from the embedded +// ManifestsFS (the default test/e2e/manifests tree). When non-empty, +// it is interpreted as an on-disk directory path and read via +// os.DirFS — the legacy behaviour used by fixture-based tests. +func RenderCR(baseName, name, ns string, mixins []string, vars map[string]string, manifestsRoot string) ([]byte, error) { + if baseName == "" { + return nil, errors.New("RenderCR: baseName is required") + } + + var source fs.FS + if manifestsRoot == "" { + source = ManifestsFS + } else { + source = os.DirFS(manifestsRoot) + } + + merged := map[string]string{"NAME": name, "NAMESPACE": ns} + for k, v := range vars { + merged[k] = v + } + + var buf bytes.Buffer + basePath := filepath.ToSlash(filepath.Join(baseSubdir, baseName+templateExt)) + baseBytes, err := fs.ReadFile(source, basePath) + if err != nil { + return nil, fmt.Errorf("reading base template %s: %w", basePath, err) + } + buf.Write(baseBytes) + + for _, m := range mixins { + mixinPath := filepath.ToSlash(filepath.Join(mixinSubdir, m+templateExt)) + mb, err := fs.ReadFile(source, mixinPath) + if err != nil { + return nil, fmt.Errorf("reading mixin template %s: %w", mixinPath, err) + } + if !bytes.HasSuffix(buf.Bytes(), []byte("\n")) { + buf.WriteByte('\n') + } + buf.WriteString(yamlSeparator) + buf.Write(mb) + } + + rendered, err := envsubst.Envsubst(merged, dropEmptyVarLines(buf.Bytes(), merged)) + if err 
!= nil { + return nil, fmt.Errorf("envsubst: %w", err) + } + return rendered, nil +} + +// DropEmptyVarLines removes template lines of the form `key: ${VAR}` +// when merged[VAR] is an empty string. CNPG's envsubst treats empty +// values as missing, so this lets callers opt fields out of the +// rendered YAML by leaving the corresponding variable unset. Operator +// defaults (documentDBImage, gatewayImage, ...) thus fall through to +// server-side defaults instead of being forced to a pinned value. +func DropEmptyVarLines(data []byte, merged map[string]string) []byte { + return dropEmptyVarLines(data, merged) +} + +// singleVarLineRe matches a line whose non-whitespace content is a +// single YAML scalar assignment to a single ${VAR} reference, e.g.: +// +// documentDBImage: ${DOCUMENTDB_IMAGE} +// +// Leading whitespace is preserved, the captured group is the bare +// variable name. Lines with additional text around the reference do +// not match — we only strip "orphan" scalar assignments. +var singleVarLineRe = regexp.MustCompile(`^\s*[A-Za-z0-9_.\-]+:\s*\$\{([A-Za-z_][A-Za-z0-9_]*)\}\s*$`) + +// dropEmptyVarLines removes template lines of the form +// `key: ${VAR}` when merged[VAR] is an empty string. CNPG's envsubst +// treats empty values as missing, so this lets callers opt fields out +// of the rendered CR by leaving the corresponding variable unset. +// Fields the operator defaults server-side (e.g. documentDBImage, +// gatewayImage) thus fall through to operator defaults. 
+func dropEmptyVarLines(data []byte, merged map[string]string) []byte { + if !bytes.Contains(data, []byte("${")) { + return data + } + var out bytes.Buffer + scanner := bufio.NewScanner(bytes.NewReader(data)) + scanner.Buffer(make([]byte, 64*1024), 1024*1024) + for scanner.Scan() { + line := scanner.Text() + if m := singleVarLineRe.FindStringSubmatch(line); m != nil { + if v, ok := merged[m[1]]; ok && v == "" { + continue + } + } + out.WriteString(line) + out.WriteByte('\n') + } + // Preserve the last newline behaviour of the original buffer: if + // the input didn't end in \n, trim the trailing one we added. + if !strings.HasSuffix(string(data), "\n") && out.Len() > 0 { + b := out.Bytes() + if b[len(b)-1] == '\n' { + out.Truncate(out.Len() - 1) + } + } + return out.Bytes() +} + +// PatchInstances fetches the DocumentDB named by (ns, name) and +// patches its Spec.InstancesPerNode to want. Returns an error if the +// CR cannot be fetched, the desired value is out of the supported +// range (1..3 per the CRD), or the patch fails. When the CR already +// has the desired value the call is a no-op and returns nil. +func PatchInstances(ctx context.Context, c client.Client, ns, name string, want int) error { + if c == nil { + return errors.New("PatchInstances: client must not be nil") + } + if want < 1 || want > 3 { + return fmt.Errorf("PatchInstances: want=%d out of supported range 1..3", want) + } + dd := &previewv1.DocumentDB{} + if err := c.Get(ctx, client.ObjectKey{Namespace: ns, Name: name}, dd); err != nil { + return fmt.Errorf("get DocumentDB %s/%s: %w", ns, name, err) + } + if dd.Spec.InstancesPerNode == want { + return nil + } + before := dd.DeepCopy() + dd.Spec.InstancesPerNode = want + if err := c.Patch(ctx, dd, client.MergeFrom(before)); err != nil { + return fmt.Errorf("patch DocumentDB %s/%s instances=%d: %w", ns, name, want, err) + } + return nil +} + +// PatchSpec applies a merge-from patch that mutates the provided +// DocumentDB's spec in place. 
mutate receives a pointer to the Spec and +// may set any fields; the diff against the pre-mutation object is sent +// to the API server. +func PatchSpec(ctx context.Context, c client.Client, dd *previewv1.DocumentDB, mutate func(*previewv1.DocumentDBSpec)) error { + if dd == nil || mutate == nil { + return errors.New("PatchSpec: dd and mutate must not be nil") + } + before := dd.DeepCopy() + mutate(&dd.Spec) + if err := c.Patch(ctx, dd, client.MergeFrom(before)); err != nil { + return fmt.Errorf("patching DocumentDB %s/%s: %w", dd.Namespace, dd.Name, err) + } + return nil +} + +// WaitHealthy polls until the DocumentDB named by key reports a healthy +// status or the timeout elapses. "Healthy" is defined as +// Status.Status == ReadyStatus (the CNPG cluster status propagated via +// DocumentDBStatus.Status) or the presence of a Ready=True condition on +// the object (future-proofing). +// +// The polling interval is DefaultWaitPoll; the function returns nil on +// first healthy observation or an error describing the last observed +// state on timeout. +func WaitHealthy(ctx context.Context, c client.Client, key client.ObjectKey, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + var last previewv1.DocumentDB + for { + if err := c.Get(ctx, key, &last); err == nil { + if isHealthy(&last) { + return nil + } + } else if !apierrors.IsNotFound(err) { + return fmt.Errorf("getting DocumentDB %s: %w", key, err) + } + if time.Now().After(deadline) { + return fmt.Errorf("timed out after %s waiting for DocumentDB %s to be healthy (last status=%q)", + timeout, key, last.Status.Status) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(DefaultWaitPoll): + } + } +} + +// isHealthy implements the predicate documented on WaitHealthy. 
+func isHealthy(dd *previewv1.DocumentDB) bool { + if dd == nil { + return false + } + if dd.Status.Status == ReadyStatus { + return true + } + // Defensive: DocumentDBStatus today has no Conditions field, but if + // one is added later a Ready=True condition should also be honored. + // Reflectively check via annotations or leave to future extension. + return false +} + +// Delete issues a foreground delete on the given DocumentDB and polls +// until the object is gone or timeout elapses. +func Delete(ctx context.Context, c client.Client, dd *previewv1.DocumentDB, timeout time.Duration) error { + if dd == nil { + return errors.New("Delete: dd must not be nil") + } + if err := c.Delete(ctx, dd); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting DocumentDB %s/%s: %w", dd.Namespace, dd.Name, err) + } + key := client.ObjectKeyFromObject(dd) + deadline := time.Now().Add(timeout) + for { + var got previewv1.DocumentDB + err := c.Get(ctx, key, &got) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return fmt.Errorf("polling deletion of %s: %w", key, err) + } + if time.Now().After(deadline) { + return fmt.Errorf("timed out after %s waiting for DocumentDB %s to be deleted", timeout, key) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(DefaultWaitPoll): + } + } +} + +// List returns all DocumentDB objects in the given namespace. +func List(ctx context.Context, c client.Client, ns string) ([]previewv1.DocumentDB, error) { + var ddList previewv1.DocumentDBList + opts := []client.ListOption{} + if ns != "" { + opts = append(opts, client.InNamespace(ns)) + } + if err := c.List(ctx, &ddList, opts...); err != nil { + return nil, fmt.Errorf("listing DocumentDB in %q: %w", ns, err) + } + return ddList.Items, nil +} + +// Get fetches a DocumentDB by key. 
+func Get(ctx context.Context, c client.Client, key client.ObjectKey) (*previewv1.DocumentDB, error) { + var dd previewv1.DocumentDB + if err := c.Get(ctx, key, &dd); err != nil { + return nil, fmt.Errorf("getting DocumentDB %s: %w", key, err) + } + return &dd, nil +} + +// objectMetaFor is a small helper that constructs an ObjectMeta for +// ad-hoc DocumentDB creation in tests. Exposed because several helpers +// in later phases will build DocumentDB objects programmatically +// instead of rendering templates. +func objectMetaFor(ns, name string) metav1.ObjectMeta { + return metav1.ObjectMeta{Namespace: ns, Name: name} +} + +var _ = objectMetaFor // retained for Phase-2 programmatic builders diff --git a/test/e2e/pkg/e2eutils/documentdb/documentdb_test.go b/test/e2e/pkg/e2eutils/documentdb/documentdb_test.go new file mode 100644 index 00000000..fd35b173 --- /dev/null +++ b/test/e2e/pkg/e2eutils/documentdb/documentdb_test.go @@ -0,0 +1,353 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package documentdb + +import ( + "context" + "os" + "path/filepath" + goruntime "runtime" + "strings" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" +) + +func newScheme(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + if err := previewv1.AddToScheme(s); err != nil { + t.Fatalf("AddToScheme: %v", err) + } + return s +} + +func TestRenderCRConcatenatesBaseAndMixins(t *testing.T) { + dir := t.TempDir() + mustWrite(t, filepath.Join(dir, baseSubdir, "ddb"+templateExt), + "apiVersion: documentdb.io/preview\nkind: DocumentDB\nmetadata:\n name: ${NAME}\n namespace: ${NAMESPACE}\n") + mustWrite(t, filepath.Join(dir, mixinSubdir, "tls"+templateExt), + "# tls mixin for ${NAME}\n") + + got, err := RenderCR("ddb", "my-dd", "ns1", []string{"tls"}, nil, dir) + if err != nil { + t.Fatalf("RenderCR: %v", err) + } + s := string(got) + if !strings.Contains(s, "name: my-dd") { + t.Errorf("expected NAME substitution; got:\n%s", s) + } + if !strings.Contains(s, "namespace: ns1") { + t.Errorf("expected NAMESPACE substitution; got:\n%s", s) + } + if !strings.Contains(s, "---\n") { + t.Errorf("expected YAML separator between base and mixin; got:\n%s", s) + } + if !strings.Contains(s, "tls mixin for my-dd") { + t.Errorf("expected mixin body; got:\n%s", s) + } +} + +func TestRenderCRMissingBaseReturnsError(t *testing.T) { + dir := t.TempDir() + _, err := RenderCR("nope", "n", "ns", nil, nil, dir) + if err == nil { + t.Fatal("expected error for missing base template") + } +} + +func TestRenderCRUserVarsOverrideNameAndNamespace(t *testing.T) { + dir := t.TempDir() + mustWrite(t, filepath.Join(dir, baseSubdir, "b"+templateExt), "x: ${NAME}-${EXTRA}\n") + got, err := RenderCR("b", "n", "ns", nil, 
map[string]string{"EXTRA": "z"}, dir) + if err != nil { + t.Fatalf("RenderCR: %v", err) + } + if !strings.Contains(string(got), "x: n-z") { + t.Errorf("expected substituted extra var; got: %s", got) + } +} + +func TestGetAndList(t *testing.T) { + s := newScheme(t) + objs := []client.Object{ + &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "a", Namespace: "ns1"}}, + &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "b", Namespace: "ns1"}}, + &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "c", Namespace: "ns2"}}, + } + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(objs...).Build() + ctx := context.Background() + + got, err := Get(ctx, c, types.NamespacedName{Name: "a", Namespace: "ns1"}) + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.Name != "a" { + t.Errorf("got name %q want a", got.Name) + } + + items, err := List(ctx, c, "ns1") + if err != nil { + t.Fatalf("List: %v", err) + } + if len(items) != 2 { + t.Errorf("got %d items want 2", len(items)) + } + + all, err := List(ctx, c, "") + if err != nil { + t.Fatalf("List all: %v", err) + } + if len(all) != 3 { + t.Errorf("got %d items want 3", len(all)) + } +} + +func TestPatchSpec(t *testing.T) { + s := newScheme(t) + dd := &previewv1.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{Name: "a", Namespace: "ns1"}, + Spec: previewv1.DocumentDBSpec{NodeCount: 1, InstancesPerNode: 1}, + } + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(dd).Build() + ctx := context.Background() + + fresh, err := Get(ctx, c, client.ObjectKeyFromObject(dd)) + if err != nil { + t.Fatalf("Get: %v", err) + } + if err := PatchSpec(ctx, c, fresh, func(spec *previewv1.DocumentDBSpec) { + spec.LogLevel = "debug" + }); err != nil { + t.Fatalf("PatchSpec: %v", err) + } + after, err := Get(ctx, c, client.ObjectKeyFromObject(dd)) + if err != nil { + t.Fatalf("Get after: %v", err) + } + if after.Spec.LogLevel != "debug" { + t.Errorf("expected LogLevel=debug, got %q", after.Spec.LogLevel) + 
} +} + +func TestIsHealthyMatchesRunningStatus(t *testing.T) { + if isHealthy(nil) { + t.Error("nil should not be healthy") + } + if isHealthy(&previewv1.DocumentDB{}) { + t.Error("empty should not be healthy") + } + dd := &previewv1.DocumentDB{Status: previewv1.DocumentDBStatus{Status: ReadyStatus}} + if !isHealthy(dd) { + t.Errorf("%q should be healthy", ReadyStatus) + } + notReady := &previewv1.DocumentDB{Status: previewv1.DocumentDBStatus{Status: "Running"}} + if isHealthy(notReady) { + t.Error(`"Running" should not be healthy (ReadyStatus mismatch)`) + } +} + +func TestWaitHealthyTimeout(t *testing.T) { + s := newScheme(t) + dd := &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "a", Namespace: "ns1"}} + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(dd).Build() + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + defer cancel() + err := WaitHealthy(ctx, c, client.ObjectKeyFromObject(dd), 200*time.Millisecond) + if err == nil { + t.Fatal("expected timeout error") + } +} + +func TestDeleteRemovesObject(t *testing.T) { + s := newScheme(t) + dd := &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "a", Namespace: "ns1"}} + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(dd).Build() + ctx := context.Background() + if err := Delete(ctx, c, dd, 2*time.Second); err != nil { + t.Fatalf("Delete: %v", err) + } + if _, err := Get(ctx, c, client.ObjectKeyFromObject(dd)); err == nil { + t.Fatal("expected Get to fail after Delete") + } +} + +func TestPatchInstances_UpdatesSpec(t *testing.T) { + s := newScheme(t) + dd := &previewv1.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{Name: "dd", Namespace: "ns1"}, + Spec: previewv1.DocumentDBSpec{NodeCount: 1, InstancesPerNode: 2}, + } + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(dd).Build() + ctx := context.Background() + + if err := PatchInstances(ctx, c, "ns1", "dd", 3); err != nil { + t.Fatalf("PatchInstances: %v", err) + } + got, err := 
Get(ctx, c, types.NamespacedName{Namespace: "ns1", Name: "dd"}) + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.Spec.InstancesPerNode != 3 { + t.Fatalf("InstancesPerNode=%d, want 3", got.Spec.InstancesPerNode) + } +} + +func TestPatchInstances_NoopWhenEqual(t *testing.T) { + s := newScheme(t) + dd := &previewv1.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{Name: "dd", Namespace: "ns1", ResourceVersion: "7"}, + Spec: previewv1.DocumentDBSpec{NodeCount: 1, InstancesPerNode: 2}, + } + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(dd).Build() + if err := PatchInstances(context.Background(), c, "ns1", "dd", 2); err != nil { + t.Fatalf("PatchInstances no-op: %v", err) + } +} + +func TestPatchInstances_RejectsOutOfRange(t *testing.T) { + s := newScheme(t) + c := fakeclient.NewClientBuilder().WithScheme(s).Build() + for _, n := range []int{0, 4, -1} { + if err := PatchInstances(context.Background(), c, "ns1", "dd", n); err == nil { + t.Errorf("PatchInstances(%d) expected error, got nil", n) + } + } +} + +func TestPatchInstances_NotFound(t *testing.T) { + s := newScheme(t) + c := fakeclient.NewClientBuilder().WithScheme(s).Build() + if err := PatchInstances(context.Background(), c, "ns1", "missing", 2); err == nil { + t.Fatal("expected error for missing DocumentDB") + } +} + +func mustWrite(t *testing.T, path, content string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("write: %v", err) + } +} + +// TestCreateAppliesTLSDisabledMixin uses the real base + tls_disabled +// mixin shipped under test/e2e/manifests/ to prove the multi-document +// merge in Create is no longer a silent drop: the mixin's +// Spec.TLS.Gateway.Mode must round-trip to the created object. 
+func TestCreateAppliesTLSDisabledMixin(t *testing.T) { + root := realManifestsRoot(t) + s := newScheme(t) + c := fakeclient.NewClientBuilder().WithScheme(s).Build() + + obj, err := Create(context.Background(), c, "ns1", "dd1", CreateOptions{ + Base: "documentdb", + Mixins: []string{"tls_disabled"}, + ManifestsRoot: root, + Vars: map[string]string{ + "INSTANCES": "1", + "STORAGE_SIZE": "1Gi", + "STORAGE_CLASS": "standard", + "DOCUMENTDB_IMAGE": "ghcr.io/example/ddb:test", + "GATEWAY_IMAGE": "ghcr.io/example/gw:test", + "CREDENTIAL_SECRET": "documentdb-credentials", + "EXPOSURE_TYPE": "ClusterIP", + "LOG_LEVEL": "info", + }, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + // Assert against the returned object and re-Get it from the fake + // client; both paths must reflect the merged mixin. + if obj.Spec.TLS == nil || obj.Spec.TLS.Gateway == nil { + t.Fatalf("returned object missing Spec.TLS.Gateway; got %+v", obj.Spec) + } + if obj.Spec.TLS.Gateway.Mode != "Disabled" { + t.Fatalf("returned Spec.TLS.Gateway.Mode=%q, want Disabled", obj.Spec.TLS.Gateway.Mode) + } + + got, err := Get(context.Background(), c, types.NamespacedName{Namespace: "ns1", Name: "dd1"}) + if err != nil { + t.Fatalf("Get back: %v", err) + } + if got.Spec.TLS == nil || got.Spec.TLS.Gateway == nil { + t.Fatalf("stored object missing Spec.TLS.Gateway; got %+v", got.Spec) + } + if got.Spec.TLS.Gateway.Mode != "Disabled" { + t.Fatalf("stored Spec.TLS.Gateway.Mode=%q, want Disabled", got.Spec.TLS.Gateway.Mode) + } + // Base fields must still be present after the merge. 
+ if got.Spec.InstancesPerNode != 1 { + t.Errorf("Spec.InstancesPerNode=%d, want 1", got.Spec.InstancesPerNode) + } + if got.Spec.Resource.Storage.PvcSize != "1Gi" { + t.Errorf("Spec.Resource.Storage.PvcSize=%q, want 1Gi", got.Spec.Resource.Storage.PvcSize) + } +} + +// TestCreateAppliesReclaimRetainMixin exercises the same multi-doc +// merge path with a mixin that nests Spec.Resource.Storage — verifying +// the deep-merge preserves sibling keys (PvcSize, StorageClass) while +// adding PersistentVolumeReclaimPolicy from the mixin. +func TestCreateAppliesReclaimRetainMixin(t *testing.T) { + root := realManifestsRoot(t) + s := newScheme(t) + c := fakeclient.NewClientBuilder().WithScheme(s).Build() + + obj, err := Create(context.Background(), c, "ns1", "dd2", CreateOptions{ + Base: "documentdb", + Mixins: []string{"reclaim_retain"}, + ManifestsRoot: root, + Vars: map[string]string{ + "INSTANCES": "1", + "STORAGE_SIZE": "2Gi", + "STORAGE_CLASS": "standard", + "DOCUMENTDB_IMAGE": "ghcr.io/example/ddb:test", + "GATEWAY_IMAGE": "ghcr.io/example/gw:test", + "CREDENTIAL_SECRET": "documentdb-credentials", + "EXPOSURE_TYPE": "ClusterIP", + "LOG_LEVEL": "info", + }, + }) + if err != nil { + t.Fatalf("Create: %v", err) + } + if obj.Spec.Resource.Storage.PersistentVolumeReclaimPolicy != "Retain" { + t.Fatalf("Spec.Resource.Storage.PersistentVolumeReclaimPolicy=%q, want Retain", + obj.Spec.Resource.Storage.PersistentVolumeReclaimPolicy) + } + if obj.Spec.Resource.Storage.PvcSize != "2Gi" { + t.Errorf("Spec.Resource.Storage.PvcSize=%q, want 2Gi (base preserved after merge)", + obj.Spec.Resource.Storage.PvcSize) + } +} + +// realManifestsRoot returns the absolute path to test/e2e/manifests so +// the round-trip tests exercise the real templates rather than the +// synthetic fixtures that the RenderCR-only tests build with t.TempDir. +// Anchored off runtime.Caller so `go test` from any directory works. 
+func realManifestsRoot(t *testing.T) string { + t.Helper() + _, thisFile, _, ok := goruntime.Caller(0) + if !ok { + t.Fatal("runtime.Caller failed — cannot locate test/e2e/manifests") + } + // this file: test/e2e/pkg/e2eutils/documentdb/documentdb_test.go + // walk up to test/e2e, then into manifests. + root := filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "manifests") + if _, err := os.Stat(filepath.Join(root, "base", "documentdb"+templateExt)); err != nil { + t.Fatalf("manifests root not found at %s: %v", root, err) + } + return root +} diff --git a/test/e2e/pkg/e2eutils/fixtures/.keep b/test/e2e/pkg/e2eutils/fixtures/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/pkg/e2eutils/fixtures/fixtures.go b/test/e2e/pkg/e2eutils/fixtures/fixtures.go new file mode 100644 index 00000000..a62ed5d1 --- /dev/null +++ b/test/e2e/pkg/e2eutils/fixtures/fixtures.go @@ -0,0 +1,478 @@ +// Package fixtures provides session-scoped test fixtures shared across +// DocumentDB e2e test areas. Two cluster fixtures are supported: +// +// - SharedRO: a 1-instance read-only DocumentDB reused by data/, +// performance/ and status/ specs. Specs isolate via per-spec Mongo +// database names (see DBNameFor). +// - SharedScale: a 2-instance mutable DocumentDB reused by scale/ +// specs. Callers must call ResetToTwoInstances in AfterEach. +// +// Both fixtures are created lazily via sync.Once guards and torn down +// explicitly from the area suite_test.go AfterSuite. +// +// Ownership labels (LabelRunID, LabelFixture, LabelArea) are stamped on +// every namespace and CR fixtures create so TeardownSharedRO / +// TeardownSharedScale can list-by-label instead of delete-by-name — +// that avoids cross-binary teardown collisions described in the Phase 1 +// rubber-duck review. 
+package fixtures + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "os" + "path/filepath" + "runtime" + "sync" + "time" + + "github.com/cloudnative-pg/cloudnative-pg/tests/utils/envsubst" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" + + documentdbutil "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" +) + +// Ownership label keys stamped on every fixture-created namespace and +// DocumentDB CR. Exported so parallel tooling (CI cleanup scripts, +// kubectl one-liners) can use the same selectors. +const ( + LabelRunID = "e2e.documentdb.io/run-id" + LabelFixture = "e2e.documentdb.io/fixture" + LabelArea = "e2e.documentdb.io/area" +) + +// Fixture kind label values. +const ( + FixtureSharedRO = "shared-ro" + FixtureSharedScale = "shared-scale" + // FixturePerSpec is stamped on namespaces and secrets created per + // individual spec (i.e. not shared across specs). Area-specific + // helpers_test.go files use this value via CreateLabeledNamespace. + FixturePerSpec = "per-spec" +) + +// DefaultCredentialPassword / DefaultCredentialUsername expose the seed +// credentials used by both shared and per-spec fixture secrets. Area +// helpers_test.go files import these instead of re-declaring string +// literals; that way a credential change ripples out in one edit. +const ( + DefaultCredentialPassword = defaultCredentialPassword + DefaultCredentialUsername = defaultCredentialUsername +) + +// procID returns the Ginkgo parallel process identifier as a string, +// falling back to "1" when unset. This lets per-process fixtures coexist +// safely in a single kind cluster during ginkgo -p runs. 
+func procID() string { + if v, ok := os.LookupEnv("GINKGO_PARALLEL_PROCESS"); ok && v != "" { + return v + } + return "1" +} + +// runIDMu guards runIDVal. fixtures cannot import the parent e2e +// package (it would create an import cycle); instead the root suite +// calls SetRunID once during SetupSuite. +var ( + runIDMu sync.RWMutex + runIDVal string +) + +// SetRunID records the suite-wide run identifier. Call exactly once +// from the root suite.go after resolving the identifier from the +// environment. Subsequent calls with the same value are no-ops; calls +// with a different non-empty value are ignored (first-writer-wins) to +// keep fixture naming stable if a worker races with the primary node. +func SetRunID(id string) { + if id == "" { + return + } + runIDMu.Lock() + defer runIDMu.Unlock() + if runIDVal == "" { + runIDVal = id + } +} + +// RunID returns the identifier previously recorded by SetRunID, or +// "unset" if SetRunID was never called. The fallback exists so unit +// tests that exercise fixture helpers directly still produce valid +// Kubernetes names; production code paths always call SetRunID first. +func RunID() string { + runIDMu.RLock() + defer runIDMu.RUnlock() + if runIDVal == "" { + return "unset" + } + return runIDVal +} + +// resetRunIDForTest clears the cached run id for unit tests. +func resetRunIDForTest() { + runIDMu.Lock() + defer runIDMu.Unlock() + runIDVal = "" +} + +// defaultCredentialSecretName is the credential secret created alongside +// every shared fixture cluster. Tests read these credentials through +// pkg/e2eutils/mongo helpers. +const defaultCredentialSecretName = "documentdb-credentials" + +// DefaultCredentialSecretName is the exported alias of the credential +// secret name created by the shared fixtures. Exported so cross-package +// helpers (e.g., pkg/e2eutils/mongo) can discover the secret without +// duplicating the string literal. 
+const DefaultCredentialSecretName = defaultCredentialSecretName + +// defaultCredentialUsername / defaultCredentialPassword are the seed +// credentials stamped into the per-fixture credential secret. +const ( + defaultCredentialUsername = "e2e_admin" + defaultCredentialPassword = "E2eAdmin100" //nolint:gosec // fixture-only +) + +// defaultDocumentDBImage / defaultGatewayImage are empty by default so +// the operator composes the cluster itself: CNPG pg18 base image + +// DocumentDB extension via the image-library mechanism + gateway as a +// separate sidecar image. Setting a single monolithic image here would +// make CNPG run the wrong container for postgres. CI pins real images +// via DOCUMENTDB_IMAGE / GATEWAY_IMAGE environment variables. +const ( + defaultDocumentDBImage = "" + defaultGatewayImage = "" +) + +// defaultStorageSize / defaultStorageClass are conservative defaults +// used by both shared fixtures. Override via E2E_STORAGE_SIZE / +// E2E_STORAGE_CLASS environment variables when targeting non-kind +// clusters. +const ( + defaultStorageSize = "1Gi" + defaultStorageClass = "standard" +) + +// defaultFixtureCreateTimeout / defaultFixtureDeleteTimeout / defaultPollInterval +// bound waits performed inside this package. They intentionally do not +// depend on the sibling timeouts package so that fixture setup is not +// delayed by a missing helper. +const ( + defaultFixtureCreateTimeout = 10 * time.Minute + defaultFixtureDeleteTimeout = 5 * time.Minute + defaultPollInterval = 5 * time.Second +) + +// manifestsDir returns the absolute path to the test/e2e/manifests +// directory regardless of the caller's working directory. It relies on +// runtime.Caller to anchor off this source file. 
func manifestsDir() (string, error) {
	_, thisFile, _, ok := runtime.Caller(0)
	if !ok {
		return "", fmt.Errorf("runtime.Caller failed while locating manifests")
	}
	// This file lives at test/e2e/pkg/e2eutils/fixtures/fixtures.go —
	// climb three levels (fixtures → e2eutils → pkg) to reach test/e2e,
	// then descend into manifests/.
	return filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "manifests"), nil
}

// renderDocumentDB applies envsubst to the template at relPath
// (resolved relative to manifestsDir()) and unmarshals the rendered
// YAML into a DocumentDB CR. Lines of the form `key: ${VAR}` whose VAR
// is set-but-empty in vars are stripped first (DropEmptyVarLines) so
// those fields fall through to operator defaults.
func renderDocumentDB(relPath string, vars map[string]string) (*previewv1.DocumentDB, error) {
	root, err := manifestsDir()
	if err != nil {
		return nil, err
	}
	data, err := os.ReadFile(filepath.Join(root, relPath))
	if err != nil {
		return nil, fmt.Errorf("reading template %s: %w", relPath, err)
	}
	rendered, err := envsubst.Envsubst(vars, documentdbutil.DropEmptyVarLines(data, vars))
	if err != nil {
		return nil, fmt.Errorf("envsubst on %s: %w", relPath, err)
	}
	out := &previewv1.DocumentDB{}
	if err := yaml.Unmarshal(rendered, out); err != nil {
		return nil, fmt.Errorf("unmarshal rendered %s: %w", relPath, err)
	}
	return out, nil
}

// ownershipLabels returns the canonical ownership labels applied to
// every fixture-created object: run-id, fixture kind, and (when
// non-empty) the test area. area may be empty when the caller is a
// cross-area helper.
func ownershipLabels(fixture, area string) map[string]string {
	l := map[string]string{
		LabelRunID:   RunID(),
		LabelFixture: fixture,
	}
	if area != "" {
		l[LabelArea] = area
	}
	return l
}

// ensureNamespace creates the namespace if it is missing and stamps the
// ownership labels onto it. If the namespace already exists its labels
// are validated: a missing or mismatched LabelRunID returns a collision
// error (stricter than CreateLabeledNamespace, which adopts unlabeled
// namespaces).
func ensureNamespace(ctx context.Context, c client.Client, name, fixture string) error {
	ns := &corev1.Namespace{
		ObjectMeta: metav1.ObjectMeta{
			Name:   name,
			Labels: ownershipLabels(fixture, ""),
		},
	}
	err := c.Create(ctx, ns)
	if err == nil {
		return nil
	}
	if !apierrors.IsAlreadyExists(err) {
		return fmt.Errorf("create namespace %s: %w", name, err)
	}
	existing := &corev1.Namespace{}
	if getErr := c.Get(ctx, types.NamespacedName{Name: name}, existing); getErr != nil {
		return fmt.Errorf("get existing namespace %s: %w", name, getErr)
	}
	// Strict ownership check: an absent LabelRunID yields "" which will
	// not equal the current RunID(), so unlabeled pre-existing
	// namespaces are rejected as collisions here.
	if got := existing.Labels[LabelRunID]; got != RunID() {
		return fmt.Errorf("fixture collision: namespace %s exists with run-id=%q (current run-id=%q)",
			name, got, RunID())
	}
	if got := existing.Labels[LabelFixture]; got != "" && got != fixture {
		return fmt.Errorf("fixture collision: namespace %s exists with fixture=%q (want %q)",
			name, got, fixture)
	}
	return nil
}

// ensureCredentialSecret creates the fixture credential secret if it is
// missing. The secret schema matches the DocumentDB operator's contract
// (keys "username" and "password"). AlreadyExists is treated as success
// without any label/ownership validation.
func ensureCredentialSecret(ctx context.Context, c client.Client, namespace, name, fixture string) error {
	sec := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: namespace,
			Labels:    ownershipLabels(fixture, ""),
		},
		Type: corev1.SecretTypeOpaque,
		StringData: map[string]string{
			"username": defaultCredentialUsername,
			"password": defaultCredentialPassword,
		},
	}
	if err := c.Create(ctx, sec); err != nil && !apierrors.IsAlreadyExists(err) {
		return fmt.Errorf("create credential secret %s/%s: %w", namespace, name, err)
	}
	return nil
}

// CreateLabeledNamespace creates a per-spec namespace stamped with the
// suite run-id, fixture=per-spec, and the caller-supplied area label.
// It is the exported entry point that area helpers_test.go files call
// in BeforeEach; the labels let CI cleanup scripts reap orphaned
// namespaces by selector even when a spec panics before AfterEach.
//
// AlreadyExists handling is deliberately looser than ensureNamespace's:
// an existing namespace with the current run-id OR with no run-id label
// at all is adopted; only a namespace carrying a different run-id is a
// collision and returns an error.
func CreateLabeledNamespace(ctx context.Context, c client.Client, name, area string) error {
	ns := &corev1.Namespace{
		ObjectMeta: metav1.ObjectMeta{
			Name:   name,
			Labels: ownershipLabels(FixturePerSpec, area),
		},
	}
	err := c.Create(ctx, ns)
	if err == nil {
		return nil
	}
	if !apierrors.IsAlreadyExists(err) {
		return fmt.Errorf("create namespace %s: %w", name, err)
	}
	existing := &corev1.Namespace{}
	if getErr := c.Get(ctx, types.NamespacedName{Name: name}, existing); getErr != nil {
		return fmt.Errorf("get existing namespace %s: %w", name, getErr)
	}
	// got == "" (no run-id label) is adopted here, unlike ensureNamespace.
	if got := existing.Labels[LabelRunID]; got != "" && got != RunID() {
		return fmt.Errorf("fixture collision: namespace %s exists with run-id=%q (current run-id=%q)",
			name, got, RunID())
	}
	return nil
}

// CreateLabeledCredentialSecret creates the default DocumentDB
// credential secret (DefaultCredentialSecretName) in namespace with the
// same labels CreateLabeledNamespace stamps. Idempotent: an existing
// secret is treated as success regardless of label state, matching the
// contract of ensureCredentialSecret used by shared fixtures.
func CreateLabeledCredentialSecret(ctx context.Context, c client.Client, namespace string) error {
	return ensureCredentialSecret(ctx, c, namespace, defaultCredentialSecretName, FixturePerSpec)
}

// baseVars returns the envsubst variable map shared by both fixtures.
+func baseVars(namespace, name, instances string) map[string]string { + documentdbImage := defaultDocumentDBImage + if v := os.Getenv("DOCUMENTDB_IMAGE"); v != "" { + documentdbImage = v + } + gatewayImage := defaultGatewayImage + if v := os.Getenv("GATEWAY_IMAGE"); v != "" { + gatewayImage = v + } + storageSize := defaultStorageSize + if v := os.Getenv("E2E_STORAGE_SIZE"); v != "" { + storageSize = v + } + storageClass := defaultStorageClass + if v := os.Getenv("E2E_STORAGE_CLASS"); v != "" { + storageClass = v + } + return map[string]string{ + "NAMESPACE": namespace, + "NAME": name, + "INSTANCES": instances, + "STORAGE_SIZE": storageSize, + "STORAGE_CLASS": storageClass, + "DOCUMENTDB_IMAGE": documentdbImage, + "GATEWAY_IMAGE": gatewayImage, + "CREDENTIAL_SECRET": defaultCredentialSecretName, + "EXPOSURE_TYPE": "ClusterIP", + "LOG_LEVEL": "info", + } +} + +// createDocumentDB creates the supplied CR if absent, stamping the +// ownership labels onto it. On AlreadyExists it validates the existing +// CR's run-id label matches the current RunID(); a mismatch returns an +// explicit collision error so the caller can abort rather than adopt a +// foreign fixture. 
+func createDocumentDB(ctx context.Context, c client.Client, dd *previewv1.DocumentDB, fixture string) error { + if dd.Labels == nil { + dd.Labels = map[string]string{} + } + for k, v := range ownershipLabels(fixture, "") { + if _, present := dd.Labels[k]; !present { + dd.Labels[k] = v + } + } + err := c.Create(ctx, dd) + if err == nil { + return nil + } + if !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("create documentdb %s/%s: %w", dd.Namespace, dd.Name, err) + } + existing := &previewv1.DocumentDB{} + key := types.NamespacedName{Namespace: dd.Namespace, Name: dd.Name} + if getErr := c.Get(ctx, key, existing); getErr != nil { + return fmt.Errorf("get existing documentdb %s: %w", key, getErr) + } + if got := existing.Labels[LabelRunID]; got != RunID() { + return fmt.Errorf("fixture collision: existing CR %s/%s belongs to run %q (current %q)", + dd.Namespace, dd.Name, got, RunID()) + } + if got := existing.Labels[LabelFixture]; got != "" && got != fixture { + return fmt.Errorf("fixture collision: existing CR %s/%s has fixture=%q (want %q)", + dd.Namespace, dd.Name, got, fixture) + } + return nil +} + +// waitDocumentDBHealthy polls the DocumentDB CR until its status +// reports the canonical healthy string used by the operator and CI. +func waitDocumentDBHealthy(ctx context.Context, c client.Client, namespace, name string, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, defaultPollInterval, timeout, true, func(ctx context.Context) (bool, error) { + dd := &previewv1.DocumentDB{} + if err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, dd); err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, err + } + return dd.Status.Status == documentdbutil.ReadyStatus, nil + }) +} + +// deleteDocumentDB deletes the DocumentDB CR and waits for it to be +// fully removed. 
+func deleteDocumentDB(ctx context.Context, c client.Client, namespace, name string, timeout time.Duration) error { + dd := &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}} + if err := c.Delete(ctx, dd); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete documentdb %s/%s: %w", namespace, name, err) + } + return wait.PollUntilContextTimeout(ctx, defaultPollInterval, timeout, true, func(ctx context.Context) (bool, error) { + err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &previewv1.DocumentDB{}) + if apierrors.IsNotFound(err) { + return true, nil + } + return false, err + }) +} + +// deleteNamespace deletes the namespace and waits for termination. Used +// from fixture teardown. +func deleteNamespace(ctx context.Context, c client.Client, name string, timeout time.Duration) error { + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: name}} + if err := c.Delete(ctx, ns); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete namespace %s: %w", name, err) + } + return wait.PollUntilContextTimeout(ctx, defaultPollInterval, timeout, true, func(ctx context.Context) (bool, error) { + err := c.Get(ctx, types.NamespacedName{Name: name}, &corev1.Namespace{}) + if apierrors.IsNotFound(err) { + return true, nil + } + return false, err + }) +} + +// teardownFixtureByLabels performs a label-selector-driven teardown of +// all resources owned by the current RunID() for the given fixture. It +// first deletes any matching DocumentDB CRs (waiting for finalizers), +// then deletes matching namespaces. Callers must pass the same fixture +// constant they used when creating the resources. +func teardownFixtureByLabels(ctx context.Context, c client.Client, fixture string) error { + sel := client.MatchingLabels{ + LabelRunID: RunID(), + LabelFixture: fixture, + } + // Step 1: delete DocumentDB CRs cluster-wide. 
+ dds := &previewv1.DocumentDBList{} + if err := c.List(ctx, dds, sel); err != nil { + return fmt.Errorf("list %s DocumentDB CRs: %w", fixture, err) + } + for i := range dds.Items { + dd := &dds.Items[i] + if err := deleteDocumentDB(ctx, c, dd.Namespace, dd.Name, defaultFixtureDeleteTimeout); err != nil { + return fmt.Errorf("delete %s DocumentDB %s/%s: %w", fixture, dd.Namespace, dd.Name, err) + } + } + // Step 2: delete namespaces. + nss := &corev1.NamespaceList{} + if err := c.List(ctx, nss, sel); err != nil { + return fmt.Errorf("list %s namespaces: %w", fixture, err) + } + for i := range nss.Items { + ns := &nss.Items[i] + if err := deleteNamespace(ctx, c, ns.Name, defaultFixtureDeleteTimeout); err != nil { + return fmt.Errorf("delete %s namespace %s: %w", fixture, ns.Name, err) + } + } + return nil +} + +// DBNameFor returns a deterministic Mongo database name derived from +// the supplied spec text (typically ginkgo's CurrentSpecReport().FullText()). +// The returned string matches "db_" and is safe for Mongo. 
+func DBNameFor(specText string) string { + sum := sha256.Sum256([]byte(specText)) + return "db_" + hex.EncodeToString(sum[:])[:12] +} diff --git a/test/e2e/pkg/e2eutils/fixtures/fixtures_test.go b/test/e2e/pkg/e2eutils/fixtures/fixtures_test.go new file mode 100644 index 00000000..33ca0fea --- /dev/null +++ b/test/e2e/pkg/e2eutils/fixtures/fixtures_test.go @@ -0,0 +1,217 @@ +package fixtures + +import ( + "context" + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" +) + +func TestDBNameForDeterministic(t *testing.T) { + a := DBNameFor("spec text one") + b := DBNameFor("spec text one") + if a != b { + t.Fatalf("DBNameFor not deterministic: %q != %q", a, b) + } + c := DBNameFor("spec text two") + if a == c { + t.Fatalf("DBNameFor collision for distinct inputs: %q", a) + } + if len(a) != len("db_")+12 { + t.Fatalf("DBNameFor returned unexpected length %q", a) + } + if a[:3] != "db_" { + t.Fatalf("DBNameFor prefix wrong: %q", a) + } +} + +func TestRenderBaseDocumentDB(t *testing.T) { + vars := baseVars("ns", "cluster", "1") + dd, err := renderDocumentDB("base/documentdb.yaml.template", vars) + if err != nil { + t.Fatalf("render failed: %v", err) + } + if dd.Namespace != "ns" || dd.Name != "cluster" { + t.Fatalf("unexpected name/namespace: %s/%s", dd.Namespace, dd.Name) + } + if dd.Spec.NodeCount != 1 { + t.Fatalf("expected nodeCount=1, got %d", dd.Spec.NodeCount) + } + if dd.Spec.InstancesPerNode != 1 { + t.Fatalf("expected instancesPerNode=1, got %d", dd.Spec.InstancesPerNode) + } + if dd.Spec.Resource.Storage.PvcSize == "" { + t.Fatalf("expected pvcSize to be set") + } + if dd.Spec.ExposeViaService.ServiceType != "ClusterIP" { + t.Fatalf("expected ClusterIP exposure, got %q", dd.Spec.ExposeViaService.ServiceType) + } + if _, ok 
:= interface{}(dd).(*previewv1.DocumentDB); !ok { + t.Fatalf("render did not produce *DocumentDB") + } +} + +func TestRenderTLSMixins(t *testing.T) { + for _, tc := range []struct { + path string + wantMode string + }{ + {"mixins/tls_disabled.yaml.template", "Disabled"}, + {"mixins/tls_selfsigned.yaml.template", "SelfSigned"}, + } { + vars := map[string]string{"NAMESPACE": "ns", "NAME": "c"} + dd, err := renderDocumentDB(tc.path, vars) + if err != nil { + t.Fatalf("render %s: %v", tc.path, err) + } + if dd.Spec.TLS == nil || dd.Spec.TLS.Gateway == nil || dd.Spec.TLS.Gateway.Mode != tc.wantMode { + t.Fatalf("%s: expected mode %q, got %+v", tc.path, tc.wantMode, dd.Spec.TLS) + } + } +} + +func TestRenderFeatureChangeStreams(t *testing.T) { + vars := map[string]string{"NAMESPACE": "ns", "NAME": "c"} + dd, err := renderDocumentDB("mixins/feature_changestreams.yaml.template", vars) + if err != nil { + t.Fatalf("render: %v", err) + } + if !dd.Spec.FeatureGates[previewv1.FeatureGateChangeStreams] { + t.Fatalf("expected ChangeStreams feature gate enabled, got %+v", dd.Spec.FeatureGates) + } +} + +// The following tests exercise the label-selector teardown contract and +// the AlreadyExists run-id mismatch error path. They use the +// controller-runtime fake client so they can run without a real +// Kubernetes API. + +func TestOwnershipLabels(t *testing.T) { +resetRunIDForTest() +SetRunID("abcd1234") +labels := ownershipLabels(FixtureSharedRO, "lifecycle") +if labels[LabelRunID] != "abcd1234" { +t.Fatalf("run-id label = %q", labels[LabelRunID]) +} +if labels[LabelFixture] != FixtureSharedRO { +t.Fatalf("fixture label = %q", labels[LabelFixture]) +} +if labels[LabelArea] != "lifecycle" { +t.Fatalf("area label = %q", labels[LabelArea]) +} +// Empty area must not be recorded at all. 
+if _, ok := ownershipLabels(FixtureSharedRO, "")[LabelArea]; ok { +t.Fatalf("area label present for empty area") +} +} + +func TestRunIDFirstWriterWins(t *testing.T) { +resetRunIDForTest() +SetRunID("first") +SetRunID("second") +if got := RunID(); got != "first" { +t.Fatalf("RunID after conflicting sets = %q, want %q", got, "first") +} +resetRunIDForTest() +if got := RunID(); got != "unset" { +t.Fatalf("reset RunID = %q, want \"unset\"", got) +} +} + +// newFakeClient builds a controller-runtime fake client registered for +// the core + preview schemes used by the fixtures helpers. +func newFakeClient(t *testing.T) *fakeclient.ClientBuilder { +t.Helper() +s := runtime.NewScheme() +if err := corev1.AddToScheme(s); err != nil { +t.Fatalf("corev1 AddToScheme: %v", err) +} +if err := previewv1.AddToScheme(s); err != nil { +t.Fatalf("previewv1 AddToScheme: %v", err) +} +return fakeclient.NewClientBuilder().WithScheme(s) +} + +func TestCreateLabeledNamespaceStampsLabels(t *testing.T) { +resetRunIDForTest() +SetRunID("r-create") +c := newFakeClient(t).Build() +if err := CreateLabeledNamespace(context.Background(), c, "ns-a", "lifecycle"); err != nil { +t.Fatalf("CreateLabeledNamespace: %v", err) +} +got := &corev1.Namespace{} +if err := c.Get(context.Background(), types.NamespacedName{Name: "ns-a"}, got); err != nil { +t.Fatalf("Get: %v", err) +} +if got.Labels[LabelRunID] != "r-create" || +got.Labels[LabelFixture] != FixturePerSpec || +got.Labels[LabelArea] != "lifecycle" { +t.Fatalf("unexpected labels: %v", got.Labels) +} +} + +func TestCreateLabeledNamespaceAdoptsMatchingRunID(t *testing.T) { +resetRunIDForTest() +SetRunID("r-adopt") +existing := &corev1.Namespace{ +ObjectMeta: metav1.ObjectMeta{ +Name: "ns-b", +Labels: map[string]string{LabelRunID: "r-adopt"}, +}, +} +c := newFakeClient(t).WithObjects(existing).Build() +if err := CreateLabeledNamespace(context.Background(), c, "ns-b", "lifecycle"); err != nil { +t.Fatalf("expected adoption on matching run-id, got: 
%v", err) +} +} + +func TestCreateLabeledNamespaceRejectsRunIDMismatch(t *testing.T) { +resetRunIDForTest() +SetRunID("r-current") +existing := &corev1.Namespace{ +ObjectMeta: metav1.ObjectMeta{ +Name: "ns-c", +Labels: map[string]string{LabelRunID: "r-stale"}, +}, +} +c := newFakeClient(t).WithObjects(existing).Build() +err := CreateLabeledNamespace(context.Background(), c, "ns-c", "lifecycle") +if err == nil { +t.Fatalf("expected collision error, got nil") +} +} + +func TestCreateLabeledCredentialSecret(t *testing.T) { +resetRunIDForTest() +SetRunID("r-sec") +c := newFakeClient(t).Build() +if err := CreateLabeledCredentialSecret(context.Background(), c, "ns-s"); err != nil { +t.Fatalf("CreateLabeledCredentialSecret: %v", err) +} +got := &corev1.Secret{} +if err := c.Get(context.Background(), types.NamespacedName{ +Namespace: "ns-s", Name: DefaultCredentialSecretName, +}, got); err != nil { +t.Fatalf("Get: %v", err) +} +if string(got.Data["username"]) != DefaultCredentialUsername { +// fake client promotes StringData to Data on read; both keys must match. +if got.StringData["username"] != DefaultCredentialUsername { +t.Fatalf("username mismatch: data=%q stringData=%q", +got.Data["username"], got.StringData["username"]) +} +} +if got.Labels[LabelRunID] != "r-sec" || got.Labels[LabelFixture] != FixturePerSpec { +t.Fatalf("unexpected labels: %v", got.Labels) +} +// Second call must not error even though the secret already exists. 
+if err := CreateLabeledCredentialSecret(context.Background(), c, "ns-s"); err != nil { +t.Fatalf("idempotent CreateLabeledCredentialSecret returned: %v", err) +} +} diff --git a/test/e2e/pkg/e2eutils/fixtures/shared_ro.go b/test/e2e/pkg/e2eutils/fixtures/shared_ro.go new file mode 100644 index 00000000..de9dd302 --- /dev/null +++ b/test/e2e/pkg/e2eutils/fixtures/shared_ro.go @@ -0,0 +1,100 @@ +package fixtures + +import ( + "context" + "fmt" + "sync" + + "sigs.k8s.io/controller-runtime/pkg/client" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" +) + +// SharedRONamespace returns the per-process namespace name used by the +// shared read-only fixture cluster. The name embeds the current RunID +// so concurrent runs (e.g., parallel CI jobs) cannot collide on the +// same namespace and stomp one another during teardown. +func SharedRONamespace() string { + return fmt.Sprintf("e2e-shared-ro-%s-%s", RunID(), procID()) +} + +// SharedROName is the DocumentDB CR name used by the shared read-only +// fixture cluster. +const SharedROName = "shared-ro" + +// SharedROHandle is a read-only proxy over the shared RO DocumentDB +// cluster. Callers must NOT mutate the underlying CR. The handle only +// exposes accessors; there are no Patch/Delete methods. +type SharedROHandle struct { + namespace string + name string +} + +// Namespace returns the namespace of the shared RO cluster. +func (h *SharedROHandle) Namespace() string { return h.namespace } + +// Name returns the name of the shared RO cluster. +func (h *SharedROHandle) Name() string { return h.name } + +// GetCR fetches a fresh copy of the underlying DocumentDB CR. The +// returned CR is a deep copy; mutating it has no effect on the live +// resource. Callers that try to Update/Patch the returned CR against +// the API server will succeed silently only if they re-use the real +// client — prefer to treat this as read-only. 
+func (h *SharedROHandle) GetCR(ctx context.Context, c client.Client) (*previewv1.DocumentDB, error) { + dd := &previewv1.DocumentDB{} + if err := c.Get(ctx, client.ObjectKey{Namespace: h.namespace, Name: h.name}, dd); err != nil { + return nil, fmt.Errorf("get shared-ro documentdb: %w", err) + } + return dd, nil +} + +var ( + sharedRO *SharedROHandle + sharedROOnce sync.Once + sharedROErr error +) + +// GetOrCreateSharedRO returns the session-scoped shared read-only +// DocumentDB fixture, creating it lazily on first call. Subsequent +// calls return the same handle. Errors are cached: a failed first +// attempt will not be retried within the same process. +func GetOrCreateSharedRO(ctx context.Context, c client.Client) (*SharedROHandle, error) { + sharedROOnce.Do(func() { + ns := SharedRONamespace() + if err := ensureNamespace(ctx, c, ns, FixtureSharedRO); err != nil { + sharedROErr = err + return + } + if err := ensureCredentialSecret(ctx, c, ns, defaultCredentialSecretName, FixtureSharedRO); err != nil { + sharedROErr = err + return + } + dd, err := renderDocumentDB("base/documentdb.yaml.template", baseVars(ns, SharedROName, "1")) + if err != nil { + sharedROErr = err + return + } + if err := createDocumentDB(ctx, c, dd, FixtureSharedRO); err != nil { + sharedROErr = err + return + } + if err := waitDocumentDBHealthy(ctx, c, ns, SharedROName, defaultFixtureCreateTimeout); err != nil { + sharedROErr = fmt.Errorf("waiting for shared-ro to become healthy: %w", err) + return + } + sharedRO = &SharedROHandle{namespace: ns, name: SharedROName} + }) + return sharedRO, sharedROErr +} + +// TeardownSharedRO deletes every resource stamped with +// (LabelRunID=RunID(), LabelFixture=FixtureSharedRO). This is +// label-selector-driven so a process that never called +// GetOrCreateSharedRO but observes leftover resources from a previous +// run can still clean up. Safe to call multiple times; callers should +// invoke it from SynchronizedAfterSuite. 
+func TeardownSharedRO(ctx context.Context, c client.Client) error { + sharedRO = nil + return teardownFixtureByLabels(ctx, c, FixtureSharedRO) +} diff --git a/test/e2e/pkg/e2eutils/fixtures/shared_scale.go b/test/e2e/pkg/e2eutils/fixtures/shared_scale.go new file mode 100644 index 00000000..fb69b4e8 --- /dev/null +++ b/test/e2e/pkg/e2eutils/fixtures/shared_scale.go @@ -0,0 +1,156 @@ +package fixtures + +import ( + "context" + "fmt" + "sync" + "time" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" +) + +// SharedScaleNamespace returns the per-process namespace name used by +// the shared scale fixture cluster. The name embeds the current RunID +// so concurrent runs cannot collide on the same namespace during +// teardown. +func SharedScaleNamespace() string { + return fmt.Sprintf("e2e-shared-scale-%s-%s", RunID(), procID()) +} + +// SharedScaleName is the DocumentDB CR name used by the shared scale +// fixture cluster. +const SharedScaleName = "shared-scale" + +// sharedScaleInstances is the baseline InstancesPerNode value the +// scale fixture is created with and reset to between specs. +const sharedScaleInstances = 2 + +// SharedScaleHandle is the mutable handle to the shared scale +// DocumentDB cluster used by tests/scale/. Unlike SharedROHandle it +// exposes full access to the underlying CR and provides ResetToTwoInstances +// to restore state between specs. +type SharedScaleHandle struct { + namespace string + name string +} + +// Namespace returns the namespace of the shared scale cluster. +func (h *SharedScaleHandle) Namespace() string { return h.namespace } + +// Name returns the name of the shared scale cluster. 
+func (h *SharedScaleHandle) Name() string { return h.name } + +// GetCR fetches the current state of the underlying DocumentDB CR. +func (h *SharedScaleHandle) GetCR(ctx context.Context, c client.Client) (*previewv1.DocumentDB, error) { + dd := &previewv1.DocumentDB{} + if err := c.Get(ctx, client.ObjectKey{Namespace: h.namespace, Name: h.name}, dd); err != nil { + return nil, fmt.Errorf("get shared-scale documentdb: %w", err) + } + return dd, nil +} + +// ResetToTwoInstances restores the shared scale cluster to +// instancesPerNode=sharedScaleInstances (the default 2) and waits for +// both the operator's DocumentDB status to report healthy and the +// underlying CNPG Cluster's readyInstances to equal 2. Call from an +// AfterEach to leave the fixture in a known state for the next spec. +// +// The CNPG convergence wait is essential: the DocumentDB CR status +// can flip to Ready before the PostgreSQL layer has re-added the +// second replica, which would cause the next spec's scale assertions +// to observe a transient single-instance cluster. 
+func (h *SharedScaleHandle) ResetToTwoInstances(ctx context.Context, c client.Client) error { + dd := &previewv1.DocumentDB{} + if err := c.Get(ctx, client.ObjectKey{Namespace: h.namespace, Name: h.name}, dd); err != nil { + return fmt.Errorf("get shared-scale for reset: %w", err) + } + if dd.Spec.InstancesPerNode != sharedScaleInstances { + patch := client.MergeFrom(dd.DeepCopy()) + dd.Spec.InstancesPerNode = sharedScaleInstances + if err := c.Patch(ctx, dd, patch); err != nil { + return fmt.Errorf("patch shared-scale back to %d instances: %w", sharedScaleInstances, err) + } + } + if err := waitDocumentDBHealthy(ctx, c, h.namespace, h.name, defaultFixtureCreateTimeout); err != nil { + return err + } + return waitCNPGReadyInstances(ctx, c, h.namespace, h.name, sharedScaleInstances, defaultFixtureCreateTimeout) +} + +// waitCNPGReadyInstances polls the CNPG Cluster associated with the +// DocumentDB named (ns, name) until its Status.ReadyInstances matches +// want. The CNPG Cluster is assumed to carry the same name as the +// DocumentDB CR (the non-replicated convention used across the +// operator). +func waitCNPGReadyInstances(ctx context.Context, c client.Client, namespace, name string, want int, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, defaultPollInterval, timeout, true, func(ctx context.Context) (bool, error) { + cl := &cnpgv1.Cluster{} + err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, cl) + if err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, fmt.Errorf("get CNPG cluster %s/%s: %w", namespace, name, err) + } + return cl.Status.ReadyInstances == want, nil + }) +} + +var ( + sharedScale *SharedScaleHandle + sharedScaleOnce sync.Once + sharedScaleErr error +) + +// GetOrCreateSharedScale returns the session-scoped shared scale +// DocumentDB fixture, creating it lazily on first call. Subsequent +// calls return the same handle. 
+func GetOrCreateSharedScale(ctx context.Context, c client.Client) (*SharedScaleHandle, error) { + sharedScaleOnce.Do(func() { + ns := SharedScaleNamespace() + if err := ensureNamespace(ctx, c, ns, FixtureSharedScale); err != nil { + sharedScaleErr = err + return + } + if err := ensureCredentialSecret(ctx, c, ns, defaultCredentialSecretName, FixtureSharedScale); err != nil { + sharedScaleErr = err + return + } + dd, err := renderDocumentDB( + "base/documentdb.yaml.template", + baseVars(ns, SharedScaleName, fmt.Sprintf("%d", sharedScaleInstances)), + ) + if err != nil { + sharedScaleErr = err + return + } + if err := createDocumentDB(ctx, c, dd, FixtureSharedScale); err != nil { + sharedScaleErr = err + return + } + if err := waitDocumentDBHealthy(ctx, c, ns, SharedScaleName, defaultFixtureCreateTimeout); err != nil { + sharedScaleErr = fmt.Errorf("waiting for shared-scale to become healthy: %w", err) + return + } + if err := waitCNPGReadyInstances(ctx, c, ns, SharedScaleName, sharedScaleInstances, defaultFixtureCreateTimeout); err != nil { + sharedScaleErr = fmt.Errorf("waiting for CNPG readyInstances=%d: %w", sharedScaleInstances, err) + return + } + sharedScale = &SharedScaleHandle{namespace: ns, name: SharedScaleName} + }) + return sharedScale, sharedScaleErr +} + +// TeardownSharedScale deletes every resource stamped with +// (LabelRunID=RunID(), LabelFixture=FixtureSharedScale). Safe to call +// multiple times; invoke from SynchronizedAfterSuite. 
+func TeardownSharedScale(ctx context.Context, c client.Client) error { + sharedScale = nil + return teardownFixtureByLabels(ctx, c, FixtureSharedScale) +} diff --git a/test/e2e/pkg/e2eutils/fixtures/teardown_test.go b/test/e2e/pkg/e2eutils/fixtures/teardown_test.go new file mode 100644 index 00000000..7c993f67 --- /dev/null +++ b/test/e2e/pkg/e2eutils/fixtures/teardown_test.go @@ -0,0 +1,177 @@ +package fixtures + +import ( + "context" + "strings" + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" +) + +func newFakeScheme(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + if err := corev1.AddToScheme(s); err != nil { + t.Fatalf("corev1: %v", err) + } + if err := previewv1.AddToScheme(s); err != nil { + t.Fatalf("preview: %v", err) + } + return s +} + +// TestTeardownFixtureByLabels_SelectsOnlyMatchingRun creates two sets +// of fixture objects belonging to different run ids and asserts +// teardownFixtureByLabels only removes those tagged with the current +// run id. 
+func TestTeardownFixtureByLabels_SelectsOnlyMatchingRun(t *testing.T) { + resetRunIDForTest() + SetRunID("runA") + + mineLabels := map[string]string{ + LabelRunID: "runA", + LabelFixture: FixtureSharedRO, + } + theirsLabels := map[string]string{ + LabelRunID: "runB", + LabelFixture: FixtureSharedRO, + } + + mineNS := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-mine", Labels: mineLabels}} + theirsNS := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-theirs", Labels: theirsLabels}} + mineDD := &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{ + Name: "dd-mine", Namespace: "e2e-mine", Labels: mineLabels, + }} + theirsDD := &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{ + Name: "dd-theirs", Namespace: "e2e-theirs", Labels: theirsLabels, + }} + + c := fake.NewClientBuilder(). + WithScheme(newFakeScheme(t)). + WithObjects(mineNS, theirsNS, mineDD, theirsDD). + Build() + + ctx := context.Background() + if err := teardownFixtureByLabels(ctx, c, FixtureSharedRO); err != nil { + t.Fatalf("teardown: %v", err) + } + + // Mine should be gone. + if err := c.Get(ctx, types.NamespacedName{Name: "e2e-mine"}, &corev1.Namespace{}); err == nil { + t.Fatalf("expected mine namespace to be deleted") + } + if err := c.Get(ctx, types.NamespacedName{Namespace: "e2e-mine", Name: "dd-mine"}, &previewv1.DocumentDB{}); err == nil { + t.Fatalf("expected mine documentdb to be deleted") + } + + // Theirs must survive. 
+ if err := c.Get(ctx, types.NamespacedName{Name: "e2e-theirs"}, &corev1.Namespace{}); err != nil { + t.Fatalf("theirs namespace should still exist: %v", err) + } + if err := c.Get(ctx, types.NamespacedName{Namespace: "e2e-theirs", Name: "dd-theirs"}, &previewv1.DocumentDB{}); err != nil { + t.Fatalf("theirs documentdb should still exist: %v", err) + } +} + +// TestCreateDocumentDB_RunIDMismatchIsExplicitError exercises the +// adoption-refusal path: when an existing CR has a different run-id +// label the helper must return a descriptive error instead of silently +// adopting a foreign fixture. +func TestCreateDocumentDB_RunIDMismatchIsExplicitError(t *testing.T) { + resetRunIDForTest() + SetRunID("newrun") + + existing := &previewv1.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{ + Name: "shared", + Namespace: "ns", + Labels: map[string]string{ + LabelRunID: "oldrun", + LabelFixture: FixtureSharedRO, + }, + }, + } + c := fake.NewClientBuilder(). + WithScheme(newFakeScheme(t)). + WithObjects(existing). + Build() + + attempt := &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "shared", Namespace: "ns"}} + err := createDocumentDB(context.Background(), c, attempt, FixtureSharedRO) + if err == nil { + t.Fatal("expected collision error, got nil") + } + if !strings.Contains(err.Error(), "fixture collision") { + t.Fatalf("expected 'fixture collision' error, got: %v", err) + } + if !strings.Contains(err.Error(), "oldrun") || !strings.Contains(err.Error(), "newrun") { + t.Fatalf("error should name both run ids: %v", err) + } +} + +// TestCreateDocumentDB_AdoptsMatchingRun ensures that an AlreadyExists +// result with a matching run-id label is treated as idempotent success +// (this is the lazy-fixture re-entry path). 
+func TestCreateDocumentDB_AdoptsMatchingRun(t *testing.T) { + resetRunIDForTest() + SetRunID("runX") + + existing := &previewv1.DocumentDB{ + ObjectMeta: metav1.ObjectMeta{ + Name: "shared", + Namespace: "ns", + Labels: map[string]string{ + LabelRunID: "runX", + LabelFixture: FixtureSharedRO, + }, + }, + } + c := fake.NewClientBuilder(). + WithScheme(newFakeScheme(t)). + WithObjects(existing). + Build() + + attempt := &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "shared", Namespace: "ns"}} + if err := createDocumentDB(context.Background(), c, attempt, FixtureSharedRO); err != nil { + t.Fatalf("expected idempotent success, got %v", err) + } +} + +// TestEnsureNamespace_RunIDMismatchIsExplicitError mirrors the CR test +// for namespace-level collisions. +func TestEnsureNamespace_RunIDMismatchIsExplicitError(t *testing.T) { + resetRunIDForTest() + SetRunID("newrun") + + existing := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ns", + Labels: map[string]string{ + LabelRunID: "oldrun", + LabelFixture: FixtureSharedRO, + }, + }, + } + c := fake.NewClientBuilder(). + WithScheme(newFakeScheme(t)). + WithObjects(existing). + Build() + + err := ensureNamespace(context.Background(), c, "ns", FixtureSharedRO) + if err == nil { + t.Fatal("expected collision error, got nil") + } + if !strings.Contains(err.Error(), "fixture collision") { + t.Fatalf("want fixture collision, got: %v", err) + } +} + +// Silence unused-import warnings if client is otherwise unused. +var _ client.Client = (client.Client)(nil) diff --git a/test/e2e/pkg/e2eutils/helmop/helmop.go b/test/e2e/pkg/e2eutils/helmop/helmop.go new file mode 100644 index 00000000..ebc7033e --- /dev/null +++ b/test/e2e/pkg/e2eutils/helmop/helmop.go @@ -0,0 +1,192 @@ +// Package helmop provides thin wrappers around the `helm` CLI for the +// DocumentDB E2E upgrade specs. 
The upgrade area owns its own operator +// install — it installs a previous-released chart, deploys a DocumentDB, +// then upgrades the chart to the PR's build — so these helpers are +// disruptive by design and must only be used from specs running with +// `ginkgo -procs=1`. +// +// The helpers shell out to the `helm` binary on PATH. Required CLI: +// `helm` v3.13+ (Helm 3 with `upgrade --install`, `--wait`, and +// `--version` behavior used here). No in-process Helm SDK dependency is +// pulled in so the test module footprint stays small. +// +// Typical flow from a spec: +// +// _ = helmop.Uninstall(ctx, "documentdb-operator", "documentdb-operator") +// Expect(helmop.Install(ctx, "documentdb-operator", "documentdb-operator", +// "documentdb/documentdb-operator", "0.1.2", nil)).To(Succeed()) +// Expect(helmop.WaitOperatorReady(ctx, env, "documentdb-operator", +// 2*time.Minute)).To(Succeed()) +// Expect(helmop.Upgrade(ctx, "documentdb-operator", "documentdb-operator", +// "/path/to/pr-chart", "", nil)).To(Succeed()) +package helmop + +import ( + "bytes" + "context" + "fmt" + "os/exec" + "sort" + "time" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/cloudnative-pg/cloudnative-pg/tests/utils/environment" + + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/operatorhealth" +) + +// DefaultTimeout bounds every helm invocation. Individual callers may +// pass a context with a tighter deadline. +const DefaultTimeout = 10 * time.Minute + +// run executes the helm CLI with the supplied args. stdout/stderr are +// merged so error messages from helm are surfaced verbatim. 
+func run(ctx context.Context, args ...string) error { + if _, err := exec.LookPath("helm"); err != nil { + return fmt.Errorf("helm CLI not found on PATH: %w", err) + } + cctx := ctx + if _, hasDeadline := ctx.Deadline(); !hasDeadline { + var cancel context.CancelFunc + cctx, cancel = context.WithTimeout(ctx, DefaultTimeout) + defer cancel() + } + cmd := exec.CommandContext(cctx, "helm", args...) + var out bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &out + if err := cmd.Run(); err != nil { + return fmt.Errorf("helm %v: %w\n---helm output---\n%s", args, err, out.String()) + } + return nil +} + +// setFlags renders a values map to deterministic `--set key=value` +// arguments. Sorted by key so command lines are reproducible in logs. +func setFlags(values map[string]string) []string { + if len(values) == 0 { + return nil + } + keys := make([]string, 0, len(values)) + for k := range values { + keys = append(keys, k) + } + sort.Strings(keys) + args := make([]string, 0, 2*len(keys)) + for _, k := range keys { + args = append(args, "--set", fmt.Sprintf("%s=%s", k, values[k])) + } + return args +} + +// Install installs the chart at the given release name / namespace. The +// namespace is created if absent. version may be empty to use the +// latest chart version reachable from the repo/URL. values is an +// optional `--set key=value` map. +func Install(ctx context.Context, releaseName, namespace, chart, version string, values map[string]string) error { + if releaseName == "" || namespace == "" || chart == "" { + return fmt.Errorf("helmop.Install: releaseName, namespace and chart are required") + } + args := []string{ + "install", releaseName, chart, + "--namespace", namespace, + "--create-namespace", + "--wait", + } + if version != "" { + args = append(args, "--version", version) + } + args = append(args, setFlags(values)...) + return run(ctx, args...) 
+} + +// Upgrade upgrades an existing release, or installs it if the release +// is absent (helm upgrade --install semantics). +func Upgrade(ctx context.Context, releaseName, namespace, chart, version string, values map[string]string) error { + if releaseName == "" || namespace == "" || chart == "" { + return fmt.Errorf("helmop.Upgrade: releaseName, namespace and chart are required") + } + args := []string{ + "upgrade", "--install", releaseName, chart, + "--namespace", namespace, + "--create-namespace", + "--wait", + } + if version != "" { + args = append(args, "--version", version) + } + args = append(args, setFlags(values)...) + return run(ctx, args...) +} + +// Uninstall removes a release. A missing release is not an error so +// callers can use Uninstall as an idempotent reset. +func Uninstall(ctx context.Context, releaseName, namespace string) error { + if releaseName == "" || namespace == "" { + return fmt.Errorf("helmop.Uninstall: releaseName and namespace are required") + } + err := run(ctx, "uninstall", releaseName, "--namespace", namespace, "--wait", "--ignore-not-found") + return err +} + +// WaitOperatorReady polls the operator namespace until a pod with the +// operator label is Ready or the timeout expires. It deliberately +// reuses operatorhealth's label selector so callers observe the same +// pod the churn gate watches. 
+func WaitOperatorReady(ctx context.Context, env *environment.TestingEnvironment, namespace string, timeout time.Duration) error { + if env == nil || env.Client == nil { + return fmt.Errorf("helmop.WaitOperatorReady: nil env/client") + } + if namespace == "" { + return fmt.Errorf("helmop.WaitOperatorReady: namespace required") + } + deadline := time.Now().Add(timeout) + const poll = 3 * time.Second + var lastReason string + for { + ready, reason, err := operatorReadyOnce(ctx, env.Client, namespace) + if err == nil && ready { + return nil + } + if err != nil { + lastReason = err.Error() + } else { + lastReason = reason + } + if time.Now().After(deadline) { + return fmt.Errorf("operator pod in %q not ready after %s: %s", namespace, timeout, lastReason) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(poll): + } + } +} + +func operatorReadyOnce(ctx context.Context, c client.Client, namespace string) (bool, string, error) { + var pods corev1.PodList + if err := c.List(ctx, &pods, + client.InNamespace(namespace), + client.MatchingLabels{operatorhealth.PodLabelKey: operatorhealth.PodLabelValue}, + ); err != nil { + return false, "", fmt.Errorf("list operator pods: %w", err) + } + if len(pods.Items) == 0 { + return false, "no operator pods yet", nil + } + for i := range pods.Items { + p := &pods.Items[i] + if p.Status.Phase != corev1.PodRunning { + continue + } + for _, cond := range p.Status.Conditions { + if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue { + return true, "", nil + } + } + } + return false, fmt.Sprintf("%d operator pod(s) present but none Ready", len(pods.Items)), nil +} diff --git a/test/e2e/pkg/e2eutils/helmop/helmop_test.go b/test/e2e/pkg/e2eutils/helmop/helmop_test.go new file mode 100644 index 00000000..eb666e5f --- /dev/null +++ b/test/e2e/pkg/e2eutils/helmop/helmop_test.go @@ -0,0 +1,69 @@ +package helmop + +import ( + "context" + "testing" + "time" +) + +func TestSetFlagsDeterministic(t *testing.T) { 
+ t.Parallel() + got := setFlags(map[string]string{"b": "2", "a": "1", "c": "3"}) + want := []string{"--set", "a=1", "--set", "b=2", "--set", "c=3"} + if len(got) != len(want) { + t.Fatalf("setFlags length = %d, want %d (%v)", len(got), len(want), got) + } + for i := range got { + if got[i] != want[i] { + t.Fatalf("setFlags[%d] = %q, want %q", i, got[i], want[i]) + } + } +} + +func TestSetFlagsEmpty(t *testing.T) { + t.Parallel() + if got := setFlags(nil); got != nil { + t.Fatalf("setFlags(nil) = %v, want nil", got) + } + if got := setFlags(map[string]string{}); got != nil { + t.Fatalf("setFlags(empty) = %v, want nil", got) + } +} + +func TestInstallRejectsEmptyArgs(t *testing.T) { + t.Parallel() + cases := []struct{ rel, ns, chart string }{ + {"", "ns", "chart"}, + {"rel", "", "chart"}, + {"rel", "ns", ""}, + } + for _, c := range cases { + if err := Install(context.Background(), c.rel, c.ns, c.chart, "", nil); err == nil { + t.Errorf("Install(%q,%q,%q) = nil, want error", c.rel, c.ns, c.chart) + } + } +} + +func TestUpgradeRejectsEmptyArgs(t *testing.T) { + t.Parallel() + if err := Upgrade(context.Background(), "", "ns", "chart", "", nil); err == nil { + t.Error("Upgrade with empty release = nil, want error") + } +} + +func TestUninstallRejectsEmptyArgs(t *testing.T) { + t.Parallel() + if err := Uninstall(context.Background(), "", "ns"); err == nil { + t.Error("Uninstall with empty release = nil, want error") + } + if err := Uninstall(context.Background(), "rel", ""); err == nil { + t.Error("Uninstall with empty namespace = nil, want error") + } +} + +func TestWaitOperatorReadyNilEnv(t *testing.T) { + t.Parallel() + if err := WaitOperatorReady(context.Background(), nil, "ns", time.Millisecond); err == nil { + t.Error("WaitOperatorReady(nil env) = nil, want error") + } +} diff --git a/test/e2e/pkg/e2eutils/mongo/.keep b/test/e2e/pkg/e2eutils/mongo/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/pkg/e2eutils/mongo/client.go 
b/test/e2e/pkg/e2eutils/mongo/client.go new file mode 100644 index 00000000..ce296ff4 --- /dev/null +++ b/test/e2e/pkg/e2eutils/mongo/client.go @@ -0,0 +1,221 @@ +// Package mongo provides thin helpers for the DocumentDB E2E suite to +// connect to a DocumentDB gateway endpoint using the official +// mongo-driver/v2 client. It is intentionally minimal: URI construction +// with proper credential escaping, connect/ping, seeding, counting, and +// database cleanup. +package mongo + +import ( + "context" + "crypto/tls" + "crypto/x509" + "errors" + "fmt" + "net/url" + "time" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" +) + +// DefaultConnectTimeout is applied to mongo.Connect when the caller does +// not provide a deadline on the context. +const DefaultConnectTimeout = 10 * time.Second + +// ClientOptions describes the parameters required to reach a DocumentDB +// gateway. All fields are required except TLSInsecure (ignored when TLS +// is false) and AuthDB (defaults to "admin"). +type ClientOptions struct { + // Host is the DocumentDB gateway hostname or IP. + Host string + // Port is the DocumentDB gateway TCP port. + Port string + // User is the plain (un-escaped) username. + User string + // Password is the plain (un-escaped) password. + Password string + // TLS toggles transport TLS on the connection. + TLS bool + // TLSInsecure skips certificate verification when TLS is true. It is + // only appropriate for tests against self-signed certificates that + // are not trusted via RootCAs. Mutually exclusive in practice with + // RootCAs/CABundlePEM: if both are set, RootCAs wins and + // InsecureSkipVerify is not applied. + TLSInsecure bool + // RootCAs, when non-nil and TLS is true, is used as the trust store + // for server-certificate verification. Takes precedence over + // CABundlePEM if both are set. 
+ RootCAs *x509.CertPool + // CABundlePEM, when non-empty and RootCAs is nil, is parsed into a + // one-off CertPool used as the trust store for server-certificate + // verification. Convenience for callers that already have the PEM + // bytes (e.g., from a kubernetes.io/tls Secret). + CABundlePEM []byte + // ServerName is the expected hostname presented by the server for + // SNI + hostname verification. Defaults to Host when empty. Set + // explicitly when connecting through a port-forward (where Host is + // 127.0.0.1 but the cert is issued for a Service DNS name). + ServerName string + // AuthDB is the authentication database (authSource). Defaults to + // "admin" when empty. + AuthDB string +} + +// BuildURI constructs the mongodb:// URI that NewClient would use. It is +// exported to make credential escaping, TLS flag, and authSource +// behaviour directly unit-testable without spinning up a server. +func BuildURI(opts ClientOptions) (string, error) { + if opts.Host == "" { + return "", errors.New("mongo: Host is required") + } + if opts.Port == "" { + return "", errors.New("mongo: Port is required") + } + if opts.User == "" { + return "", errors.New("mongo: User is required") + } + authDB := opts.AuthDB + if authDB == "" { + authDB = "admin" + } + u := url.QueryEscape(opts.User) + p := url.QueryEscape(opts.Password) + tlsFlag := "false" + if opts.TLS { + tlsFlag = "true" + } + // authSource is a URL query parameter; url.QueryEscape keeps it safe + // for names containing reserved characters. + return fmt.Sprintf( + "mongodb://%s:%s@%s:%s/?tls=%s&authSource=%s", + u, p, opts.Host, opts.Port, tlsFlag, url.QueryEscape(authDB), + ), nil +} + +// NewClient builds a connected *mongo.Client against the endpoint +// described by opts. The caller owns the returned client and is +// responsible for calling Disconnect. +// +// Connect time is bounded by DefaultConnectTimeout via the driver's +// SetConnectTimeout option. 
mongo-driver/v2 Connect is lazy, so +// callers who need a post-connect round-trip must call Ping (or +// pingWithRetry from connect.go) themselves. +func NewClient(_ context.Context, opts ClientOptions) (*mongo.Client, error) { + uri, err := BuildURI(opts) + if err != nil { + return nil, err + } + co := options.Client().ApplyURI(uri).SetConnectTimeout(DefaultConnectTimeout) + if opts.TLS { + tlsCfg, terr := buildTLSConfig(opts) + if terr != nil { + return nil, terr + } + if tlsCfg != nil { + co.SetTLSConfig(tlsCfg) + } + } + c, err := mongo.Connect(co) + if err != nil { + return nil, fmt.Errorf("mongo: connect: %w", err) + } + return c, nil +} + +// buildTLSConfig assembles a *tls.Config for the driver. Priority: +// +// 1. RootCAs, if non-nil — use as trust store. +// 2. CABundlePEM, if non-empty — parse into a fresh pool. +// 3. TLSInsecure — skip verification entirely. +// +// Returns (nil, nil) when TLS is on but none of the above are set; the +// driver then falls back to the system trust store (default behaviour). +// ServerName is propagated when set so callers can overcome SNI +// mismatch in port-forward scenarios. +func buildTLSConfig(opts ClientOptions) (*tls.Config, error) { + cfg := &tls.Config{MinVersion: tls.VersionTLS12} + if opts.ServerName != "" { + cfg.ServerName = opts.ServerName + } + switch { + case opts.RootCAs != nil: + cfg.RootCAs = opts.RootCAs + return cfg, nil + case len(opts.CABundlePEM) > 0: + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(opts.CABundlePEM) { + return nil, errors.New("mongo: CABundlePEM contained no parseable certificates") + } + cfg.RootCAs = pool + return cfg, nil + case opts.TLSInsecure: + cfg.InsecureSkipVerify = true //nolint:gosec // tests only, self-signed gateway + return cfg, nil + } + // TLS on, no CA and not insecure: return a minimal config that + // still honours a user-supplied ServerName but otherwise defers to + // the driver/system trust store. 
+ if cfg.ServerName != "" { + return cfg, nil + } + return nil, nil +} + +// Ping issues a server-selection + hello roundtrip, using the context +// for cancellation/deadline propagation. +func Ping(ctx context.Context, c *mongo.Client) error { + if c == nil { + return errors.New("mongo: nil client") + } + if err := c.Ping(ctx, nil); err != nil { + return fmt.Errorf("mongo: ping: %w", err) + } + return nil +} + +// Seed inserts docs into db.coll via InsertMany and returns the number +// of documents accepted by the server. +func Seed(ctx context.Context, c *mongo.Client, db, coll string, docs []bson.M) (int, error) { + if c == nil { + return 0, errors.New("mongo: nil client") + } + if len(docs) == 0 { + return 0, nil + } + anyDocs := make([]any, len(docs)) + for i := range docs { + anyDocs[i] = docs[i] + } + res, err := c.Database(db).Collection(coll).InsertMany(ctx, anyDocs) + if err != nil { + return 0, fmt.Errorf("mongo: seed %s.%s: %w", db, coll, err) + } + return len(res.InsertedIDs), nil +} + +// Count returns the number of documents in db.coll matching filter. +func Count(ctx context.Context, c *mongo.Client, db, coll string, filter bson.M) (int64, error) { + if c == nil { + return 0, errors.New("mongo: nil client") + } + if filter == nil { + filter = bson.M{} + } + n, err := c.Database(db).Collection(coll).CountDocuments(ctx, filter) + if err != nil { + return 0, fmt.Errorf("mongo: count %s.%s: %w", db, coll, err) + } + return n, nil +} + +// DropDatabase drops the named database. A nil client returns an error. 
+func DropDatabase(ctx context.Context, c *mongo.Client, db string) error { + if c == nil { + return errors.New("mongo: nil client") + } + if err := c.Database(db).Drop(ctx); err != nil { + return fmt.Errorf("mongo: drop %s: %w", db, err) + } + return nil +} diff --git a/test/e2e/pkg/e2eutils/mongo/client_test.go b/test/e2e/pkg/e2eutils/mongo/client_test.go new file mode 100644 index 00000000..1248e76a --- /dev/null +++ b/test/e2e/pkg/e2eutils/mongo/client_test.go @@ -0,0 +1,207 @@ +package mongo + +import ( + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "math/big" + "strings" + "testing" + "time" +) + +func TestBuildURI_Basic(t *testing.T) { + t.Parallel() + got, err := BuildURI(ClientOptions{ + Host: "gw.example", Port: "10260", User: "alice", Password: "secret", + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + want := "mongodb://alice:secret@gw.example:10260/?tls=false&authSource=admin" + if got != want { + t.Fatalf("uri mismatch:\n got=%s\nwant=%s", got, want) + } +} + +func TestBuildURI_EscapesCreds(t *testing.T) { + t.Parallel() + got, err := BuildURI(ClientOptions{ + Host: "h", Port: "1", User: "a@b", Password: "p@ss:w/rd?&", + }) + if err != nil { + t.Fatalf("err: %v", err) + } + // '@', ':', '/', '?', '&' must all be percent-encoded so the driver + // doesn't mis-parse the URI. 
+ for _, bad := range []string{"a@b:", "@ss:", "w/rd?", "?&@"} { + if strings.Contains(got, bad) { + t.Fatalf("uri must escape %q; got %s", bad, got) + } + } + if !strings.Contains(got, "a%40b") { + t.Fatalf("expected user to contain 'a%%40b'; got %s", got) + } + if !strings.Contains(got, "p%40ss%3Aw%2Frd%3F%26") { + t.Fatalf("expected escaped password; got %s", got) + } +} + +func TestBuildURI_TLSFlag(t *testing.T) { + t.Parallel() + on, _ := BuildURI(ClientOptions{Host: "h", Port: "1", User: "u", Password: "p", TLS: true}) + if !strings.Contains(on, "tls=true") { + t.Fatalf("expected tls=true, got %s", on) + } + off, _ := BuildURI(ClientOptions{Host: "h", Port: "1", User: "u", Password: "p", TLS: false}) + if !strings.Contains(off, "tls=false") { + t.Fatalf("expected tls=false, got %s", off) + } +} + +func TestBuildURI_AuthDBOverride(t *testing.T) { + t.Parallel() + got, _ := BuildURI(ClientOptions{ + Host: "h", Port: "1", User: "u", Password: "p", AuthDB: "mydb", + }) + if !strings.Contains(got, "authSource=mydb") { + t.Fatalf("expected authSource=mydb; got %s", got) + } + def, _ := BuildURI(ClientOptions{Host: "h", Port: "1", User: "u", Password: "p"}) + if !strings.Contains(def, "authSource=admin") { + t.Fatalf("expected default authSource=admin; got %s", def) + } +} + +func TestBuildURI_MissingRequired(t *testing.T) { + t.Parallel() + cases := []ClientOptions{ + {Port: "1", User: "u"}, + {Host: "h", User: "u"}, + {Host: "h", Port: "1"}, + } + for i, c := range cases { + if _, err := BuildURI(c); err == nil { + t.Fatalf("case %d: expected error for incomplete opts %+v", i, c) + } + } +} + +// mintSelfSignedPEM returns a short-lived self-signed cert's PEM bytes. +// Used only to feed buildTLSConfig a PEM it can parse; we never need to +// serve TLS from it. 
+func mintSelfSignedPEM(t *testing.T) []byte { + t.Helper() + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("generate key: %v", err) + } + tmpl := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "test"}, + NotBefore: time.Now().Add(-time.Minute), + NotAfter: time.Now().Add(time.Hour), + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature, + IsCA: true, + BasicConstraintsValid: true, + } + der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key) + if err != nil { + t.Fatalf("create cert: %v", err) + } + return pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der}) +} + +func TestBuildTLSConfig_RootCAsTakesPriority(t *testing.T) { + t.Parallel() + pool := x509.NewCertPool() + cfg, err := buildTLSConfig(ClientOptions{ + TLS: true, + RootCAs: pool, + CABundlePEM: []byte("ignored"), + TLSInsecure: true, + ServerName: "localhost", + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg == nil { + t.Fatal("expected non-nil config") + } + if cfg.RootCAs != pool { + t.Fatal("RootCAs must be the supplied pool, not a parsed bundle") + } + if cfg.InsecureSkipVerify { + t.Fatal("InsecureSkipVerify must not be set when RootCAs is supplied") + } + if cfg.ServerName != "localhost" { + t.Fatalf("ServerName = %q, want localhost", cfg.ServerName) + } + if cfg.MinVersion != 0x0303 { // TLS 1.2 + t.Fatalf("MinVersion = %x, want TLS 1.2", cfg.MinVersion) + } +} + +func TestBuildTLSConfig_CABundlePEMParsed(t *testing.T) { + t.Parallel() + pemBytes := mintSelfSignedPEM(t) + cfg, err := buildTLSConfig(ClientOptions{ + TLS: true, + CABundlePEM: pemBytes, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg == nil || cfg.RootCAs == nil { + t.Fatal("expected RootCAs parsed from PEM") + } +} + +func TestBuildTLSConfig_CABundlePEMInvalid(t *testing.T) { + t.Parallel() + if _, err := buildTLSConfig(ClientOptions{ + TLS: true, + 
CABundlePEM: []byte("not a real pem"), + }); err == nil { + t.Fatal("expected error for unparseable CABundlePEM") + } +} + +func TestBuildTLSConfig_Insecure(t *testing.T) { + t.Parallel() + cfg, err := buildTLSConfig(ClientOptions{TLS: true, TLSInsecure: true}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg == nil || !cfg.InsecureSkipVerify { + t.Fatal("expected InsecureSkipVerify=true") + } +} + +func TestBuildTLSConfig_NilWhenNoHintsAndNoServerName(t *testing.T) { + t.Parallel() + cfg, err := buildTLSConfig(ClientOptions{TLS: true}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg != nil { + t.Fatalf("expected nil config when no CA/insecure/ServerName supplied, got %+v", cfg) + } +} + +func TestBuildTLSConfig_ServerNameOnlyReturnsConfig(t *testing.T) { + t.Parallel() + cfg, err := buildTLSConfig(ClientOptions{TLS: true, ServerName: "gw.example"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg == nil || cfg.ServerName != "gw.example" { + t.Fatalf("expected ServerName preserved, got %+v", cfg) + } + if cfg.RootCAs != nil || cfg.InsecureSkipVerify { + t.Fatal("ServerName-only config must not set RootCAs or InsecureSkipVerify") + } +} diff --git a/test/e2e/pkg/e2eutils/mongo/connect.go b/test/e2e/pkg/e2eutils/mongo/connect.go new file mode 100644 index 00000000..0c7a7189 --- /dev/null +++ b/test/e2e/pkg/e2eutils/mongo/connect.go @@ -0,0 +1,268 @@ +// Package mongo — connect.go provides a high-level helper that opens a +// port-forward to a DocumentDB gateway Service, reads credentials from +// the standard "documentdb-credentials" secret in the CR's namespace, +// and returns a connected mongo-driver client wrapped in a [Handle] +// that also owns the port-forward lifetime. +// +// This helper intentionally lives outside pkg/e2eutils/fixtures to +// avoid an import cycle: fixtures creates the CR + secret; mongo is +// the pure data-plane helper callers reach for in `It` blocks. 
+package mongo + +import ( + "context" + "crypto/x509" + "errors" + "fmt" + "net" + "strconv" + "time" + + "github.com/cloudnative-pg/cloudnative-pg/tests/utils/environment" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + driver "go.mongodb.org/mongo-driver/v2/mongo" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/portforward" +) + +// DefaultCredentialSecretName is the secret name the shared fixtures +// create to hold gateway credentials. Kept in sync with +// fixtures.DefaultCredentialSecretName; duplicated here to avoid a +// circular import. +const DefaultCredentialSecretName = "documentdb-credentials" + +// Handle owns a live mongo-driver client plus the port-forward that +// backs it. Callers must invoke Close when done; failing to do so +// leaks a local port-forward goroutine. +type Handle struct { + client *driver.Client + stop func() error +} + +// Client returns the underlying mongo-driver client. Prefer Database +// for per-spec isolation. +func (h *Handle) Client() *driver.Client { return h.client } + +// Database is a pass-through to the underlying driver client. +func (h *Handle) Database(name string) *driver.Database { + return h.client.Database(name) +} + +// Close disconnects the mongo client and tears down the port-forward. +// Safe to call on a nil handle. Returns the first non-nil error +// observed across (Disconnect, port-forward shutdown). +func (h *Handle) Close(ctx context.Context) error { + if h == nil { + return nil + } + var derr error + if h.client != nil { + derr = h.client.Disconnect(ctx) + } + var serr error + if h.stop != nil { + serr = h.stop() + } + if derr != nil { + return derr + } + return serr +} + +// connectRetryTimeout bounds the post-port-forward ping/retry loop +// because forwardconnection's goroutine takes a brief moment to bind +// the chosen local port. 
10s @ 100ms backoff absorbs ~100 attempts, +// which is plenty for the typical <1s bind delay without stretching +// the happy path. +const ( + connectRetryTimeout = 10 * time.Second + connectRetryBackoff = 100 * time.Millisecond +) + +// ConnectOption customises NewFromDocumentDB. Options are composable +// and apply in the order supplied; later options overwrite earlier +// ones for the same field. +type ConnectOption func(*connectConfig) + +type connectConfig struct { + rootCAs *x509.CertPool + caBundlePEM []byte + serverName string + tlsInsecure bool +} + +// WithRootCAs pins the trust store used for server-certificate +// verification to the given pool. Prefer this over WithCABundlePEM +// when you already have a *x509.CertPool assembled. +func WithRootCAs(pool *x509.CertPool) ConnectOption { + return func(c *connectConfig) { c.rootCAs = pool; c.tlsInsecure = false } +} + +// WithCABundlePEM pins the trust store to a CA bundle parsed from PEM +// bytes. Convenient for callers reading ca.crt out of a Secret. +func WithCABundlePEM(pem []byte) ConnectOption { + return func(c *connectConfig) { c.caBundlePEM = pem; c.tlsInsecure = false } +} + +// WithServerName overrides the TLS SNI + hostname-verification target. +// Use when connecting through a port-forward where Host is 127.0.0.1 +// but the server certificate was issued for a Service DNS name. +func WithServerName(name string) ConnectOption { + return func(c *connectConfig) { c.serverName = name } +} + +// WithTLSInsecure turns off server-certificate verification. It is the +// default when no ConnectOption is supplied, preserving legacy +// behaviour; callers that want CA verification must pass WithRootCAs +// or WithCABundlePEM explicitly. +func WithTLSInsecure() ConnectOption { + return func(c *connectConfig) { + c.tlsInsecure = true + c.rootCAs = nil + c.caBundlePEM = nil + } +} + +// NewFromDocumentDB builds a connected Handle against the DocumentDB CR +// identified by (namespace, name). It: +// +// 1. 
Reads the CR and the "documentdb-credentials" secret from the +// same namespace. +// 2. Picks a free local TCP port. +// 3. Opens a port-forward to the gateway Service via the portforward +// helper (using OpenWithErr so teardown surfaces forwarder errors). +// 4. Connects the mongo-driver client with TLS; verification mode is +// controlled by opts (default: InsecureSkipVerify for backwards +// compatibility with the historical gateway self-signed cert). +// 5. Pings with retry until the port-forward is reachable or +// connectRetryTimeout elapses. +func NewFromDocumentDB( + ctx context.Context, + env *environment.TestingEnvironment, + namespace, name string, + opts ...ConnectOption, +) (*Handle, error) { + if env == nil || env.Client == nil { + return nil, errors.New("mongo: NewFromDocumentDB requires a non-nil TestingEnvironment") + } + + cfg := connectConfig{tlsInsecure: true} + for _, o := range opts { + o(&cfg) + } + + dd := &previewv1.DocumentDB{} + if err := env.Client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, dd); err != nil { + return nil, fmt.Errorf("get DocumentDB %s/%s: %w", namespace, name, err) + } + + user, pass, err := readCredentialSecret(ctx, env, namespace) + if err != nil { + return nil, err + } + + lp, err := pickFreePort() + if err != nil { + return nil, fmt.Errorf("mongo: pick free port: %w", err) + } + + stop, err := portforward.OpenWithErr(ctx, env, dd, lp) + if err != nil { + return nil, fmt.Errorf("mongo: open port-forward: %w", err) + } + + c, err := NewClient(ctx, ClientOptions{ + Host: "127.0.0.1", + Port: strconv.Itoa(lp), + User: user, + Password: pass, + TLS: true, + TLSInsecure: cfg.tlsInsecure, + RootCAs: cfg.rootCAs, + CABundlePEM: cfg.caBundlePEM, + ServerName: cfg.serverName, + AuthDB: "admin", + }) + if err != nil { + _ = stop() + return nil, fmt.Errorf("mongo: connect: %w", err) + } + + // pingWithRetry owns the post-port-forward connection-refused + // window. 
No pre-ping sleep is needed: the retry loop at + // connectRetryBackoff cadence covers the forwarder bind delay. + if err := pingWithRetry(ctx, c, connectRetryTimeout); err != nil { + _ = c.Disconnect(ctx) + _ = stop() + return nil, fmt.Errorf("mongo: post-connect ping: %w", err) + } + + return &Handle{client: c, stop: stop}, nil +} + +// readCredentialSecret fetches username/password from the fixture +// credential secret. The secret is expected to have keys "username" +// and "password". +func readCredentialSecret( + ctx context.Context, + env *environment.TestingEnvironment, + namespace string, +) (string, string, error) { + sec := &corev1.Secret{} + err := env.Client.Get(ctx, types.NamespacedName{ + Namespace: namespace, Name: DefaultCredentialSecretName, + }, sec) + if err != nil { + return "", "", fmt.Errorf("get credential secret %s/%s: %w", + namespace, DefaultCredentialSecretName, err) + } + u := string(sec.Data["username"]) + p := string(sec.Data["password"]) + if u == "" || p == "" { + return "", "", fmt.Errorf("credential secret %s/%s missing username/password", + namespace, DefaultCredentialSecretName) + } + return u, p, nil +} + +// pickFreePort asks the kernel for an unused TCP port by binding ":0" +// and immediately closing the listener. There is a narrow race window +// between Close and the port-forward goroutine binding the same port; +// pingWithRetry absorbs that window without a fixed pre-ping sleep. +func pickFreePort() (int, error) { + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return 0, err + } + defer func() { _ = l.Close() }() + return l.Addr().(*net.TCPAddr).Port, nil +} + +// pingWithRetry polls Ping until it succeeds or timeout elapses. The +// port-forward goroutine needs a moment to bind the local port, so the +// first few pings may fail with "connection refused". Short backoff +// (connectRetryBackoff) keeps the happy path fast while still covering +// slow CI nodes via the overall timeout budget. 
+func pingWithRetry(ctx context.Context, c *driver.Client, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + var last error + for { + pingCtx, cancel := context.WithTimeout(ctx, 3*time.Second) + err := c.Ping(pingCtx, nil) + cancel() + if err == nil { + return nil + } + last = err + if time.Now().After(deadline) { + return fmt.Errorf("ping did not succeed within %s: %w", timeout, last) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(connectRetryBackoff): + } + } +} diff --git a/test/e2e/pkg/e2eutils/namespaces/namespaces.go b/test/e2e/pkg/e2eutils/namespaces/namespaces.go new file mode 100644 index 00000000..4ace28e7 --- /dev/null +++ b/test/e2e/pkg/e2eutils/namespaces/namespaces.go @@ -0,0 +1,127 @@ +// Package namespaces produces deterministic per-spec Kubernetes +// namespace names for DocumentDB e2e tests. The canonical entry point +// is [NamespaceForSpec], which a spec calls from inside a BeforeEach to +// obtain a name unique to the current spec, parallel process, and run. +// +// The returned names are DNS-1123-compliant (lowercase, ≤63 chars) and +// stable: calling NamespaceForSpec twice from within the same spec +// produces the same name, which is what retry / recovery logic needs. +package namespaces + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "os" + "strings" + + "github.com/onsi/ginkgo/v2" +) + +// maxNameLen bounds the returned namespace name; Kubernetes rejects +// names longer than 63 characters for DNS-1123 labels. +const maxNameLen = 63 + +// runIDFunc is a package-level indirection so unit tests can inject a +// deterministic run id without plumbing the root e2e package (which +// would introduce an import cycle). +var runIDFunc = defaultRunID + +// SetRunIDFunc overrides the run-id accessor. The root suite wires it +// during SetupSuite so NamespaceForSpec returns names that match the +// fixtures/teardown label selectors. Tests call this to inject a +// deterministic id. 
+func SetRunIDFunc(f func() string) { + if f != nil { + runIDFunc = f + } +} + +func defaultRunID() string { + if v := os.Getenv("E2E_RUN_ID"); v != "" { + return sanitizeSegment(v) + } + return "unset" +} + +// NamespaceForSpec returns a deterministic namespace name for the +// currently-running Ginkgo spec. The name embeds the sanitized area +// label, the run id, the parallel process number, and an 8-character +// SHA-256 prefix derived from the spec's FullText. Collisions across +// specs are avoided by the hash; determinism within a spec is provided +// by the hash being a pure function of the FullText. +// +// If area is empty, "spec" is used. Callers should pass the area +// label constant (e.g., e2e.LifecycleLabel) to make failures easier to +// triage from kubectl output. +func NamespaceForSpec(area string) string { + return buildName(area, ginkgo.CurrentSpecReport().FullText(), procID()) +} + +// procID returns the ginkgo parallel process id, defaulting to "1" +// when unset. Duplicated here (instead of shared with fixtures) to +// avoid a dependency cycle with the fixtures package. +func procID() string { + if v := os.Getenv("GINKGO_PARALLEL_PROCESS"); v != "" { + return v + } + return "1" +} + +// buildName is the pure core of NamespaceForSpec, factored out to make +// it trivially unit-testable without a Ginkgo runtime. +func buildName(area, specText, proc string) string { + areaPart := sanitizeSegment(area) + if areaPart == "" { + areaPart = "spec" + } + sum := sha256.Sum256([]byte(specText)) + hash := hex.EncodeToString(sum[:])[:8] + runID := sanitizeSegment(runIDFunc()) + if runID == "" { + runID = "unset" + } + name := fmt.Sprintf("e2e-%s-%s-p%s-%s", areaPart, runID, proc, hash) + if len(name) <= maxNameLen { + return name + } + // Truncate areaPart first, then runID, preserving the trailing + // hash (which is what guarantees uniqueness). 
+ suffix := fmt.Sprintf("-p%s-%s", proc, hash) + budget := maxNameLen - len("e2e-") - len(suffix) - 1 // -1 for the dash between area and runID + if budget < 2 { + // Degenerate input; fall back to hash-only. + return ("e2e-" + hash + suffix)[:maxNameLen] + } + areaBudget := budget / 2 + runBudget := budget - areaBudget + if len(areaPart) > areaBudget { + areaPart = areaPart[:areaBudget] + } + if len(runID) > runBudget { + runID = runID[:runBudget] + } + return fmt.Sprintf("e2e-%s-%s%s", strings.Trim(areaPart, "-"), strings.Trim(runID, "-"), suffix) +} + +// sanitizeSegment converts arbitrary input into DNS-1123-safe runs of +// [a-z0-9-], collapsing and trimming separators. +func sanitizeSegment(in string) string { + in = strings.ToLower(in) + var b strings.Builder + b.Grow(len(in)) + lastDash := false + for _, r := range in { + switch { + case r >= 'a' && r <= 'z', r >= '0' && r <= '9': + b.WriteRune(r) + lastDash = false + default: + if !lastDash { + b.WriteByte('-') + lastDash = true + } + } + } + return strings.Trim(b.String(), "-") +} diff --git a/test/e2e/pkg/e2eutils/namespaces/namespaces_test.go b/test/e2e/pkg/e2eutils/namespaces/namespaces_test.go new file mode 100644 index 00000000..1323a63f --- /dev/null +++ b/test/e2e/pkg/e2eutils/namespaces/namespaces_test.go @@ -0,0 +1,76 @@ +package namespaces + +import ( + "regexp" + "strings" + "testing" +) + +// dns1123Label matches the Kubernetes DNS-1123 label regex. 
+var dns1123Label = regexp.MustCompile(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`) + +func TestBuildNameDeterministic(t *testing.T) { + SetRunIDFunc(func() string { return "run1" }) + a := buildName("lifecycle", "lifecycle creates a cluster", "1") + b := buildName("lifecycle", "lifecycle creates a cluster", "1") + if a != b { + t.Fatalf("non-deterministic: %q vs %q", a, b) + } + if !strings.HasPrefix(a, "e2e-lifecycle-run1-p1-") { + t.Fatalf("unexpected prefix: %q", a) + } +} + +func TestBuildNameUniquePerSpec(t *testing.T) { + SetRunIDFunc(func() string { return "run1" }) + a := buildName("scale", "scale up to 3", "1") + b := buildName("scale", "scale up to 4", "1") + if a == b { + t.Fatalf("distinct specs produced same name: %q", a) + } +} + +func TestBuildNameUniquePerProc(t *testing.T) { + SetRunIDFunc(func() string { return "run1" }) + a := buildName("data", "spec x", "1") + b := buildName("data", "spec x", "2") + if a == b { + t.Fatalf("distinct procs produced same name: %q", a) + } +} + +func TestBuildNameLengthAndDNS(t *testing.T) { + SetRunIDFunc(func() string { return strings.Repeat("x", 80) }) + longArea := strings.Repeat("area", 20) + name := buildName(longArea, "some-spec-text", "1") + if len(name) > maxNameLen { + t.Fatalf("name too long (%d): %q", len(name), name) + } + if !dns1123Label.MatchString(name) { + t.Fatalf("name not DNS-1123: %q", name) + } +} + +func TestBuildNameEmptyArea(t *testing.T) { + SetRunIDFunc(func() string { return "r" }) + name := buildName("", "spec", "1") + if !strings.HasPrefix(name, "e2e-spec-") { + t.Fatalf("empty area did not default to 'spec': %q", name) + } +} + +func TestSanitizeSegment(t *testing.T) { + cases := map[string]string{ + "Hello World": "hello-world", + "lifecycle": "lifecycle", + "a/b c": "a-b-c", + "---leading": "leading", + "": "", + "UPPER-123": "upper-123", + } + for in, want := range cases { + if got := sanitizeSegment(in); got != want { + t.Errorf("sanitizeSegment(%q) = %q, want %q", in, got, want) + } + } 
+} diff --git a/test/e2e/pkg/e2eutils/operatorhealth/.keep b/test/e2e/pkg/e2eutils/operatorhealth/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/pkg/e2eutils/operatorhealth/gate.go b/test/e2e/pkg/e2eutils/operatorhealth/gate.go new file mode 100644 index 00000000..0257f8ca --- /dev/null +++ b/test/e2e/pkg/e2eutils/operatorhealth/gate.go @@ -0,0 +1,213 @@ +// Package operatorhealth exposes a "churn gate" for the DocumentDB E2E +// suite: a lightweight equivalent of CNPG's tests/utils/operator +// PodRestarted / PodRenamed semantics, plus a sentinel that lets +// non-disruptive specs skip themselves after a prior spec has bounced +// the operator. +// +// Typical use from a suite-level BeforeEach/AfterEach: +// +// var gate *operatorhealth.Gate +// +// BeforeSuite(func() { +// var err error +// gate, err = operatorhealth.NewGate(ctx, env.Client, operatorhealth.DefaultNamespace) +// Expect(err).NotTo(HaveOccurred()) +// }) +// +// BeforeEach(operatorhealth.BeforeEachHook(gate)) +// AfterEach(operatorhealth.AfterEachHook(gate)) +// +// Disruptive specs that intentionally bounce the operator should mark +// the sentinel themselves via MarkChurned() so the AfterEach hook can +// keep its idempotent semantics. +package operatorhealth + +import ( + "context" + "errors" + "fmt" + "sync/atomic" + + . "github.com/onsi/ginkgo/v2" //nolint:revive // Ginkgo DSL is intentional. + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// DefaultNamespace is where the Helm chart installs the DocumentDB +// operator. +const DefaultNamespace = "documentdb-operator" + +// PodLabelSelector is the label the operator Deployment stamps on its +// Pod spec (verified from a live kind cluster: `app=documentdb-operator`). +// If the chart changes the selector, update this constant. 
+const ( + PodLabelKey = "app" + PodLabelValue = "documentdb-operator" +) + +// operatorChurned is a process-wide sentinel that records whether the +// operator pod has been observed to restart/rename. Once set, it stays +// set for the remainder of the process (the gate is advisory, not a +// correctness gate). +var operatorChurned atomic.Bool + +// Gate snapshots the identity and restart count of the operator pod so +// later Check calls can decide whether the operator churned underneath +// us. +type Gate struct { + c client.Client + ns string + initialUID types.UID + initialRestarts int32 + initialPodName string +} + +// NewGate discovers the current operator pod in ns and captures its +// identity. If no pod is found the caller can decide whether that's a +// fatal condition (typical for non-disruptive suites) or tolerable. +func NewGate(ctx context.Context, c client.Client, ns string) (*Gate, error) { + if c == nil { + return nil, errors.New("NewGate: client must not be nil") + } + if ns == "" { + ns = DefaultNamespace + } + pod, err := findOperatorPod(ctx, c, ns) + if err != nil { + return nil, err + } + return &Gate{ + c: c, + ns: ns, + initialUID: pod.UID, + initialPodName: pod.Name, + initialRestarts: totalRestarts(pod), + }, nil +} + +// Check re-reads the operator pod and reports whether it is still the +// same instance with the same restart count. A drift in UID, name, or +// restart count returns healthy=false with a short reason suitable for +// logging. 
+func (g *Gate) Check(ctx context.Context) (healthy bool, reason string, err error) { + if g == nil { + return false, "gate is nil", errors.New("Check: gate is nil") + } + pod, err := findOperatorPod(ctx, g.c, g.ns) + if err != nil { + return false, err.Error(), err + } + switch { + case pod.UID != g.initialUID: + return false, fmt.Sprintf("operator pod UID changed: %s -> %s", g.initialUID, pod.UID), nil + case pod.Name != g.initialPodName: + return false, fmt.Sprintf("operator pod renamed: %s -> %s", g.initialPodName, pod.Name), nil + case totalRestarts(pod) != g.initialRestarts: + return false, fmt.Sprintf("operator pod restart count changed: %d -> %d", + g.initialRestarts, totalRestarts(pod)), nil + } + return true, "", nil +} + +// Verify is a convenience wrapper over [Gate.Check] returning nil when +// the operator pod matches the snapshot captured by [NewGate] and an +// error (wrapping the observed reason) otherwise. It also flips the +// process-wide churn sentinel so subsequent calls to [SkipIfChurned] +// observe the drift. +// +// Typical use from an area's BeforeEach: +// +// BeforeEach(func() { Expect(gate.Verify(ctx)).To(Succeed()) }) +func (g *Gate) Verify(ctx context.Context) error { + if g == nil { + return errors.New("Verify: gate is nil") + } + healthy, reason, err := g.Check(ctx) + if err != nil { + MarkChurned() + return fmt.Errorf("operator health check failed: %w", err) + } + if !healthy { + MarkChurned() + return fmt.Errorf("operator churn detected: %s", reason) + } + return nil +} + +// MarkChurned sets the process-wide sentinel, causing SkipIfChurned to +// skip subsequent non-disruptive specs. Disruptive specs that know they +// bounced the operator should call this in their AfterEach. +func MarkChurned() { operatorChurned.Store(true) } + +// HasChurned reports the current sentinel state. 
+func HasChurned() bool { return operatorChurned.Load() } + +// SkipIfChurned calls Ginkgo's Skip if a prior spec (or an explicit +// MarkChurned call) has observed operator churn. Intended for use from +// BeforeEach of non-disruptive area suites. +func SkipIfChurned() { + if HasChurned() { + Skip("operator churned in a previous spec; skipping non-disruptive spec") + } +} + +// BeforeEachHook returns a Ginkgo BeforeEach body that calls +// SkipIfChurned. If gate is nil the hook still honors the sentinel so +// disruptive specs can flip it without a live Gate. +func BeforeEachHook(gate *Gate) func() { + _ = gate // reserved: future versions may refresh gate snapshot here + return func() { SkipIfChurned() } +} + +// AfterEachHook returns a Ginkgo AfterEach body that re-checks the +// operator pod and flips the sentinel if churn is detected. A nil gate +// disables the check. +func AfterEachHook(gate *Gate) func() { + return func() { + if gate == nil { + return + } + healthy, reason, err := gate.Check(context.Background()) + if err != nil || !healthy { + if reason == "" && err != nil { + reason = err.Error() + } + GinkgoWriter.Printf("operatorhealth: marking churned: %s\n", reason) + MarkChurned() + } + } +} + +// findOperatorPod looks up the first operator pod matching +// PodLabelKey=PodLabelValue in ns. Returns a NotFound error if none +// exist. +func findOperatorPod(ctx context.Context, c client.Client, ns string) (*corev1.Pod, error) { + var pods corev1.PodList + if err := c.List(ctx, &pods, + client.InNamespace(ns), + client.MatchingLabels{PodLabelKey: PodLabelValue}, + ); err != nil { + return nil, fmt.Errorf("listing operator pods in %q: %w", ns, err) + } + if len(pods.Items) == 0 { + return nil, apierrors.NewNotFound(corev1.Resource("pods"), + fmt.Sprintf("%s=%s in %s", PodLabelKey, PodLabelValue, ns)) + } + return &pods.Items[0], nil +} + +// totalRestarts sums RestartCount across all container statuses on pod. +// Matches CNPG's PodRestarted semantics. 
+func totalRestarts(pod *corev1.Pod) int32 { + if pod == nil { + return 0 + } + var total int32 + for _, cs := range pod.Status.ContainerStatuses { + total += cs.RestartCount + } + return total +} diff --git a/test/e2e/pkg/e2eutils/operatorhealth/gate_test.go b/test/e2e/pkg/e2eutils/operatorhealth/gate_test.go new file mode 100644 index 00000000..36321405 --- /dev/null +++ b/test/e2e/pkg/e2eutils/operatorhealth/gate_test.go @@ -0,0 +1,159 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package operatorhealth + +import ( + "context" + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func newScheme(t *testing.T) *runtime.Scheme { + t.Helper() + s := runtime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatalf("scheme: %v", err) + } + return s +} + +func newPod(uid, name string, restarts int32) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: DefaultNamespace, + Labels: map[string]string{PodLabelKey: PodLabelValue}, + UID: types.UID(uid), + }, + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + {Name: "manager", RestartCount: restarts}, + }, + }, + } +} + +func TestNewGateCapturesInitialState(t *testing.T) { + // Reset sentinel between tests. 
+ operatorChurned.Store(false) + + s := newScheme(t) + pod := newPod("uid-1", "documentdb-operator-abc", 0) + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() + + g, err := NewGate(context.Background(), c, DefaultNamespace) + if err != nil { + t.Fatalf("NewGate: %v", err) + } + if g.initialUID != "uid-1" || g.initialRestarts != 0 || g.initialPodName != pod.Name { + t.Errorf("unexpected captured state: %+v", g) + } +} + +func TestCheckHealthyWhenUnchanged(t *testing.T) { + operatorChurned.Store(false) + + s := newScheme(t) + pod := newPod("uid-1", "p1", 0) + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() + + g, err := NewGate(context.Background(), c, DefaultNamespace) + if err != nil { + t.Fatal(err) + } + healthy, reason, err := g.Check(context.Background()) + if err != nil { + t.Fatalf("Check: %v", err) + } + if !healthy { + t.Errorf("expected healthy, got reason=%q", reason) + } +} + +func TestCheckDetectsRestart(t *testing.T) { + operatorChurned.Store(false) + + s := newScheme(t) + pod := newPod("uid-1", "p1", 0) + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() + + g, err := NewGate(context.Background(), c, DefaultNamespace) + if err != nil { + t.Fatal(err) + } + // Bump restart count. 
+ pod.Status.ContainerStatuses[0].RestartCount = 2 + if err := c.Status().Update(context.Background(), pod); err != nil { + t.Fatalf("update pod: %v", err) + } + + healthy, reason, err := g.Check(context.Background()) + if err != nil { + t.Fatalf("Check: %v", err) + } + if healthy { + t.Error("expected unhealthy after restart count bump") + } + if reason == "" { + t.Error("expected non-empty reason") + } +} + +func TestCheckDetectsPodReplacement(t *testing.T) { + operatorChurned.Store(false) + + s := newScheme(t) + pod := newPod("uid-1", "p1", 0) + c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() + + g, err := NewGate(context.Background(), c, DefaultNamespace) + if err != nil { + t.Fatal(err) + } + + // Replace the pod with a new UID/name. + if err := c.Delete(context.Background(), pod); err != nil { + t.Fatalf("delete pod: %v", err) + } + replacement := newPod("uid-2", "p2", 0) + if err := c.Create(context.Background(), replacement); err != nil { + t.Fatalf("create replacement: %v", err) + } + + healthy, _, err := g.Check(context.Background()) + if err != nil { + t.Fatalf("Check: %v", err) + } + if healthy { + t.Error("expected unhealthy after pod replacement") + } +} + +func TestSentinelMarkAndHas(t *testing.T) { + operatorChurned.Store(false) + if HasChurned() { + t.Fatal("expected sentinel clear") + } + MarkChurned() + if !HasChurned() { + t.Fatal("expected sentinel set") + } + // Reset for other tests. 
+	operatorChurned.Store(false)
+}
+
+func TestNewGateNoPods(t *testing.T) {
+	operatorChurned.Store(false)
+	s := newScheme(t)
+	c := fakeclient.NewClientBuilder().WithScheme(s).Build()
+	if _, err := NewGate(context.Background(), c, DefaultNamespace); err == nil {
+		t.Fatal("expected error when no pods match")
+	}
+}
diff --git a/test/e2e/pkg/e2eutils/portforward/.keep b/test/e2e/pkg/e2eutils/portforward/.keep
new file mode 100644
index 00000000..e69de29b
diff --git a/test/e2e/pkg/e2eutils/portforward/portforward.go b/test/e2e/pkg/e2eutils/portforward/portforward.go
new file mode 100644
index 00000000..f0323dfa
--- /dev/null
+++ b/test/e2e/pkg/e2eutils/portforward/portforward.go
@@ -0,0 +1,151 @@
+// Package portforward is a thin wrapper around CNPG's
+// tests/utils/forwardconnection helper, specialised for the DocumentDB
+// gateway service.
+//
+// The DocumentDB operator creates a Service named
+// "documentdb-service-<cr-name>" in the same namespace as the CR, with
+// a port named "gateway" targeting the gateway sidecar (default port
+// 10260). This package opens a local port-forward to that service and
+// returns a stop func the caller defers.
+//
+// Fallback note
+//
+// forwardconnection.NewDialerFromService is generic over service name
+// and does NOT hardcode Postgres, despite the package's origin in the
+// CNPG codebase. We therefore use the CNPG helper directly rather than
+// reaching for client-go's portforward.PortForwarder. If a future CNPG
+// release tightens the helper to Postgres-only semantics, this file is
+// the single place to swap in a client-go implementation.
+package portforward
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+
+	"github.com/cloudnative-pg/cloudnative-pg/tests/utils/environment"
+	"github.com/cloudnative-pg/cloudnative-pg/tests/utils/forwardconnection"
+	"github.com/onsi/ginkgo/v2"
+
+	previewv1 "github.com/documentdb/documentdb-operator/api/preview"
+)
+
+// GatewayPort is the default DocumentDB gateway TCP port inside the
+// cluster. Mirrored from operator/src/internal/utils/constants.go so
+// the E2E module does not depend on the operator's internal packages.
+const GatewayPort = 10260
+
+// ServiceNamePrefix mirrors DOCUMENTDB_SERVICE_PREFIX from the operator.
+// The fully-qualified service name is ServiceNamePrefix + dd.Name,
+// truncated to 63 characters to honour the Kubernetes DNS limit.
+const ServiceNamePrefix = "documentdb-service-"
+
+// GatewayServiceName returns the Service name the operator creates for
+// the given DocumentDB CR.
+func GatewayServiceName(dd *previewv1.DocumentDB) string {
+	if dd == nil {
+		return ""
+	}
+	name := ServiceNamePrefix + dd.Name
+	if len(name) > 63 {
+		name = name[:63]
+	}
+	return name
+}
+
+// OpenWithErr establishes a port-forward from localPort on the caller's
+// host to the DocumentDB gateway service backing dd. It returns a stop
+// func that halts the forward and returns the final error reported by
+// the forwarder goroutine (nil on clean shutdown). Calls to stop after
+// the first are no-ops returning nil; stop is not safe for concurrent
+// use. If localPort is 0 a free port is picked by the kernel, but the
+// chosen port is not surfaced to the caller yet — pass an explicit
+// port when the spec needs to dial the forwarded endpoint.
+//
+// Prefer OpenWithErr over Open for new call sites: exposing the
+// forwarder error lets specs surface gateway-level disconnects instead
+// of silently dropping them.
+func OpenWithErr( + ctx context.Context, + env *environment.TestingEnvironment, + dd *previewv1.DocumentDB, + localPort int, +) (stop func() error, err error) { + if env == nil { + return nil, fmt.Errorf("OpenWithErr: env must not be nil") + } + if dd == nil { + return nil, fmt.Errorf("OpenWithErr: dd must not be nil") + } + svcName := GatewayServiceName(dd) + if svcName == "" { + return nil, fmt.Errorf("OpenWithErr: could not derive gateway service name from %+v", dd) + } + + dialer, _, err := forwardconnection.NewDialerFromService( + ctx, + env.Interface, + env.RestClientConfig, + dd.Namespace, + svcName, + ) + if err != nil { + return nil, fmt.Errorf("building dialer for %s/%s: %w", dd.Namespace, svcName, err) + } + + portMaps := []string{fmt.Sprintf("%d:%d", localPort, GatewayPort)} + fc, err := forwardconnection.NewForwardConnection(dialer, portMaps, io.Discard, io.Discard) + if err != nil { + return nil, fmt.Errorf("creating forward connection: %w", err) + } + + fwdCtx, cancel := context.WithCancel(ctx) + errCh := make(chan error, 1) + go func() { errCh <- fc.StartAndWait(fwdCtx) }() + + var stopped bool + stop = func() error { + if stopped { + return nil + } + stopped = true + cancel() + // Drain the goroutine so callers see deterministic teardown. + // context.Canceled is the expected shutdown signal and is + // swallowed; everything else is surfaced. + e := <-errCh + if e != nil && !errors.Is(e, context.Canceled) { + return e + } + return nil + } + return stop, nil +} + +// Open is the backwards-compatible wrapper around OpenWithErr that +// returns a stop func() (no error). Any non-nil forwarder error +// observed at teardown is logged to GinkgoWriter so test failures are +// still traceable. +// +// New callers should prefer OpenWithErr; Open remains for pre-existing +// callers that cannot easily propagate the error (e.g., helpers that +// plug into DeferCleanup with a no-return func). 
+func Open( + ctx context.Context, + env *environment.TestingEnvironment, + dd *previewv1.DocumentDB, + localPort int, +) (stop func(), err error) { + stopE, err := OpenWithErr(ctx, env, dd, localPort) + if err != nil { + return nil, err + } + return func() { + if ferr := stopE(); ferr != nil { + fmt.Fprintf(ginkgo.GinkgoWriter, + "portforward: forwarder for %s/%s exited with error: %v\n", + dd.Namespace, GatewayServiceName(dd), ferr) + } + }, nil +} diff --git a/test/e2e/pkg/e2eutils/portforward/portforward_test.go b/test/e2e/pkg/e2eutils/portforward/portforward_test.go new file mode 100644 index 00000000..8eaa78e7 --- /dev/null +++ b/test/e2e/pkg/e2eutils/portforward/portforward_test.go @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package portforward + +import ( + "strings" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" +) + +func TestGatewayServiceName(t *testing.T) { + cases := []struct { + name string + dd *previewv1.DocumentDB + want string + }{ + {"nil", nil, ""}, + { + "short", + &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "my-dd"}}, + "documentdb-service-my-dd", + }, + { + "truncated", + &previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: strings.Repeat("x", 80)}}, + // 19 (prefix) + 44 xs = 63 + "documentdb-service-" + strings.Repeat("x", 44), + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := GatewayServiceName(tc.dd) + if got != tc.want { + t.Errorf("got %q want %q", got, tc.want) + } + if len(got) > 63 { + t.Errorf("name exceeds 63 chars: %d", len(got)) + } + }) + } +} + +func TestGatewayPort(t *testing.T) { + if GatewayPort != 10260 { + t.Errorf("GatewayPort drift: got %d want 10260 (see operator/src/internal/utils/constants.go)", GatewayPort) + } +} + +func TestOpenWithErr_NilEnv(t *testing.T) { + t.Parallel() + stop, err := OpenWithErr(nil, nil, 
&previewv1.DocumentDB{ObjectMeta: metav1.ObjectMeta{Name: "x"}}, 0) //nolint:staticcheck // testing nil-ctx/env guard + if err == nil { + t.Fatal("expected error for nil env") + } + if stop != nil { + t.Fatal("expected nil stop when error is returned") + } + if !strings.Contains(err.Error(), "env") { + t.Fatalf("error should mention env: %v", err) + } +} + +func TestOpenWithErr_NilDD(t *testing.T) { + t.Parallel() + // Open is a wrapper around OpenWithErr; exercise the backward- + // compat shim's validation path in the same package without + // needing a real *environment.TestingEnvironment. + stop, err := Open(nil, nil, nil, 0) //nolint:staticcheck // testing nil-guard + if err == nil { + t.Fatal("expected error for nil env/dd") + } + if stop != nil { + t.Fatal("expected nil stop when error is returned") + } +} diff --git a/test/e2e/pkg/e2eutils/seed/.keep b/test/e2e/pkg/e2eutils/seed/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/pkg/e2eutils/seed/datasets.go b/test/e2e/pkg/e2eutils/seed/datasets.go new file mode 100644 index 00000000..b89f66b9 --- /dev/null +++ b/test/e2e/pkg/e2eutils/seed/datasets.go @@ -0,0 +1,126 @@ +// Package seed provides deterministic, canonical datasets used by the +// DocumentDB E2E suite. All generators are pure functions with no +// external dependencies; they return freshly-allocated slices of +// bson.M so callers may mutate them safely. +package seed + +import ( + "fmt" + "math/rand/v2" + + "go.mongodb.org/mongo-driver/v2/bson" +) + +// SmallDatasetSize is the number of documents returned by SmallDataset. +const SmallDatasetSize = 10 + +// MediumDatasetSize is the number of documents returned by MediumDataset. +const MediumDatasetSize = 1000 + +// SortDatasetSize is the number of documents returned by SortDataset. +const SortDatasetSize = 100 + +// AggDatasetSize is the number of documents returned by AggDataset. 
+const AggDatasetSize = 50 + +// AggDatasetGroups is the number of distinct category values emitted by +// AggDataset. Callers asserting group cardinality in aggregation tests +// should use this constant. +const AggDatasetGroups = 5 + +// SmallDataset returns exactly SmallDatasetSize documents with +// predictable identity and score fields, suitable for quick insert / +// count round-trips. Shape: {_id: N, name: "doc-N", score: N*10} for +// N in [1, SmallDatasetSize]. +func SmallDataset() []bson.M { + out := make([]bson.M, SmallDatasetSize) + for i := 0; i < SmallDatasetSize; i++ { + n := i + 1 + out[i] = bson.M{ + "_id": n, + "name": fmt.Sprintf("doc-%d", n), + "score": n * 10, + } + } + return out +} + +// MediumDataset returns MediumDatasetSize documents following the same +// shape as SmallDataset, used to validate bulk insert, count, and +// indexing behaviour under non-trivial sizes. +func MediumDataset() []bson.M { + out := make([]bson.M, MediumDatasetSize) + for i := 0; i < MediumDatasetSize; i++ { + n := i + 1 + out[i] = bson.M{ + "_id": n, + "name": fmt.Sprintf("doc-%d", n), + "score": n * 10, + } + } + return out +} + +// sortDatasetSeed is the deterministic seed used by SortDataset so that +// identical Go runs produce identical document order — this is what +// makes sort assertions in tests reproducible. +var sortDatasetSeed = [32]byte{ + 0xd0, 0xc8, 0xd8, 0x53, 's', 'o', 'r', 't', + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +} + +// SortDataset returns SortDatasetSize documents with varied string +// names and numeric scores suitable for validating $sort semantics. +// Output order is intentionally scrambled but deterministic — running +// the function repeatedly yields the same slice so assertions can +// compare to a known ordering. 
+func SortDataset() []bson.M {
+	r := rand.New(rand.NewChaCha8(sortDatasetSeed))
+	// Generate names from a 12-letter alphabet so sort validations see
+	// meaningful string comparisons rather than trivial N-indexed names.
+	const alphabet = "abcdefghijkl"
+	indices := r.Perm(SortDatasetSize)
+	out := make([]bson.M, SortDatasetSize)
+	for i := 0; i < SortDatasetSize; i++ {
+		n := indices[i] + 1
+		// Two-letter name derived from the permutation for variety.
+		b := []byte{alphabet[n%len(alphabet)], alphabet[(n*7)%len(alphabet)]}
+		out[i] = bson.M{
+			"_id":   n,
+			"name":  string(b),
+			"score": (n * 37) % 1000,
+		}
+	}
+	return out
+}
+
+// aggDatasetSeed is the deterministic seed used by AggDataset.
+var aggDatasetSeed = [32]byte{
+	0xa6, 0x67, 0x67, 'a', 'g', 'g', 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+}
+
+// AggDataset returns AggDatasetSize documents spread across exactly
+// AggDatasetGroups distinct `category` values. Every document has a
+// unique numeric _id and a per-category `value` field suitable for
+// $group / $sum aggregations.
+func AggDataset() []bson.M {
+	r := rand.New(rand.NewChaCha8(aggDatasetSeed))
+	categories := []string{"alpha", "beta", "gamma", "delta", "epsilon"}
+	if len(categories) != AggDatasetGroups {
+		// Runtime guard: keep categories in sync with AggDatasetGroups.
+		panic("seed: AggDatasetGroups mismatch")
+	}
+	out := make([]bson.M, AggDatasetSize)
+	// Round-robin to guarantee every category appears at least once,
+	// then perturb value fields with the deterministic RNG.
+ for i := 0; i < AggDatasetSize; i++ { + cat := categories[i%len(categories)] + out[i] = bson.M{ + "_id": i + 1, + "category": cat, + "value": r.IntN(1000), + } + } + return out +} diff --git a/test/e2e/pkg/e2eutils/seed/datasets_test.go b/test/e2e/pkg/e2eutils/seed/datasets_test.go new file mode 100644 index 00000000..5a9b34c7 --- /dev/null +++ b/test/e2e/pkg/e2eutils/seed/datasets_test.go @@ -0,0 +1,96 @@ +package seed + +import ( + "fmt" + "reflect" + "testing" +) + +func TestSmallDataset_Shape(t *testing.T) { + t.Parallel() + ds := SmallDataset() + if len(ds) != SmallDatasetSize { + t.Fatalf("want %d docs, got %d", SmallDatasetSize, len(ds)) + } + for i, d := range ds { + n := i + 1 + if d["_id"] != n { + t.Fatalf("doc %d: _id=%v, want %d", i, d["_id"], n) + } + if d["name"] != fmt.Sprintf("doc-%d", n) { + t.Fatalf("doc %d: name=%v", i, d["name"]) + } + if d["score"] != n*10 { + t.Fatalf("doc %d: score=%v, want %d", i, d["score"], n*10) + } + } +} + +func TestMediumDataset_Size(t *testing.T) { + t.Parallel() + ds := MediumDataset() + if len(ds) != MediumDatasetSize { + t.Fatalf("want %d docs, got %d", MediumDatasetSize, len(ds)) + } + // Spot-check first and last doc. + if ds[0]["_id"] != 1 { + t.Fatalf("first _id=%v", ds[0]["_id"]) + } + if ds[MediumDatasetSize-1]["_id"] != MediumDatasetSize { + t.Fatalf("last _id=%v", ds[MediumDatasetSize-1]["_id"]) + } +} + +func TestSortDataset_DeterministicOrder(t *testing.T) { + t.Parallel() + a := SortDataset() + b := SortDataset() + if len(a) != SortDatasetSize { + t.Fatalf("size=%d want %d", len(a), SortDatasetSize) + } + if !reflect.DeepEqual(a, b) { + t.Fatalf("SortDataset is not deterministic across calls") + } + // All _id values must be unique and in range [1, SortDatasetSize]. 
+ seen := make(map[any]bool, SortDatasetSize) + for _, d := range a { + id := d["_id"] + if seen[id] { + t.Fatalf("duplicate _id=%v", id) + } + seen[id] = true + } +} + +func TestAggDataset_GroupCardinality(t *testing.T) { + t.Parallel() + ds := AggDataset() + if len(ds) != AggDatasetSize { + t.Fatalf("size=%d want %d", len(ds), AggDatasetSize) + } + cats := map[string]int{} + for _, d := range ds { + c, ok := d["category"].(string) + if !ok { + t.Fatalf("non-string category: %T", d["category"]) + } + cats[c]++ + } + if len(cats) != AggDatasetGroups { + t.Fatalf("want %d distinct categories, got %d (%v)", AggDatasetGroups, len(cats), cats) + } + // Every category should have at least one document (round-robin + // distribution guarantees this when size ≥ groups). + for c, n := range cats { + if n == 0 { + t.Fatalf("category %s empty", c) + } + } +} + +func TestAggDataset_Deterministic(t *testing.T) { + t.Parallel() + if !reflect.DeepEqual(AggDataset(), AggDataset()) { + t.Fatalf("AggDataset not deterministic") + } +} diff --git a/test/e2e/pkg/e2eutils/testenv/.keep b/test/e2e/pkg/e2eutils/testenv/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/pkg/e2eutils/testenv/env.go b/test/e2e/pkg/e2eutils/testenv/env.go new file mode 100644 index 00000000..fee8555d --- /dev/null +++ b/test/e2e/pkg/e2eutils/testenv/env.go @@ -0,0 +1,96 @@ +// Package testenv constructs a CloudNative-PG *environment.TestingEnvironment +// pre-configured for the DocumentDB E2E suite. +// +// Upstream CNPG's NewTestingEnvironment only registers the +// volumesnapshot and prometheus-operator scheme groups. 
DocumentDB specs +// additionally need: +// +// - CloudNative-PG's apiv1 (Cluster, Backup, ScheduledBackup, Pooler, …) +// - k8s.io client-go scheme (core/v1, apps/v1, …) +// - the DocumentDB operator preview API (documentdb.io/preview) +// +// NewDocumentDBTestingEnvironment registers those groups onto the shared +// scheme and rebuilds env.Client so it can Get/List/Watch DocumentDB CRs. +// +// Phase-0 note: CNPG's NewTestingEnvironment parses POSTGRES_IMG with +// Masterminds/semver. If the tag is not semver-parseable (e.g. "latest") +// it returns an error. We default POSTGRES_IMG=busybox:17.2 when the +// variable is unset so the suite can boot without CNPG postgres images. +package testenv + +import ( + "context" + "fmt" + "os" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + "github.com/cloudnative-pg/cloudnative-pg/tests/utils/environment" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" +) + +// DefaultOperatorNamespace is the namespace the DocumentDB operator is +// deployed into by the Helm chart and by the suite's CI fixtures. +const DefaultOperatorNamespace = "documentdb-operator" + +// DefaultPostgresImage is the placeholder image used to satisfy CNPG's +// semver parsing when the caller does not care about the Postgres image +// (DocumentDB specs never launch raw CNPG clusters from this env). +const DefaultPostgresImage = "busybox:17.2" + +// postgresImgEnv is the environment variable consulted by the upstream +// CNPG testing environment constructor. +const postgresImgEnv = "POSTGRES_IMG" + +// NewDocumentDBTestingEnvironment returns a CNPG *TestingEnvironment with +// the CloudNative-PG apiv1, client-go and DocumentDB preview schemes +// registered and env.Client rebuilt against that scheme. 
The supplied +// context is stored on the returned environment for callers that need it. +func NewDocumentDBTestingEnvironment(ctx context.Context) (*environment.TestingEnvironment, error) { + if _, ok := os.LookupEnv(postgresImgEnv); !ok { + if err := os.Setenv(postgresImgEnv, DefaultPostgresImage); err != nil { + return nil, fmt.Errorf("setting %s: %w", postgresImgEnv, err) + } + } + + env, err := environment.NewTestingEnvironment() + if err != nil { + return nil, fmt.Errorf("creating CNPG testing environment: %w", err) + } + + utilruntime.Must(cnpgv1.AddToScheme(env.Scheme)) + utilruntime.Must(clientgoscheme.AddToScheme(env.Scheme)) + utilruntime.Must(previewv1.AddToScheme(env.Scheme)) + + c, err := client.New(env.RestClientConfig, client.Options{Scheme: env.Scheme}) + if err != nil { + return nil, fmt.Errorf("rebuilding controller-runtime client with DocumentDB scheme: %w", err) + } + env.Client = c + if ctx != nil { + env.Ctx = ctx + } + return env, nil +} + +// DefaultDocumentDBScheme returns a fresh scheme with the same group +// registrations applied by NewDocumentDBTestingEnvironment. It is useful +// for unit tests that construct a fake client without spinning up the +// full TestingEnvironment. +func DefaultDocumentDBScheme() (*runtime.Scheme, error) { + s := runtime.NewScheme() + if err := cnpgv1.AddToScheme(s); err != nil { + return nil, fmt.Errorf("adding cnpg apiv1 to scheme: %w", err) + } + if err := clientgoscheme.AddToScheme(s); err != nil { + return nil, fmt.Errorf("adding client-go scheme: %w", err) + } + if err := previewv1.AddToScheme(s); err != nil { + return nil, fmt.Errorf("adding documentdb preview scheme: %w", err) + } + return s, nil +} diff --git a/test/e2e/pkg/e2eutils/testenv/env_test.go b/test/e2e/pkg/e2eutils/testenv/env_test.go new file mode 100644 index 00000000..d3755df5 --- /dev/null +++ b/test/e2e/pkg/e2eutils/testenv/env_test.go @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package testenv + +import ( + "testing" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + previewv1 "github.com/documentdb/documentdb-operator/api/preview" + corev1 "k8s.io/api/core/v1" +) + +func TestDefaultDocumentDBSchemeRegistersExpectedGroups(t *testing.T) { + s, err := DefaultDocumentDBScheme() + if err != nil { + t.Fatalf("DefaultDocumentDBScheme: %v", err) + } + + if !s.Recognizes(cnpgv1.SchemeGroupVersion.WithKind("Cluster")) { + t.Errorf("expected scheme to recognize cnpg apiv1 Cluster") + } + if !s.Recognizes(previewv1.GroupVersion.WithKind("DocumentDB")) { + t.Errorf("expected scheme to recognize DocumentDB preview group") + } + if !s.Recognizes(corev1.SchemeGroupVersion.WithKind("Pod")) { + t.Errorf("expected scheme to recognize core/v1 Pod") + } +} + +func TestDefaultConstants(t *testing.T) { + if DefaultOperatorNamespace == "" { + t.Fatal("DefaultOperatorNamespace must not be empty") + } + if DefaultPostgresImage == "" { + t.Fatal("DefaultPostgresImage must not be empty") + } +} diff --git a/test/e2e/pkg/e2eutils/timeouts/.keep b/test/e2e/pkg/e2eutils/timeouts/.keep new file mode 100644 index 00000000..e69de29b diff --git a/test/e2e/pkg/e2eutils/timeouts/timeouts.go b/test/e2e/pkg/e2eutils/timeouts/timeouts.go new file mode 100644 index 00000000..420e62ec --- /dev/null +++ b/test/e2e/pkg/e2eutils/timeouts/timeouts.go @@ -0,0 +1,119 @@ +// Package timeouts centralises the Eventually/Consistently durations +// used by the DocumentDB E2E suite. Where an operation corresponds to a +// CNPG event already modelled by +// github.com/cloudnative-pg/cloudnative-pg/tests/utils/timeouts, this +// package reuses the CNPG value (converted to a time.Duration); for +// DocumentDB-specific operations it defines opinionated defaults. +package timeouts + +import ( + "time" + + cnpgtimeouts "github.com/cloudnative-pg/cloudnative-pg/tests/utils/timeouts" +) + +// Op is a DocumentDB-specific operation identifier. 
New callers should +// prefer the constants below over ad-hoc strings so that the helper can +// surface un-mapped operations via the UnknownOpFallback. +type Op string + +// DocumentDB-specific operations. When adding an entry here, also +// extend the switch in For and PollInterval. +const ( + // DocumentDBReady waits for a fresh DocumentDB cluster to reach the + // running state after creation. + DocumentDBReady Op = "documentDBReady" + // DocumentDBUpgrade waits for an in-place image upgrade rollout. + DocumentDBUpgrade Op = "documentDBUpgrade" + // InstanceScale waits for a replica count change to converge. + InstanceScale Op = "instanceScale" + // PVCResize waits for a StorageConfiguration.PvcSize change to be + // applied across all PVCs. + PVCResize Op = "pvcResize" + // BackupComplete waits for a Backup CR to reach Completed. + BackupComplete Op = "backupComplete" + // RestoreComplete waits for a recovery bootstrap to complete. + RestoreComplete Op = "restoreComplete" + // MongoConnect bounds a single mongo client connect/ping attempt. + MongoConnect Op = "mongoConnect" + // ServiceReady waits for a LoadBalancer / ClusterIP to acquire an + // address and begin routing. + ServiceReady Op = "serviceReady" +) + +// UnknownOpFallback is returned by For when an Op is not in the +// DocumentDB map and has no corresponding CNPG mapping. +const UnknownOpFallback = 2 * time.Minute + +// documentDBDefaults captures the DocumentDB-specific defaults used by +// For. Keep this map in sync with the constants above. +var documentDBDefaults = map[Op]time.Duration{ + DocumentDBReady: 5 * time.Minute, + DocumentDBUpgrade: 10 * time.Minute, + InstanceScale: 5 * time.Minute, + PVCResize: 5 * time.Minute, + BackupComplete: 10 * time.Minute, + RestoreComplete: 15 * time.Minute, + MongoConnect: 30 * time.Second, + ServiceReady: 2 * time.Minute, +} + +// cnpgAlias maps selected DocumentDB ops to their CNPG counterparts. 
+// When the CNPG timeouts map (optionally overridden via the +// TEST_TIMEOUTS environment variable) contains the aliased event, its +// value — converted from seconds to time.Duration — wins over the +// DocumentDB default. This lets operators share a single tuning knob +// for cluster-readiness style waits. +var cnpgAlias = map[Op]cnpgtimeouts.Timeout{ + DocumentDBReady: cnpgtimeouts.ClusterIsReady, + InstanceScale: cnpgtimeouts.ClusterIsReady, + BackupComplete: cnpgtimeouts.BackupIsReady, +} + +// For returns the Eventually timeout for op. Lookup order: +// 1. CNPG alias (honours TEST_TIMEOUTS env var if set). +// 2. DocumentDB default. +// 3. UnknownOpFallback for unknown ops. +func For(op Op) time.Duration { + if alias, ok := cnpgAlias[op]; ok { + if m, err := cnpgtimeouts.Timeouts(); err == nil { + if s, ok := m[alias]; ok { + return time.Duration(s) * time.Second + } + } + } + if d, ok := documentDBDefaults[op]; ok { + return d + } + return UnknownOpFallback +} + +// PollInterval returns the Eventually poll interval for op. Fast ops +// use a short 2-second poll; slow, cluster-level operations use a +// 10-second poll to reduce API-server churn during long waits. +func PollInterval(op Op) time.Duration { + switch op { + case MongoConnect, ServiceReady: + return 2 * time.Second + case DocumentDBReady, DocumentDBUpgrade, InstanceScale, + PVCResize, BackupComplete, RestoreComplete: + return 10 * time.Second + default: + return 5 * time.Second + } +} + +// AllOps returns the set of DocumentDB operations known to this +// package, in insertion order. Useful for table tests. 
+func AllOps() []Op { + return []Op{ + DocumentDBReady, + DocumentDBUpgrade, + InstanceScale, + PVCResize, + BackupComplete, + RestoreComplete, + MongoConnect, + ServiceReady, + } +} diff --git a/test/e2e/pkg/e2eutils/timeouts/timeouts_test.go b/test/e2e/pkg/e2eutils/timeouts/timeouts_test.go new file mode 100644 index 00000000..e70d2c7f --- /dev/null +++ b/test/e2e/pkg/e2eutils/timeouts/timeouts_test.go @@ -0,0 +1,50 @@ +package timeouts + +import ( + "testing" + "time" +) + +func TestFor_CoversAllOps(t *testing.T) { + t.Parallel() + for _, op := range AllOps() { + d := For(op) + if d <= 0 { + t.Fatalf("For(%s) returned non-positive %s", op, d) + } + // Guard: every known Op must have an explicit entry in + // documentDBDefaults (even if its value coincidentally equals + // UnknownOpFallback) so adding a new Op forces a choice. + if _, ok := documentDBDefaults[op]; !ok { + t.Fatalf("Op %s missing from documentDBDefaults — add an explicit default", op) + } + } +} + +func TestFor_UnknownOpFallback(t *testing.T) { + t.Parallel() + got := For(Op("this-op-does-not-exist")) + if got != UnknownOpFallback { + t.Fatalf("unknown op: got %s, want %s", got, UnknownOpFallback) + } +} + +func TestFor_DocumentDBUpgrade_IsDocumentDBDefault(t *testing.T) { + t.Parallel() + // Not CNPG-aliased → must come straight from documentDBDefaults. 
+ if got, want := For(DocumentDBUpgrade), 10*time.Minute; got != want { + t.Fatalf("DocumentDBUpgrade: got %s, want %s", got, want) + } +} + +func TestPollInterval_NonZero(t *testing.T) { + t.Parallel() + for _, op := range AllOps() { + if got := PollInterval(op); got <= 0 { + t.Fatalf("PollInterval(%s) non-positive: %s", op, got) + } + } + if got := PollInterval(Op("unknown")); got <= 0 { + t.Fatalf("PollInterval(unknown) non-positive: %s", got) + } +} diff --git a/test/e2e/pkg/e2eutils/tlscerts/tlscerts.go b/test/e2e/pkg/e2eutils/tlscerts/tlscerts.go new file mode 100644 index 00000000..9c5cefdf --- /dev/null +++ b/test/e2e/pkg/e2eutils/tlscerts/tlscerts.go @@ -0,0 +1,117 @@ +// Package tlscerts generates throwaway TLS material (CA + server +// certificate) suitable for DocumentDB E2E "Provided" mode tests. +// +// The generated artefacts are written into an in-memory struct whose +// PEM fields can be plugged directly into a Kubernetes +// kubernetes.io/tls Secret (tls.crt / tls.key) plus an optional +// ca.crt entry for clients that want to verify the chain. +// +// None of this material is secure: keys are 2048-bit RSA, validity +// windows are short, and no revocation story exists. It is only +// intended for tests. +package tlscerts + +import ( + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "fmt" + "math/big" + "net" + "time" +) + +// Bundle is the PEM-encoded material produced by Generate. The fields +// align with the canonical key names used by Kubernetes TLS secrets: +// tls.crt (ServerCertPEM), tls.key (ServerKeyPEM) and the optional +// ca.crt (CACertPEM). +type Bundle struct { + CACertPEM []byte + CAKeyPEM []byte + ServerCertPEM []byte + ServerKeyPEM []byte +} + +// GenerateOptions controls Generate. DNSNames and IPAddresses populate +// the server certificate's SANs; at least one entry is required so +// TLS clients performing hostname verification have something to +// match against. 
Validity defaults to 24 hours when zero. +type GenerateOptions struct { + // CommonName is the server certificate's CN. Defaults to + // "documentdb-e2e" when empty. + CommonName string + // DNSNames populates the SAN DNSNames field. + DNSNames []string + // IPAddresses populates the SAN IPAddresses field. + IPAddresses []net.IP + // Validity defaults to 24 hours when zero. + Validity time.Duration +} + +// Generate builds a self-signed CA and a server certificate signed by +// that CA. Both are returned as PEM-encoded bytes in Bundle. +func Generate(opts GenerateOptions) (*Bundle, error) { + if len(opts.DNSNames) == 0 && len(opts.IPAddresses) == 0 { + return nil, fmt.Errorf("tlscerts: at least one DNSName or IPAddress SAN is required") + } + validity := opts.Validity + if validity == 0 { + validity = 24 * time.Hour + } + cn := opts.CommonName + if cn == "" { + cn = "documentdb-e2e" + } + + caKey, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + return nil, fmt.Errorf("tlscerts: generate CA key: %w", err) + } + caTmpl := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "documentdb-e2e-ca"}, + NotBefore: time.Now().Add(-5 * time.Minute), + NotAfter: time.Now().Add(validity), + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature, + BasicConstraintsValid: true, + IsCA: true, + } + caDER, err := x509.CreateCertificate(rand.Reader, caTmpl, caTmpl, &caKey.PublicKey, caKey) + if err != nil { + return nil, fmt.Errorf("tlscerts: sign CA: %w", err) + } + + srvKey, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + return nil, fmt.Errorf("tlscerts: generate server key: %w", err) + } + srvTmpl := &x509.Certificate{ + SerialNumber: big.NewInt(2), + Subject: pkix.Name{CommonName: cn}, + NotBefore: time.Now().Add(-5 * time.Minute), + NotAfter: time.Now().Add(validity), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth, 
x509.ExtKeyUsageClientAuth}, + DNSNames: append([]string(nil), opts.DNSNames...), + IPAddresses: append([]net.IP(nil), opts.IPAddresses...), + } + srvDER, err := x509.CreateCertificate(rand.Reader, srvTmpl, caTmpl, &srvKey.PublicKey, caKey) + if err != nil { + return nil, fmt.Errorf("tlscerts: sign server cert: %w", err) + } + + return &Bundle{ + CACertPEM: pemEncode("CERTIFICATE", caDER), + CAKeyPEM: pemEncode("RSA PRIVATE KEY", x509.MarshalPKCS1PrivateKey(caKey)), + ServerCertPEM: pemEncode("CERTIFICATE", srvDER), + ServerKeyPEM: pemEncode("RSA PRIVATE KEY", x509.MarshalPKCS1PrivateKey(srvKey)), + }, nil +} + +// pemEncode is a tiny wrapper so callers don't need to construct +// pem.Block literals at each call site. +func pemEncode(blockType string, der []byte) []byte { + return pem.EncodeToMemory(&pem.Block{Type: blockType, Bytes: der}) +} diff --git a/test/e2e/pkg/e2eutils/tlscerts/tlscerts_test.go b/test/e2e/pkg/e2eutils/tlscerts/tlscerts_test.go new file mode 100644 index 00000000..7e2c1eee --- /dev/null +++ b/test/e2e/pkg/e2eutils/tlscerts/tlscerts_test.go @@ -0,0 +1,97 @@ +package tlscerts + +import ( + "crypto/x509" + "encoding/pem" + "net" + "strings" + "testing" + "time" +) + +func TestGenerateRejectsEmptySANs(t *testing.T) { + if _, err := Generate(GenerateOptions{}); err == nil { + t.Fatalf("expected error for empty SANs") + } +} + +func TestGenerateProducesVerifiableChain(t *testing.T) { + b, err := Generate(GenerateOptions{ + CommonName: "gw.test", + DNSNames: []string{"gw.test", "localhost"}, + IPAddresses: []net.IP{net.ParseIP("127.0.0.1")}, + Validity: 1 * time.Hour, + }) + if err != nil { + t.Fatalf("Generate: %v", err) + } + for name, pemBytes := range map[string][]byte{ + "ca.crt": b.CACertPEM, + "ca.key": b.CAKeyPEM, + "tls.crt": b.ServerCertPEM, + "tls.key": b.ServerKeyPEM, + } { + if len(pemBytes) == 0 { + t.Fatalf("%s empty", name) + } + if blk, _ := pem.Decode(pemBytes); blk == nil { + t.Fatalf("%s not valid PEM", name) + } + } + + 
caBlock, _ := pem.Decode(b.CACertPEM) + if caBlock == nil { + t.Fatal("decode CA") + } + caCert, err := x509.ParseCertificate(caBlock.Bytes) + if err != nil { + t.Fatalf("parse CA: %v", err) + } + if !caCert.IsCA { + t.Fatal("CA.IsCA = false") + } + srvBlock, _ := pem.Decode(b.ServerCertPEM) + srvCert, err := x509.ParseCertificate(srvBlock.Bytes) + if err != nil { + t.Fatalf("parse server: %v", err) + } + pool := x509.NewCertPool() + pool.AddCert(caCert) + if _, err := srvCert.Verify(x509.VerifyOptions{ + Roots: pool, + DNSName: "gw.test", + CurrentTime: time.Now(), + }); err != nil { + t.Fatalf("verify: %v", err) + } + if !containsString(srvCert.DNSNames, "localhost") { + t.Fatalf("missing localhost SAN: %v", srvCert.DNSNames) + } +} + +func TestGenerateDefaultValidity(t *testing.T) { + b, err := Generate(GenerateOptions{DNSNames: []string{"x"}}) + if err != nil { + t.Fatal(err) + } + blk, _ := pem.Decode(b.ServerCertPEM) + cert, err := x509.ParseCertificate(blk.Bytes) + if err != nil { + t.Fatal(err) + } + if cert.NotAfter.Sub(cert.NotBefore) < time.Hour { + t.Fatalf("validity too short: %s", cert.NotAfter.Sub(cert.NotBefore)) + } + if !strings.EqualFold(cert.Subject.CommonName, "documentdb-e2e") { + t.Fatalf("unexpected CN: %s", cert.Subject.CommonName) + } +} + +func containsString(xs []string, want string) bool { + for _, x := range xs { + if x == want { + return true + } + } + return false +} diff --git a/test/e2e/runid.go b/test/e2e/runid.go new file mode 100644 index 00000000..90eb3f98 --- /dev/null +++ b/test/e2e/runid.go @@ -0,0 +1,69 @@ +package e2e + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "os" + "sync" + "time" +) + +// runIDEnv is the environment variable consulted to pin the run +// identifier. When set and non-empty, its value is used verbatim; when +// unset, a per-process id is generated from the current time and a +// small random suffix on first access. 
const runIDEnv = "E2E_RUN_ID"

var (
	runIDOnce sync.Once // guards one-time resolution of runIDVal
	runIDVal  string    // cached run identifier, set by RunID
)

// RunID returns the process-scoped run identifier used to namespace
// shared fixtures and to label every cluster-scoped object the e2e
// suite creates. Stable for the life of the process.
//
// The identifier is resolved in this order:
//
//  1. $E2E_RUN_ID when set and non-empty (useful for reusing / cleaning
//     up fixtures across invocations);
//  2. otherwise a short, low-collision id derived from the current
//     Unix nanosecond timestamp plus four random bytes.
//
// Multiple test binaries that run independently each get their own
// id, which is what the fixture teardown logic relies on to avoid
// deleting another binary's still-live resources.
func RunID() string {
	runIDOnce.Do(func() {
		if v := os.Getenv(runIDEnv); v != "" {
			runIDVal = v
			return
		}
		runIDVal = generateRunID()
	})
	return runIDVal
}

// generateRunID produces a short, lowercase hex identifier.
// Exposed for tests via the resetRunIDForTest helper below.
func generateRunID() string {
	var b [4]byte
	if _, err := rand.Read(b[:]); err != nil {
		// crypto/rand should never fail in practice; fall back to a
		// time-derived suffix so the suite keeps running.
		return fmt.Sprintf("t%x", time.Now().UnixNano())[:10]
	}
	// The low 32 bits of UnixNano rendered as lowercase hex keep the
	// prefix short (it wraps roughly every 4.3 s, so it is NOT
	// monotonic); the 8 hex chars of randomness are what actually
	// keeps cross-process collision risk low when two binaries start
	// close together.
	ts := time.Now().UnixNano()
	return fmt.Sprintf("%x%s", ts&0xFFFFFFFF, hex.EncodeToString(b[:]))
}

// resetRunIDForTest re-initialises the once-guard so tests can exercise
// the generation path deterministically. Not part of the public API.
+func resetRunIDForTest() { + runIDOnce = sync.Once{} + runIDVal = "" +} diff --git a/test/e2e/runid_test.go b/test/e2e/runid_test.go new file mode 100644 index 00000000..ac07c239 --- /dev/null +++ b/test/e2e/runid_test.go @@ -0,0 +1,54 @@ +package e2e + +import ( + "os" + "regexp" + "testing" +) + +func TestRunIDFromEnv(t *testing.T) { + t.Setenv(runIDEnv, "pinned-run-42") + resetRunIDForTest() + t.Cleanup(resetRunIDForTest) + + if got := RunID(); got != "pinned-run-42" { + t.Fatalf("RunID with env override = %q, want %q", got, "pinned-run-42") + } + // Second call must return the same value (cached). + if got := RunID(); got != "pinned-run-42" { + t.Fatalf("second RunID = %q, want %q", got, "pinned-run-42") + } +} + +func TestRunIDGeneratedWhenEnvMissing(t *testing.T) { + _ = os.Unsetenv(runIDEnv) + resetRunIDForTest() + t.Cleanup(resetRunIDForTest) + + a := RunID() + if a == "" { + t.Fatal("generated RunID must not be empty") + } + // Stable across calls. + if b := RunID(); a != b { + t.Fatalf("RunID not stable: %q != %q", a, b) + } + // Short and lowercase hex/alnum. 
+ if len(a) > 24 { + t.Fatalf("RunID unexpectedly long: %q", a) + } + if !regexp.MustCompile(`^[a-z0-9]+$`).MatchString(a) { + t.Fatalf("RunID not lowercase alnum: %q", a) + } +} + +func TestGenerateRunIDUnique(t *testing.T) { + seen := map[string]struct{}{} + for range 16 { + v := generateRunID() + if _, dup := seen[v]; dup { + t.Fatalf("generateRunID produced duplicate %q", v) + } + seen[v] = struct{}{} + } +} diff --git a/test/e2e/suite.go b/test/e2e/suite.go new file mode 100644 index 00000000..361df5e2 --- /dev/null +++ b/test/e2e/suite.go @@ -0,0 +1,212 @@ +package e2e + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "sync" + "time" + + "github.com/cloudnative-pg/cloudnative-pg/tests/utils/environment" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/operatorhealth" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/testenv" +) + +// suiteEnv holds the process-wide CNPG TestingEnvironment used by every +// spec in the current test binary. It is populated by SetupSuite and +// cleared by TeardownSuite. Each Ginkgo test binary (root + per-area) +// gets its own copy; state is not shared across binaries. +var ( + suiteEnv *environment.TestingEnvironment + suiteEnvOnce sync.Once + suiteEnvErr error + + // suiteGate is the operator-pod churn sentinel captured at + // SetupSuite time. It is reused by [CheckOperatorUnchanged] + // from per-area BeforeEach hooks so a single operator restart + // during the run aborts every subsequent spec instead of + // producing confusing downstream failures. + suiteGate *operatorhealth.Gate +) + +// SuiteEnv returns the TestingEnvironment initialized by SetupSuite. 
+// Specs must invoke this only after SynchronizedBeforeSuite has run on +// the local node; a nil return means setup was skipped or failed. +func SuiteEnv() *environment.TestingEnvironment { return suiteEnv } + +// SetupSuite builds the shared TestingEnvironment (idempotent) and runs +// the operator-health gate, failing fast if the operator pod is not +// Ready within timeout. Intended to be called from +// SynchronizedBeforeSuite in the suite_test.go of every test binary. +func SetupSuite(ctx context.Context, operatorReadyTimeout time.Duration) error { + suiteEnvOnce.Do(func() { + // Propagate the resolved run id into every package that + // stamps it onto fixtures. Both fixtures and namespaces + // must see the same value before any namespace is + // derived so per-spec names collide deterministically + // across binaries when E2E_RUN_ID is exported. + fixtures.SetRunID(RunID()) + namespaces.SetRunIDFunc(RunID) + + env, err := testenv.NewDocumentDBTestingEnvironment(ctx) + if err != nil { + suiteEnvErr = fmt.Errorf("building TestingEnvironment: %w", err) + return + } + suiteEnv = env + if err := gateOperatorReady(ctx, env.Client, testenv.DefaultOperatorNamespace, operatorReadyTimeout); err != nil { + suiteEnvErr = fmt.Errorf("operator health gate: %w", err) + } + }) + return suiteEnvErr +} + +// TeardownSuite releases the shared fixtures created during the suite +// run. Safe to call even when SetupSuite failed or was never invoked. +// Errors from individual fixture teardowns are joined so the caller +// sees every problem rather than just the first. 
+func TeardownSuite(ctx context.Context) error { + if suiteEnv == nil || suiteEnv.Client == nil { + return nil + } + var errs []error + if err := fixtures.TeardownSharedRO(ctx, suiteEnv.Client); err != nil && !isNotFound(err) { + errs = append(errs, fmt.Errorf("teardown shared-ro: %w", err)) + } + if err := fixtures.TeardownSharedScale(ctx, suiteEnv.Client); err != nil && !isNotFound(err) { + errs = append(errs, fmt.Errorf("teardown shared-scale: %w", err)) + } + return errors.Join(errs...) +} + +// CheckOperatorUnchanged verifies that the operator pod captured at +// SetupSuite time is still running with the same UID and restart count. +// Returns nil when suiteGate has not been initialized yet (e.g., the +// caller is in the root binary before SynchronizedBeforeSuite), or when +// the operator pod matches the snapshot. Any drift returns a wrapped +// error and flips the package-level churn sentinel so subsequent +// SkipIfChurned calls observe it. +// +// Every per-area suite (except tests/upgrade/, where operator restarts +// are expected) should invoke this from a BeforeEach: +// +// var _ = BeforeEach(func() { +// Expect(e2e.CheckOperatorUnchanged()).To(Succeed()) +// }) +func CheckOperatorUnchanged() error { + if suiteGate == nil { + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + return suiteGate.Verify(ctx) +} + +// gateOperatorReady waits up to timeout for the DocumentDB operator pod +// to reach Ready=True and stores the captured [operatorhealth.Gate] in +// the package-level suiteGate so [CheckOperatorUnchanged] can reuse it. 
+func gateOperatorReady(ctx context.Context, c client.Client, ns string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + const poll = 2 * time.Second + var lastReason string + for { + pod, err := findOperatorPodForGate(ctx, c, ns) + switch { + case err == nil && podReady(pod): + g, gateErr := operatorhealth.NewGate(ctx, c, ns) + if gateErr != nil { + return fmt.Errorf("snapshot operator gate: %w", gateErr) + } + suiteGate = g + return nil + case err != nil: + lastReason = err.Error() + default: + lastReason = fmt.Sprintf("pod %s/%s not ready yet", ns, pod.Name) + } + if time.Now().After(deadline) { + return fmt.Errorf("operator pod in %q not ready after %s: %s", ns, timeout, lastReason) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(poll): + } + } +} + +// findOperatorPodForGate locates the operator pod via the same label +// selector operatorhealth uses. Kept private to avoid cycling the +// internals of operatorhealth — if that package grows an exported +// finder, switch to it. 
+func findOperatorPodForGate(ctx context.Context, c client.Client, ns string) (*corev1.Pod, error) { + var pods corev1.PodList + if err := c.List(ctx, &pods, + client.InNamespace(ns), + client.MatchingLabels{operatorhealth.PodLabelKey: operatorhealth.PodLabelValue}, + ); err != nil { + return nil, fmt.Errorf("listing operator pods: %w", err) + } + if len(pods.Items) == 0 { + return nil, fmt.Errorf("no operator pods with %s=%s in %s", + operatorhealth.PodLabelKey, operatorhealth.PodLabelValue, ns) + } + return &pods.Items[0], nil +} + +func podReady(pod *corev1.Pod) bool { + if pod == nil || pod.Status.Phase != corev1.PodRunning { + return false + } + for _, cond := range pod.Status.Conditions { + if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue { + return true + } + } + return false +} + +// isNotFound detects "resource gone" errors returned by fixture +// teardown so the suite does not fail when fixtures were never created +// (e.g., a smoke-only run). +func isNotFound(err error) bool { + return err != nil && apierrors.IsNotFound(err) +} + +// ArtifactsDir returns the directory E2E artifacts (logs, junit reports) +// should be written to. The default layout isolates each ginkgo binary +// run and each parallel process: +// +// ./_artifacts//proc-/ +// +// The directory is created lazily on first call. Override the entire +// path via E2E_ARTIFACTS_DIR — the override is taken verbatim (no RunID +// or proc suffix is appended). +func ArtifactsDir() string { + if v := os.Getenv("E2E_ARTIFACTS_DIR"); v != "" { + _ = os.MkdirAll(v, 0o755) + return v + } + dir := filepath.Join(".", "_artifacts", RunID(), "proc-"+procIDString()) + _ = os.MkdirAll(dir, 0o755) + return dir +} + +// procIDString returns the Ginkgo parallel process id or "1" when +// unset. Kept separate from the fixtures procID helper to avoid a +// circular dependency and because callers in suite.go only need a +// string, not the int form. 
+func procIDString() string { + if v := os.Getenv("GINKGO_PARALLEL_PROCESS"); v != "" { + return v + } + return "1" +} diff --git a/test/e2e/suite_test.go b/test/e2e/suite_test.go new file mode 100644 index 00000000..a1274238 --- /dev/null +++ b/test/e2e/suite_test.go @@ -0,0 +1,127 @@ +// suite_test.go is the Ginkgo root for the DocumentDB Kubernetes +// Operator E2E suite. It owns shared bootstrap: building the CNPG +// TestingEnvironment, running the operator-health gate, and tearing +// down session-scoped fixtures. Each per-area package under tests/ +// compiles to its own test binary and performs the same bootstrap via +// the exported SetupSuite / TeardownSuite helpers in suite.go. +// +// Cross-binary run-id contract: +// +// Per-spec fixtures (labeled namespaces, credential secrets) are +// stamped with e2e.RunID(), which falls back to a random value when +// E2E_RUN_ID is unset. Every Ginkgo test binary computes its own +// RunID at start-up, so running two binaries back-to-back without +// E2E_RUN_ID means they cannot adopt each other's fixtures — the +// second binary will reject the mismatched run-id label. To run +// multiple binaries in a single logical E2E run (CI matrix, manual +// bisection, etc.) export E2E_RUN_ID= for all of +// them. When the variable is empty, SynchronizedBeforeSuite logs a +// warning to GinkgoWriter so it surfaces in test output. +// +// Environment variables consulted by the suite: +// +// TEST_DEPTH // 0–4 — depth tier, see levels.go. Default: 1 (High). +// TEST_TIMEOUTS // optional timeout profile, consumed by pkg/e2eutils/timeouts. +// KUBECONFIG // standard; required to reach the test cluster. +// POSTGRES_IMG // placeholder for CNPG's semver parsing (default busybox:17.2). +// E2E_ARTIFACTS_DIR // override for artifact output (default ./_artifacts). +// E2E_RUN_ID // optional shared id for cross-binary fixture reuse. +// E2E_TAIL_LOGS // "1" enables the best-effort operator log tailer. 
+// +// Standard Ginkgo v2 flags (--ginkgo.label-filter, --ginkgo.focus, -p, +// etc.) are auto-registered. +package e2e + +import ( + "context" + "fmt" + "os" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// operatorReadyTimeout bounds how long SynchronizedBeforeSuite waits +// for the operator pod to report Ready=True before aborting the suite. +const operatorReadyTimeout = 2 * time.Minute + +// TestE2E is the Ginkgo root for this package. Per-area test binaries +// live under tests// and have their own TestX entry points. +func TestE2E(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E Suite") +} + +var _ = SynchronizedBeforeSuite( + // Node 1 (primary process): build the environment, gate the + // operator, optionally start the log tailer, then publish an + // empty marker — each node rebuilds its own local env so there is + // nothing to serialize. + func(ctx SpecContext) []byte { + if os.Getenv("E2E_RUN_ID") == "" { + fmt.Fprintf(GinkgoWriter, + "e2e: WARNING — E2E_RUN_ID is unset; per-spec fixtures cannot be reused "+ + "across test binaries in this run. Export E2E_RUN_ID= "+ + "before invoking multiple ginkgo binaries to share labeled fixtures.\n") + } + if err := SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("suite bootstrap failed on node 1: %v", err)) + } + fmt.Fprintf(GinkgoWriter, + "e2e: depth=%d (TEST_DEPTH=%q) artifacts=%s\n", + CurrentLevel(), os.Getenv("TEST_DEPTH"), ArtifactsDir()) + fmt.Fprintf(GinkgoWriter, + "e2e: active area labels = %v\n", allAreaLabels()) + if os.Getenv("E2E_TAIL_LOGS") == "1" { + startOperatorLogTailer(context.Background()) + } + return []byte{} + }, + // All nodes: build a local env so Ginkgo parallel processes each + // have their own *environment.TestingEnvironment to work with. 
+ func(_ SpecContext, _ []byte) { + if err := SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("suite bootstrap failed on worker node: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + // All nodes: teardown shared fixtures. Errors are logged but not + // escalated — cleanup is best-effort. + func(ctx SpecContext) { + if err := TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "e2e: teardown reported errors: %v\n", err) + } + }, + // Node 1: no-op. Nothing to aggregate. + func(_ SpecContext) {}, +) + +// allAreaLabels returns the static list of area labels declared in +// labels.go. Kept in sync manually; adding a new area should append +// here and in labels.go together. +func allAreaLabels() []string { + return []string{ + LifecycleLabel, ScaleLabel, DataLabel, PerformanceLabel, + BackupLabel, RecoveryLabel, TLSLabel, FeatureLabel, + ExposureLabel, StatusLabel, UpgradeLabel, + } +} + +// startOperatorLogTailer is currently a no-op. The earlier placeholder +// that wrote a stub operator.log into $ARTIFACTS has been removed so +// failure triage does not find an empty file and assume the tailer ran. +// When E2E_TAIL_LOGS=1 is set the suite logs a reminder that no log +// streaming is active yet. +// +// TODO(p2): replace with a proper client-go PodLogs stream that +// appends until the context is cancelled. See +// docs/designs/e2e-test-suite.md §"Diagnostics". +func startOperatorLogTailer(_ context.Context) { + fmt.Fprintf(GinkgoWriter, + "e2e: E2E_TAIL_LOGS=1 requested but the operator log tailer is not implemented yet; "+ + "no operator.log will be produced for this run.\n") +} diff --git a/test/e2e/tests/data/aggregation_test.go b/test/e2e/tests/data/aggregation_test.go new file mode 100644 index 00000000..58e149e6 --- /dev/null +++ b/test/e2e/tests/data/aggregation_test.go @@ -0,0 +1,110 @@ +package data + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + + e2e "github.com/documentdb/documentdb-operator/test/e2e" + emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/seed" +) + +var _ = Describe("DocumentDB data — aggregation", + Ordered, + Label(e2e.DataLabel), + e2e.MediumLevelLabel, + func() { + var ( + ctx context.Context + handle *emongo.Handle + dbName string + coll *mongo.Collection + ) + + BeforeAll(func() { + ctx = context.Background() + handle, dbName = connectSharedRO(ctx) + coll = handle.Database(dbName).Collection("agg") + docs := seed.AggDataset() + any := make([]any, len(docs)) + for i := range docs { + any[i] = docs[i] + } + _, err := coll.InsertMany(ctx, any) + Expect(err).NotTo(HaveOccurred()) + }) + AfterAll(func() { + if handle != nil { + _ = handle.Client().Database(dbName).Drop(ctx) + _ = handle.Close(ctx) + } + }) + + It("groups documents by category and counts per-group cardinality", func() { + pipe := mongo.Pipeline{ + {{Key: "$group", Value: bson.D{ + {Key: "_id", Value: "$category"}, + {Key: "count", Value: bson.D{{Key: "$sum", Value: 1}}}, + }}}, + } + cur, err := coll.Aggregate(ctx, pipe) + Expect(err).NotTo(HaveOccurred()) + defer cur.Close(ctx) + var results []bson.M + Expect(cur.All(ctx, &results)).To(Succeed()) + Expect(results).To(HaveLen(seed.AggDatasetGroups)) + var total int64 + for _, r := range results { + switch v := r["count"].(type) { + case int32: + total += int64(v) + case int64: + total += v + default: + Fail("unexpected count type") + } + } + Expect(total).To(Equal(int64(seed.AggDatasetSize))) + }) + + It("filters with $match before grouping", func() { + pipe := mongo.Pipeline{ + {{Key: "$match", Value: bson.D{{Key: "category", Value: "alpha"}}}}, + {{Key: "$group", Value: bson.D{ + {Key: "_id", Value: "$category"}, + {Key: "n", Value: bson.D{{Key: "$sum", Value: 1}}}, + 
}}}, + } + cur, err := coll.Aggregate(ctx, pipe) + Expect(err).NotTo(HaveOccurred()) + defer cur.Close(ctx) + var results []bson.M + Expect(cur.All(ctx, &results)).To(Succeed()) + Expect(results).To(HaveLen(1)) + Expect(results[0]["_id"]).To(Equal("alpha")) + }) + + It("projects selected fields with $project", func() { + pipe := mongo.Pipeline{ + {{Key: "$match", Value: bson.D{{Key: "_id", Value: 1}}}}, + {{Key: "$project", Value: bson.D{ + {Key: "_id", Value: 0}, + {Key: "category", Value: 1}, + }}}, + } + cur, err := coll.Aggregate(ctx, pipe) + Expect(err).NotTo(HaveOccurred()) + defer cur.Close(ctx) + var results []bson.M + Expect(cur.All(ctx, &results)).To(Succeed()) + Expect(results).To(HaveLen(1)) + // _id was explicitly excluded; only category remains. + Expect(results[0]).NotTo(HaveKey("_id")) + Expect(results[0]).To(HaveKey("category")) + }) + }, +) diff --git a/test/e2e/tests/data/crud_test.go b/test/e2e/tests/data/crud_test.go new file mode 100644 index 00000000..3d290fff --- /dev/null +++ b/test/e2e/tests/data/crud_test.go @@ -0,0 +1,85 @@ +package data + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "go.mongodb.org/mongo-driver/v2/bson" + + e2e "github.com/documentdb/documentdb-operator/test/e2e" + emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/seed" +) + +var _ = Describe("DocumentDB data — CRUD", + Ordered, + Label(e2e.DataLabel, e2e.BasicLabel), + e2e.MediumLevelLabel, + func() { + var ( + ctx context.Context + handle *emongo.Handle + dbName string + ) + + BeforeAll(func() { + ctx = context.Background() + handle, dbName = connectSharedRO(ctx) + }) + AfterAll(func() { + if handle != nil { + _ = handle.Client().Database(dbName).Drop(ctx) + _ = handle.Close(ctx) + } + }) + + It("inserts a document and finds it", func() { + coll := handle.Database(dbName).Collection("crud_insert_find") + _, err := coll.InsertOne(ctx, bson.M{"_id": 1, "name": "alice", "score": 10}) + Expect(err).NotTo(HaveOccurred()) + var got bson.M + Expect(coll.FindOne(ctx, bson.M{"_id": 1}).Decode(&got)).To(Succeed()) + Expect(got["name"]).To(Equal("alice")) + }) + + It("bulk inserts the small dataset and counts documents", func() { + coll := handle.Database(dbName).Collection("crud_bulk") + docs := seed.SmallDataset() + any := make([]any, len(docs)) + for i := range docs { + any[i] = docs[i] + } + _, err := coll.InsertMany(ctx, any) + Expect(err).NotTo(HaveOccurred()) + n, err := coll.CountDocuments(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(seed.SmallDatasetSize))) + }) + + It("updates a document in place", func() { + coll := handle.Database(dbName).Collection("crud_update") + _, err := coll.InsertOne(ctx, bson.M{"_id": 1, "status": "new"}) + Expect(err).NotTo(HaveOccurred()) + res, err := coll.UpdateOne(ctx, bson.M{"_id": 1}, bson.M{"$set": bson.M{"status": "done"}}) + Expect(err).NotTo(HaveOccurred()) + Expect(res.ModifiedCount).To(Equal(int64(1))) + var got bson.M + Expect(coll.FindOne(ctx, bson.M{"_id": 
1}).Decode(&got)).To(Succeed()) + Expect(got["status"]).To(Equal("done")) + }) + + It("deletes a document and observes the decrement", func() { + coll := handle.Database(dbName).Collection("crud_delete") + docs := []any{bson.M{"_id": 1}, bson.M{"_id": 2}, bson.M{"_id": 3}} + _, err := coll.InsertMany(ctx, docs) + Expect(err).NotTo(HaveOccurred()) + res, err := coll.DeleteOne(ctx, bson.M{"_id": 2}) + Expect(err).NotTo(HaveOccurred()) + Expect(res.DeletedCount).To(Equal(int64(1))) + n, err := coll.CountDocuments(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(2))) + }) + }, +) diff --git a/test/e2e/tests/data/data_suite_test.go b/test/e2e/tests/data/data_suite_test.go new file mode 100644 index 00000000..f83eec43 --- /dev/null +++ b/test/e2e/tests/data/data_suite_test.go @@ -0,0 +1,56 @@ +// Package data hosts the DocumentDB E2E data area. See +// docs/designs/e2e-test-suite.md for the spec catalog. This file is +// the Ginkgo root for the area binary and shares bootstrap with the +// other area binaries via the exported helpers in package e2e. +package data + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +const operatorReadyTimeout = 2 * time.Minute + +func TestData(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E - Data", Label(e2e.DataLabel)) +} + +var _ = SynchronizedBeforeSuite( + func(ctx SpecContext) []byte { + if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("data bootstrap: %v", err)) + } + return []byte{} + }, + func(_ SpecContext, _ []byte) { + if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("data worker bootstrap: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + func(ctx SpecContext) { + if err := e2e.TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "data teardown: %v\n", err) + } + }, + func(_ SpecContext) {}, +) + +// BeforeEach in this area aborts the spec if the operator pod has +// drifted since SetupSuite (UID/name/restart-count change). Area +// tests/upgrade/ intentionally omits this hook because operator +// restarts are part of its scenario. +var _ = BeforeEach(func() { +Expect(e2e.CheckOperatorUnchanged()).To(Succeed(), +"operator health check failed — a previous spec or reconciler likely restarted the operator") +}) diff --git a/test/e2e/tests/data/delete_ops_test.go b/test/e2e/tests/data/delete_ops_test.go new file mode 100644 index 00000000..19eedf7a --- /dev/null +++ b/test/e2e/tests/data/delete_ops_test.go @@ -0,0 +1,79 @@ +package data + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + + e2e "github.com/documentdb/documentdb-operator/test/e2e" + emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/seed" +) + +// This spec writes to its per-spec Mongo database only — the shared +// read-only CR is not mutated, honoring fixture contracts. The "RO" in +// SharedRO means the Kubernetes custom resource is read-only; data-plane +// writes into isolated databases are permitted. +var _ = Describe("DocumentDB data — delete operators", + Ordered, + Label(e2e.DataLabel), + e2e.MediumLevelLabel, + func() { + var ( + ctx context.Context + handle *emongo.Handle + dbName string + coll *mongo.Collection + ) + + BeforeAll(func() { + ctx = context.Background() + handle, dbName = connectSharedRO(ctx) + coll = handle.Database(dbName).Collection("delete_ops") + }) + AfterAll(func() { + if handle != nil { + _ = handle.Client().Database(dbName).Drop(ctx) + _ = handle.Close(ctx) + } + }) + + BeforeEach(func() { + // Reset state between Its so counts are deterministic. + _, err := coll.DeleteMany(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred()) + seedSmall(ctx, coll) + }) + + It("deleteOne removes exactly one matching document", func() { + res, err := coll.DeleteOne(ctx, bson.M{"score": bson.M{"$gte": 30}}) + Expect(err).NotTo(HaveOccurred()) + Expect(res.DeletedCount).To(Equal(int64(1))) + n, err := coll.CountDocuments(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(seed.SmallDatasetSize - 1))) + }) + + It("deleteMany removes every matching document", func() { + // SmallDataset scores are 10..100. >= 50 → ids 5..10 → 6 docs. 
+ res, err := coll.DeleteMany(ctx, bson.M{"score": bson.M{"$gte": 50}}) + Expect(err).NotTo(HaveOccurred()) + Expect(res.DeletedCount).To(Equal(int64(6))) + n, err := coll.CountDocuments(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(seed.SmallDatasetSize - 6))) + }) + + It("deleteMany with empty filter removes all documents", func() { + res, err := coll.DeleteMany(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred()) + Expect(res.DeletedCount).To(Equal(int64(seed.SmallDatasetSize))) + n, err := coll.CountDocuments(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(0))) + }) + }, +) diff --git a/test/e2e/tests/data/helpers_test.go b/test/e2e/tests/data/helpers_test.go new file mode 100644 index 00000000..5b38e2c8 --- /dev/null +++ b/test/e2e/tests/data/helpers_test.go @@ -0,0 +1,31 @@ +// Package data hosts DocumentDB E2E data-area specs. This file provides +// a small connectSharedRO helper shared across the spec files in this +// package so each spec does not repeat the fixture-get / +// port-forward / client-connect plumbing. It is a test-only helper +// (package data) and is not exported to other areas. +package data + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + e2e "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" +) + +// connectSharedRO returns a Handle against the session-wide SharedRO +// DocumentDB cluster and a DB name unique to the calling spec. The +// returned Handle MUST be closed by the caller (typically from +// AfterAll). dbName is derived from CurrentSpecReport().FullText() so +// Ginkgo parallel processes running the same file against the same +// cluster do not collide on collection state. 
+func connectSharedRO(ctx context.Context) (*emongo.Handle, string) { + roHandle, err := fixtures.GetOrCreateSharedRO(ctx, e2e.SuiteEnv().Client) + Expect(err).NotTo(HaveOccurred(), "get-or-create shared-ro fixture") + h, err := emongo.NewFromDocumentDB(ctx, e2e.SuiteEnv(), roHandle.Namespace(), roHandle.Name()) + Expect(err).NotTo(HaveOccurred(), "connect to shared-ro gateway") + return h, fixtures.DBNameFor(CurrentSpecReport().FullText()) +} diff --git a/test/e2e/tests/data/pipeline_test.go b/test/e2e/tests/data/pipeline_test.go new file mode 100644 index 00000000..faae56c2 --- /dev/null +++ b/test/e2e/tests/data/pipeline_test.go @@ -0,0 +1,120 @@ +package data + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + + e2e "github.com/documentdb/documentdb-operator/test/e2e" + emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" +) + +// pipeline_test.go exercises more complex aggregation pipelines: +// $lookup (joins), $unwind (array flattening), and $group. Data is +// seeded inline because seed.AggDataset does not model cross-collection +// relationships. 
+var _ = Describe("DocumentDB data — complex pipelines", + Ordered, + Label(e2e.DataLabel), + e2e.MediumLevelLabel, + func() { + var ( + ctx context.Context + handle *emongo.Handle + dbName string + orders *mongo.Collection + products *mongo.Collection + ) + + BeforeAll(func() { + ctx = context.Background() + handle, dbName = connectSharedRO(ctx) + orders = handle.Database(dbName).Collection("orders") + products = handle.Database(dbName).Collection("products") + + _, err := products.InsertMany(ctx, []any{ + bson.M{"_id": "p1", "name": "pen", "category": "office"}, + bson.M{"_id": "p2", "name": "book", "category": "office"}, + bson.M{"_id": "p3", "name": "lamp", "category": "home"}, + }) + Expect(err).NotTo(HaveOccurred()) + + _, err = orders.InsertMany(ctx, []any{ + bson.M{"_id": 1, "customer": "alice", "items": bson.A{"p1", "p2"}}, + bson.M{"_id": 2, "customer": "bob", "items": bson.A{"p2", "p3"}}, + bson.M{"_id": 3, "customer": "alice", "items": bson.A{"p3"}}, + }) + Expect(err).NotTo(HaveOccurred()) + }) + AfterAll(func() { + if handle != nil { + _ = handle.Client().Database(dbName).Drop(ctx) + _ = handle.Close(ctx) + } + }) + + It("performs $unwind on the items array", func() { + pipe := mongo.Pipeline{ + {{Key: "$unwind", Value: "$items"}}, + } + cur, err := orders.Aggregate(ctx, pipe) + Expect(err).NotTo(HaveOccurred()) + defer cur.Close(ctx) + var out []bson.M + Expect(cur.All(ctx, &out)).To(Succeed()) + // 2 + 2 + 1 = 5 unwound rows from 3 source orders. 
+ Expect(out).To(HaveLen(5)) + }) + + It("joins orders with products via $lookup + $unwind", func() { + pipe := mongo.Pipeline{ + {{Key: "$unwind", Value: "$items"}}, + {{Key: "$lookup", Value: bson.D{ + {Key: "from", Value: "products"}, + {Key: "localField", Value: "items"}, + {Key: "foreignField", Value: "_id"}, + {Key: "as", Value: "product"}, + }}}, + {{Key: "$unwind", Value: "$product"}}, + {{Key: "$match", Value: bson.D{{Key: "customer", Value: "alice"}}}}, + } + cur, err := orders.Aggregate(ctx, pipe) + Expect(err).NotTo(HaveOccurred()) + defer cur.Close(ctx) + var out []bson.M + Expect(cur.All(ctx, &out)).To(Succeed()) + // alice has orders {1,3} with items {p1,p2,p3} → 3 rows. + Expect(out).To(HaveLen(3)) + for _, doc := range out { + Expect(doc["customer"]).To(Equal("alice")) + product, ok := doc["product"].(bson.M) + Expect(ok).To(BeTrue(), "product should be an embedded doc post-lookup") + Expect(product).To(HaveKey("name")) + } + }) + + It("aggregates per-customer item counts with $group", func() { + pipe := mongo.Pipeline{ + {{Key: "$unwind", Value: "$items"}}, + {{Key: "$group", Value: bson.D{ + {Key: "_id", Value: "$customer"}, + {Key: "n", Value: bson.D{{Key: "$sum", Value: 1}}}, + }}}, + } + cur, err := orders.Aggregate(ctx, pipe) + Expect(err).NotTo(HaveOccurred()) + defer cur.Close(ctx) + var out []bson.M + Expect(cur.All(ctx, &out)).To(Succeed()) + counts := map[string]int{} + for _, r := range out { + counts[r["_id"].(string)] = toInt(r["n"]) + } + Expect(counts).To(HaveKeyWithValue("alice", 3)) + Expect(counts).To(HaveKeyWithValue("bob", 2)) + }) + }, +) diff --git a/test/e2e/tests/data/query_test.go b/test/e2e/tests/data/query_test.go new file mode 100644 index 00000000..d3718270 --- /dev/null +++ b/test/e2e/tests/data/query_test.go @@ -0,0 +1,98 @@ +package data + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + + e2e "github.com/documentdb/documentdb-operator/test/e2e" + emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/seed" +) + +// seedSmall inserts seed.SmallDataset into coll and returns a convenience +// view over the dataset size. +func seedSmall(ctx context.Context, coll *mongo.Collection) int { + docs := seed.SmallDataset() + any := make([]any, len(docs)) + for i := range docs { + any[i] = docs[i] + } + _, err := coll.InsertMany(ctx, any) + Expect(err).NotTo(HaveOccurred()) + return len(docs) +} + +var _ = Describe("DocumentDB data — query filters", + Ordered, + Label(e2e.DataLabel), + e2e.MediumLevelLabel, + func() { + var ( + ctx context.Context + handle *emongo.Handle + dbName string + coll *mongo.Collection + ) + + BeforeAll(func() { + ctx = context.Background() + handle, dbName = connectSharedRO(ctx) + coll = handle.Database(dbName).Collection("query_filters") + seedSmall(ctx, coll) + }) + AfterAll(func() { + if handle != nil { + _ = handle.Client().Database(dbName).Drop(ctx) + _ = handle.Close(ctx) + } + }) + + It("filters with $eq", func() { + var got bson.M + Expect(coll.FindOne(ctx, bson.M{"score": bson.M{"$eq": 50}}).Decode(&got)).To(Succeed()) + Expect(got["_id"]).To(BeEquivalentTo(5)) + }) + + It("filters with $gt", func() { + n, err := coll.CountDocuments(ctx, bson.M{"score": bson.M{"$gt": 50}}) + Expect(err).NotTo(HaveOccurred()) + // SmallDataset scores are N*10 for N in [1..10] → strictly > 50 means 6..10. 
+ Expect(n).To(Equal(int64(5))) + }) + + It("filters with $in", func() { + n, err := coll.CountDocuments(ctx, bson.M{"_id": bson.M{"$in": []int{1, 3, 5, 99}}}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(3))) + }) + + It("filters with $and", func() { + n, err := coll.CountDocuments(ctx, bson.M{"$and": []bson.M{ + {"score": bson.M{"$gte": 30}}, + {"score": bson.M{"$lte": 70}}, + }}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(5))) + }) + + It("filters with $or", func() { + n, err := coll.CountDocuments(ctx, bson.M{"$or": []bson.M{ + {"_id": 1}, + {"_id": 10}, + }}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(2))) + }) + + It("filters with $regex on name", func() { + // SmallDataset names are "doc-N" so all documents match "^doc-". + n, err := coll.CountDocuments(ctx, bson.M{"name": bson.M{"$regex": "^doc-"}}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(Equal(int64(seed.SmallDatasetSize))) + }) + }, +) diff --git a/test/e2e/tests/data/sort_limit_skip_test.go b/test/e2e/tests/data/sort_limit_skip_test.go new file mode 100644 index 00000000..06776c55 --- /dev/null +++ b/test/e2e/tests/data/sort_limit_skip_test.go @@ -0,0 +1,113 @@ +package data + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" + + e2e "github.com/documentdb/documentdb-operator/test/e2e" + emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/seed" +) + +var _ = Describe("DocumentDB data — sort/limit/skip", + Ordered, + Label(e2e.DataLabel), + e2e.MediumLevelLabel, + func() { + var ( + ctx context.Context + handle *emongo.Handle + dbName string + coll *mongo.Collection + ) + + BeforeAll(func() { + ctx = context.Background() + handle, dbName = connectSharedRO(ctx) + coll = handle.Database(dbName).Collection("sort_cursor") + docs := seed.SortDataset() + any := make([]any, len(docs)) + for i := range docs { + any[i] = docs[i] + } + _, err := coll.InsertMany(ctx, any) + Expect(err).NotTo(HaveOccurred()) + }) + AfterAll(func() { + if handle != nil { + _ = handle.Client().Database(dbName).Drop(ctx) + _ = handle.Close(ctx) + } + }) + + It("sorts ascending by _id", func() { + cur, err := coll.Find(ctx, bson.M{}, + options.Find().SetSort(bson.D{{Key: "_id", Value: 1}}).SetLimit(5)) + Expect(err).NotTo(HaveOccurred()) + defer cur.Close(ctx) + var results []bson.M + Expect(cur.All(ctx, &results)).To(Succeed()) + Expect(results).To(HaveLen(5)) + Expect(results[0]["_id"]).To(BeEquivalentTo(1)) + // Strictly ascending. 
+ for i := 1; i < len(results); i++ { + prev := toInt(results[i-1]["_id"]) + cur := toInt(results[i]["_id"]) + Expect(cur).To(BeNumerically(">", prev)) + } + }) + + It("sorts descending by _id", func() { + cur, err := coll.Find(ctx, bson.M{}, + options.Find().SetSort(bson.D{{Key: "_id", Value: -1}}).SetLimit(3)) + Expect(err).NotTo(HaveOccurred()) + defer cur.Close(ctx) + var results []bson.M + Expect(cur.All(ctx, &results)).To(Succeed()) + Expect(results).To(HaveLen(3)) + Expect(toInt(results[0]["_id"])).To(Equal(seed.SortDatasetSize)) + }) + + It("limits and skips consistently", func() { + // Full page 1 (no skip) of 10 results sorted by _id asc. + page1, err := coll.Find(ctx, bson.M{}, + options.Find().SetSort(bson.D{{Key: "_id", Value: 1}}).SetLimit(10)) + Expect(err).NotTo(HaveOccurred()) + defer page1.Close(ctx) + var page1Docs []bson.M + Expect(page1.All(ctx, &page1Docs)).To(Succeed()) + Expect(page1Docs).To(HaveLen(10)) + + // Page 2 is Skip(5) → first doc of page2 equals 6th of page1. + page2, err := coll.Find(ctx, bson.M{}, + options.Find().SetSort(bson.D{{Key: "_id", Value: 1}}).SetSkip(5).SetLimit(5)) + Expect(err).NotTo(HaveOccurred()) + defer page2.Close(ctx) + var page2Docs []bson.M + Expect(page2.All(ctx, &page2Docs)).To(Succeed()) + Expect(page2Docs).To(HaveLen(5)) + Expect(page2Docs[0]["_id"]).To(Equal(page1Docs[5]["_id"])) + }) + }, +) + +// toInt coerces numeric BSON values (int32/int64/int) to int for test +// comparisons. Panics on unexpected types so failure is obvious. +func toInt(v any) int { + switch n := v.(type) { + case int32: + return int(n) + case int64: + return int(n) + case int: + return n + default: + Fail("unexpected numeric type in _id") + return 0 + } +} diff --git a/test/e2e/tests/data/update_ops_test.go b/test/e2e/tests/data/update_ops_test.go new file mode 100644 index 00000000..42943469 --- /dev/null +++ b/test/e2e/tests/data/update_ops_test.go @@ -0,0 +1,91 @@ +package data + +import ( + "context" + + . 
"github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + + e2e "github.com/documentdb/documentdb-operator/test/e2e" + emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" +) + +var _ = Describe("DocumentDB data — update operators", + Ordered, + Label(e2e.DataLabel), + e2e.MediumLevelLabel, + func() { + var ( + ctx context.Context + handle *emongo.Handle + dbName string + coll *mongo.Collection + ) + + BeforeAll(func() { + ctx = context.Background() + handle, dbName = connectSharedRO(ctx) + coll = handle.Database(dbName).Collection("update_ops") + }) + AfterAll(func() { + if handle != nil { + _ = handle.Client().Database(dbName).Drop(ctx) + _ = handle.Close(ctx) + } + }) + + It("applies $set to add and mutate a field", func() { + id := "set-1" + _, err := coll.InsertOne(ctx, bson.M{"_id": id, "name": "alpha"}) + Expect(err).NotTo(HaveOccurred()) + _, err = coll.UpdateOne(ctx, bson.M{"_id": id}, + bson.M{"$set": bson.M{"name": "alpha-2", "enabled": true}}) + Expect(err).NotTo(HaveOccurred()) + var got bson.M + Expect(coll.FindOne(ctx, bson.M{"_id": id}).Decode(&got)).To(Succeed()) + Expect(got["name"]).To(Equal("alpha-2")) + Expect(got["enabled"]).To(BeTrue()) + }) + + It("applies $inc to a numeric field", func() { + id := "inc-1" + _, err := coll.InsertOne(ctx, bson.M{"_id": id, "count": 10}) + Expect(err).NotTo(HaveOccurred()) + _, err = coll.UpdateOne(ctx, bson.M{"_id": id}, + bson.M{"$inc": bson.M{"count": 5}}) + Expect(err).NotTo(HaveOccurred()) + var got bson.M + Expect(coll.FindOne(ctx, bson.M{"_id": id}).Decode(&got)).To(Succeed()) + Expect(toInt(got["count"])).To(Equal(15)) + }) + + It("applies $unset to remove a field", func() { + id := "unset-1" + _, err := coll.InsertOne(ctx, bson.M{"_id": id, "tmp": "x", "keep": "y"}) + Expect(err).NotTo(HaveOccurred()) + _, err = coll.UpdateOne(ctx, bson.M{"_id": id}, + bson.M{"$unset": bson.M{"tmp": ""}}) + 
Expect(err).NotTo(HaveOccurred()) + var got bson.M + Expect(coll.FindOne(ctx, bson.M{"_id": id}).Decode(&got)).To(Succeed()) + Expect(got).NotTo(HaveKey("tmp")) + Expect(got).To(HaveKey("keep")) + }) + + It("applies $push to append to an array", func() { + id := "push-1" + _, err := coll.InsertOne(ctx, bson.M{"_id": id, "tags": bson.A{"a"}}) + Expect(err).NotTo(HaveOccurred()) + _, err = coll.UpdateOne(ctx, bson.M{"_id": id}, + bson.M{"$push": bson.M{"tags": "b"}}) + Expect(err).NotTo(HaveOccurred()) + var got bson.M + Expect(coll.FindOne(ctx, bson.M{"_id": id}).Decode(&got)).To(Succeed()) + tags, ok := got["tags"].(bson.A) + Expect(ok).To(BeTrue(), "tags should decode as bson.A") + Expect(tags).To(ConsistOf("a", "b")) + }) + }, +) diff --git a/test/e2e/tests/exposure/clusterip_test.go b/test/e2e/tests/exposure/clusterip_test.go new file mode 100644 index 00000000..d1b18fe2 --- /dev/null +++ b/test/e2e/tests/exposure/clusterip_test.go @@ -0,0 +1,101 @@ +package exposure + +import ( + "context" + "fmt" + "net" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/portforward" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// reserveFreePort opens and immediately closes a TCP listener on :0 so +// the kernel picks an unused ephemeral port. There is an inherent TOCTOU +// window between the close and the subsequent bind inside port-forward, +// but for a single-threaded ginkgo run it is adequate. 
+func reserveFreePort() (int, error) { + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return 0, fmt.Errorf("reserve free port: %w", err) + } + port := l.Addr().(*net.TCPAddr).Port + _ = l.Close() + return port, nil +} + +// DocumentDB exposure — ClusterIP. +// +// Verifies: +// 1. spec.exposeViaService.serviceType=ClusterIP round-trips into the +// API server unchanged; +// 2. the gateway Service the operator creates is of type ClusterIP; +// 3. a cluster-internal connection (via port-forward) can ping the +// gateway — i.e. the Service is actually wired to Ready gateway pods. +var _ = Describe("DocumentDB exposure — ClusterIP", + Label(e2e.ExposureLabel), e2e.MediumLevelLabel, + func() { + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) + + It("routes cluster-internal traffic to the gateway", func() { + env := e2e.SuiteEnv() + Expect(env).ToNot(BeNil()) + c := env.Client + + ctx, cancel := context.WithTimeout(context.Background(), 12*time.Minute) + DeferCleanup(cancel) + + dd, cleanup := setupFreshCluster(ctx, c, "expose-clusterip", + []string{"exposure_clusterip"}, nil) + DeferCleanup(cleanup) + + // 1. Spec round-trip. + Expect(dd.Spec.ExposeViaService.ServiceType).To(Equal("ClusterIP")) + + // 2. Service type is ClusterIP. + svcName := portforward.GatewayServiceName(dd) + Eventually(assertions.AssertServiceType(ctx, c, dd.Namespace, svcName, corev1.ServiceTypeClusterIP), + timeouts.For(timeouts.ServiceReady), timeouts.PollInterval(timeouts.ServiceReady)). + Should(Succeed()) + + // 3. Cluster-internal connection works. 
+ localPort, err := reserveFreePort() + Expect(err).ToNot(HaveOccurred()) + stop, err := portforward.Open(ctx, env, dd, localPort) + Expect(err).ToNot(HaveOccurred(), "open port-forward") + DeferCleanup(stop) + + var pingErr error + Eventually(func() error { + pingCtx, pingCancel := context.WithTimeout(ctx, 10*time.Second) + defer pingCancel() + cli, err := mongo.NewClient(pingCtx, mongo.ClientOptions{ + Host: "127.0.0.1", + Port: strconv.Itoa(localPort), + User: credUser, + Password: credPassword, + TLS: false, + }) + if err != nil { + pingErr = err + return err + } + defer func() { _ = cli.Disconnect(context.Background()) }() + pingErr = mongo.Ping(pingCtx, cli) + return pingErr + }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). + Should(Succeed(), "mongo ping through ClusterIP port-forward: %v", pingErr) + + _ = client.ObjectKeyFromObject(dd) + }) + }) diff --git a/test/e2e/tests/exposure/exposure_suite_test.go b/test/e2e/tests/exposure/exposure_suite_test.go new file mode 100644 index 00000000..d8d8e45b --- /dev/null +++ b/test/e2e/tests/exposure/exposure_suite_test.go @@ -0,0 +1,56 @@ +// Package exposure hosts the DocumentDB E2E exposure area. See +// docs/designs/e2e-test-suite.md for the spec catalog. This file is +// the Ginkgo root for the area binary and shares bootstrap with the +// other area binaries via the exported helpers in package e2e. +package exposure + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +const operatorReadyTimeout = 2 * time.Minute + +func TestExposure(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E - Exposure", Label(e2e.ExposureLabel)) +} + +var _ = SynchronizedBeforeSuite( + func(ctx SpecContext) []byte { + if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("exposure bootstrap: %v", err)) + } + return []byte{} + }, + func(_ SpecContext, _ []byte) { + if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("exposure worker bootstrap: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + func(ctx SpecContext) { + if err := e2e.TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "exposure teardown: %v\n", err) + } + }, + func(_ SpecContext) {}, +) + +// BeforeEach in this area aborts the spec if the operator pod has +// drifted since SetupSuite (UID/name/restart-count change). Area +// tests/upgrade/ intentionally omits this hook because operator +// restarts are part of its scenario. +var _ = BeforeEach(func() { +Expect(e2e.CheckOperatorUnchanged()).To(Succeed(), +"operator health check failed — a previous spec or reconciler likely restarted the operator") +}) diff --git a/test/e2e/tests/exposure/helpers_test.go b/test/e2e/tests/exposure/helpers_test.go new file mode 100644 index 00000000..66301f09 --- /dev/null +++ b/test/e2e/tests/exposure/helpers_test.go @@ -0,0 +1,115 @@ +package exposure + +import ( + "context" + "os" + "time" + + . 
"github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" + "github.com/documentdb/documentdb-operator/test/e2e" + documentdbutil "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// Credential constants now alias the fixtures exports so every area +// lands on the same values. credUser / credPassword are retained as +// package-level constants because clusterip_test.go (a spec under this +// pass's do-not-touch list) references them directly. +const ( + credSecretName = fixtures.DefaultCredentialSecretName + credUser = fixtures.DefaultCredentialUsername + credPassword = fixtures.DefaultCredentialPassword //nolint:gosec // fixture-only + + // DOCUMENTDB_IMAGE / GATEWAY_IMAGE default to empty strings so the + // operator selects the correct components itself: CNPG pg18 base + + // DocumentDB extension via image-library + gateway as a separate + // sidecar. A pinned env-var override is still honoured for CI. 
+ defaultDocDBImage = "" + defaultGatewayImage = "" +) + +func baseVars(ns, name string) map[string]string { + docdbImg := defaultDocDBImage + if v := os.Getenv("DOCUMENTDB_IMAGE"); v != "" { + docdbImg = v + } + gwImg := defaultGatewayImage + if v := os.Getenv("GATEWAY_IMAGE"); v != "" { + gwImg = v + } + sSize := "1Gi" + if v := os.Getenv("E2E_STORAGE_SIZE"); v != "" { + sSize = v + } + sClass := "standard" + if v := os.Getenv("E2E_STORAGE_CLASS"); v != "" { + sClass = v + } + return map[string]string{ + "NAMESPACE": ns, + "NAME": name, + "INSTANCES": "1", + "STORAGE_SIZE": sSize, + "STORAGE_CLASS": sClass, + "DOCUMENTDB_IMAGE": docdbImg, + "GATEWAY_IMAGE": gwImg, + "CREDENTIAL_SECRET": credSecretName, + "EXPOSURE_TYPE": "ClusterIP", + "LOG_LEVEL": "info", + } +} + +// tests/exposure/ → ../../manifests +func manifestsRoot() string { return "../../manifests" } + +// setupFreshCluster is the exposure-area analogue of the feature_gates +// helper: namespace + secret + DocumentDB, waits for healthy. Returns +// the live CR plus a namespace-deleting cleanup. 
+func setupFreshCluster( + ctx context.Context, + c client.Client, + name string, + mixins []string, + extraVars map[string]string, +) (*previewv1.DocumentDB, func()) { + ns := namespaces.NamespaceForSpec(e2e.ExposureLabel) + Expect(fixtures.CreateLabeledNamespace(ctx, c, ns, e2e.ExposureLabel)).To(Succeed()) + Expect(fixtures.CreateLabeledCredentialSecret(ctx, c, ns)).To(Succeed()) + vars := baseVars(ns, name) + for k, v := range extraVars { + vars[k] = v + } + _, err := documentdbutil.Create(ctx, c, ns, name, documentdbutil.CreateOptions{ + Base: "documentdb", + Mixins: mixins, + Vars: vars, + ManifestsRoot: manifestsRoot(), + }) + Expect(err).ToNot(HaveOccurred(), "create DocumentDB") + + Eventually(func() error { + return documentdbutil.WaitHealthy(ctx, c, + types.NamespacedName{Namespace: ns, Name: name}, + timeouts.For(timeouts.DocumentDBReady)) + }, timeouts.For(timeouts.DocumentDBReady)+30*time.Second, 10*time.Second). + Should(Succeed(), "DocumentDB %s/%s did not become healthy", ns, name) + + live, err := documentdbutil.Get(ctx, c, client.ObjectKey{Namespace: ns, Name: name}) + Expect(err).ToNot(HaveOccurred()) + + cleanup := func() { + delCtx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + _ = c.Delete(delCtx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns}}) + } + return live, cleanup +} diff --git a/test/e2e/tests/exposure/loadbalancer_test.go b/test/e2e/tests/exposure/loadbalancer_test.go new file mode 100644 index 00000000..3008d41e --- /dev/null +++ b/test/e2e/tests/exposure/loadbalancer_test.go @@ -0,0 +1,140 @@ +package exposure + +import ( + "context" + "fmt" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
	"github.com/onsi/gomega"

	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	intstr "k8s.io/apimachinery/pkg/util/intstr"
	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/documentdb/documentdb-operator/test/e2e"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/portforward"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts"
)

// hasLoadBalancerController probes the target cluster by creating a
// throwaway LoadBalancer Service and polling briefly for an external
// address. The probe uses a short timeout so environments without a
// working LB controller skip fast rather than failing the spec. The
// probe namespace is the default namespace; the Service is deleted
// before the function returns regardless of the outcome.
//
// Returns (true, nil) as soon as any ingress IP or hostname is
// assigned, (false, nil) when the timeout elapses without one, and a
// non-nil error only on probe creation failure or context cancellation.
func hasLoadBalancerController(ctx context.Context, c client.Client, timeout time.Duration) (bool, error) {
	probeName := fmt.Sprintf("e2e-lb-probe-%d", time.Now().UnixNano())
	probeNS := "default"
	svc := &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      probeName,
			Namespace: probeNS,
			Labels: map[string]string{
				"e2e.documentdb.io/probe": "loadbalancer",
			},
		},
		Spec: corev1.ServiceSpec{
			Type: corev1.ServiceTypeLoadBalancer,
			// The selector deliberately matches no pods: we only care
			// whether an LB controller assigns an external address,
			// not whether traffic would be routed anywhere.
			Selector: map[string]string{"app.kubernetes.io/name": "nonexistent-e2e-probe"},
			Ports: []corev1.ServicePort{{
				Name: "probe",
				Port: 80,
				// NOTE(review): intstr.FromInt is deprecated in newer
				// apimachinery releases in favour of FromInt32 — confirm
				// against the module's pinned k8s.io/apimachinery version.
				TargetPort: intstr.FromInt(80),
				Protocol:   corev1.ProtocolTCP,
			}},
		},
	}
	if err := c.Create(ctx, svc); err != nil && !apierrors.IsAlreadyExists(err) {
		return false, fmt.Errorf("create LB probe: %w", err)
	}
	// Best-effort cleanup on a detached context so the probe Service is
	// removed even when the caller's ctx is already cancelled.
	defer func() {
		delCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		_ = c.Delete(delCtx, &corev1.Service{
			ObjectMeta: metav1.ObjectMeta{Name: probeName, Namespace: probeNS},
		})
	}()

	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		got := &corev1.Service{}
		// Transient Get errors are tolerated: the next 2s tick retries.
		if err := c.Get(ctx, client.ObjectKey{Namespace: probeNS, Name: probeName}, got); err == nil {
			for _, ing := range got.Status.LoadBalancer.Ingress {
				if ing.IP != "" || ing.Hostname != "" {
					return true, nil
				}
			}
		}
		select {
		case <-ctx.Done():
			return false, ctx.Err()
		case <-time.After(2 * time.Second):
		}
	}
	return false, nil
}

// DocumentDB exposure — LoadBalancer.
//
// Requires a working LoadBalancer controller in the target cluster
// (kind + MetalLB, a cloud-provider LB, etc.). When no external address
// is assigned to a probe Service within ~30s, the spec skips rather than
// fails so unconfigured environments do not poison the run.
var _ = Describe("DocumentDB exposure — LoadBalancer",
	Label(e2e.ExposureLabel, e2e.NeedsMetalLBLabel), e2e.MediumLevelLabel,
	func() {
		BeforeEach(func() {
			e2e.SkipUnlessLevel(e2e.Medium)
			env := e2e.SuiteEnv()
			Expect(env).ToNot(BeNil())
			// Probe budget (45s) exceeds the 30s probe timeout so the
			// context never cancels the probe prematurely.
			probeCtx, cancel := context.WithTimeout(context.Background(), 45*time.Second)
			defer cancel()
			ok, err := hasLoadBalancerController(probeCtx, env.Client, 30*time.Second)
			Expect(err).ToNot(HaveOccurred())
			if !ok {
				Skip("no LoadBalancer controller in cluster — probe service acquired no external address within 30s")
			}
		})

		It("provisions a LoadBalancer Service with an external address", func() {
			env := e2e.SuiteEnv()
			c := env.Client

			ctx, cancel := context.WithTimeout(context.Background(), 12*time.Minute)
			DeferCleanup(cancel)

			dd, cleanup := setupFreshCluster(ctx, c, "expose-lb",
				[]string{"exposure_loadbalancer"},
				map[string]string{"EXPOSURE_TYPE": "LoadBalancer"},
			)
			DeferCleanup(cleanup)

			// 1. Spec round-trip.
			Expect(dd.Spec.ExposeViaService.ServiceType).To(Equal("LoadBalancer"))

			// 2. Service type is LoadBalancer.
			svcName := portforward.GatewayServiceName(dd)
			Eventually(assertions.AssertServiceType(ctx, c, dd.Namespace, svcName, corev1.ServiceTypeLoadBalancer),
				timeouts.For(timeouts.ServiceReady), timeouts.PollInterval(timeouts.ServiceReady)).
				Should(Succeed())

			// 3. External address is eventually assigned (IP or hostname,
			// depending on the LB implementation).
			Eventually(func() error {
				svc := &corev1.Service{}
				if err := c.Get(ctx, client.ObjectKey{Namespace: dd.Namespace, Name: svcName}, svc); err != nil {
					return err
				}
				for _, ing := range svc.Status.LoadBalancer.Ingress {
					if ing.IP != "" || ing.Hostname != "" {
						return nil
					}
				}
				return fmt.Errorf("Service %s/%s has no external address yet", dd.Namespace, svcName)
			}, timeouts.For(timeouts.ServiceReady), timeouts.PollInterval(timeouts.ServiceReady)).
				Should(Succeed())
		})
	})
diff --git a/test/e2e/tests/feature_gates/changestreams_test.go b/test/e2e/tests/feature_gates/changestreams_test.go
new file mode 100644
index 00000000..516ed503
--- /dev/null
+++ b/test/e2e/tests/feature_gates/changestreams_test.go
@@ -0,0 +1,97 @@
package feature_gates

import (
	"context"
	"fmt"
	"time"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"

	previewv1 "github.com/documentdb/documentdb-operator/api/preview"
	"github.com/documentdb/documentdb-operator/test/e2e"
)

// walLevelFor reads the CNPG Cluster that backs the given DocumentDB and
// returns the value of its postgresql.parameters["wal_level"]. Empty
// string means the operator did not set the key (CNPG default applies,
// which is the "replica" level that disables logical decoding — i.e.
// change streams). Any error from the client is surfaced verbatim.
func walLevelFor(ctx context.Context, c client.Client, dd *previewv1.DocumentDB) (string, error) {
	// The CNPG Cluster shares the DocumentDB CR's name and namespace for
	// single-cluster deployments.
	cluster := &cnpgv1.Cluster{}
	if err := c.Get(ctx, client.ObjectKey{Namespace: dd.Namespace, Name: dd.Name}, cluster); err != nil {
		return "", fmt.Errorf("get CNPG Cluster %s/%s: %w", dd.Namespace, dd.Name, err)
	}
	if cluster.Spec.PostgresConfiguration.Parameters == nil {
		return "", nil
	}
	return cluster.Spec.PostgresConfiguration.Parameters["wal_level"], nil
}

// DocumentDB feature-gates / change streams.
//
// The operator translates `spec.featureGates.ChangeStreams=true` into
// `wal_level=logical` on the underlying CNPG Cluster (see
// operator/src/internal/cnpg/cnpg_cluster.go). When the gate is off (or
// unset), the operator does not force a wal_level override, so CNPG's
// default ("replica") applies and change streams over the Mongo wire
// protocol are not supported by the DocumentDB extension.
//
// We assert the observable operator contract — the CNPG Cluster's
// postgresql.parameters — because:
//   1. It is image-independent: the protocol-level change-stream
//      behaviour is only available in the "-changestream" DocumentDB
//      image variants, which are not guaranteed to be loaded in every
//      e2e environment;
//   2. It is what the operator code actually controls.
//
// A future expansion can layer a best-effort mongo `Watch` call on top
// once the suite standardises on change-stream-capable images.
var _ = Describe("DocumentDB feature-gates — change streams",
	Label(e2e.FeatureLabel), e2e.MediumLevelLabel,
	func() {
		BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) })

		DescribeTable("wal_level reflects ChangeStreams gate",
			// enabled drives the manifest mixin; expectLogical is the
			// asserted outcome — kept separate so table entries read as
			// cause → effect.
			func(enabled, expectLogical bool) {
				env := e2e.SuiteEnv()
				Expect(env).ToNot(BeNil(), "SuiteEnv must be initialized")
				c := env.Client

				ctx, cancel := context.WithTimeout(context.Background(), 12*time.Minute)
				DeferCleanup(cancel)

				name := "ft-cs-on"
				mixin := "feature_changestreams"
				if !enabled {
					name = "ft-cs-off"
					// Omit the mixin; the base template has no
					// featureGates block, so the gate is implicitly
					// disabled.
					mixin = ""
				}
				mixins := []string{}
				if mixin != "" {
					mixins = append(mixins, mixin)
				}
				dd, cleanup := setupFreshCluster(ctx, c, name, mixins, nil)
				DeferCleanup(cleanup)

				walLevel, err := walLevelFor(ctx, c, dd)
				Expect(err).ToNot(HaveOccurred())

				if expectLogical {
					Expect(walLevel).To(Equal("logical"),
						"enabled gate must drive wal_level=logical")
				} else {
					Expect(walLevel).ToNot(Equal("logical"),
						"disabled gate must leave wal_level off of logical; got %q", walLevel)
				}
			},
			Entry("enabled → wal_level=logical", true, true),
			Entry("disabled → wal_level not forced to logical", false, false),
		)
	})
diff --git a/test/e2e/tests/feature_gates/feature_gates_suite_test.go b/test/e2e/tests/feature_gates/feature_gates_suite_test.go
new file mode 100644
index 00000000..9aaf005c
--- /dev/null
+++ b/test/e2e/tests/feature_gates/feature_gates_suite_test.go
@@ -0,0 +1,56 @@
// Package feature_gates hosts the DocumentDB E2E featuregates area. See
// docs/designs/e2e-test-suite.md for the spec catalog. This file is
// the Ginkgo root for the area binary and shares bootstrap with the
// other area binaries via the exported helpers in package e2e.
package feature_gates

import (
	"context"
	"fmt"
	"testing"
	"time"

	. "github.com/onsi/ginkgo/v2"
	.
"github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +const operatorReadyTimeout = 2 * time.Minute + +func TestFeatureGates(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E - FeatureGates", Label(e2e.FeatureLabel)) +} + +var _ = SynchronizedBeforeSuite( + func(ctx SpecContext) []byte { + if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("featuregates bootstrap: %v", err)) + } + return []byte{} + }, + func(_ SpecContext, _ []byte) { + if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("featuregates worker bootstrap: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + func(ctx SpecContext) { + if err := e2e.TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "featuregates teardown: %v\n", err) + } + }, + func(_ SpecContext) {}, +) + +// BeforeEach in this area aborts the spec if the operator pod has +// drifted since SetupSuite (UID/name/restart-count change). Area +// tests/upgrade/ intentionally omits this hook because operator +// restarts are part of its scenario. +var _ = BeforeEach(func() { +Expect(e2e.CheckOperatorUnchanged()).To(Succeed(), +"operator health check failed — a previous spec or reconciler likely restarted the operator") +}) diff --git a/test/e2e/tests/feature_gates/helpers_test.go b/test/e2e/tests/feature_gates/helpers_test.go new file mode 100644 index 00000000..0e5995b8 --- /dev/null +++ b/test/e2e/tests/feature_gates/helpers_test.go @@ -0,0 +1,123 @@ +package feature_gates + +import ( + "context" + "os" + "time" + + . 
"github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" + "github.com/documentdb/documentdb-operator/test/e2e" + documentdbutil "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// Shared credential name for fresh per-spec clusters. Username and +// password values are now sourced from fixtures.DefaultCredentialUsername +// and fixtures.DefaultCredentialPassword so mongo helpers that already +// know those values work against both shared fixtures and per-spec CRs. +const credSecretName = fixtures.DefaultCredentialSecretName + +// defaultDocDBImage / defaultGatewayImage are empty by default so the +// operator picks the correct layered defaults (CNPG pg18 base + +// DocumentDB extension image + gateway sidecar). Env vars still +// override for CI pinning. +const ( + defaultDocDBImage = "" + defaultGatewayImage = "" +) + +// baseVars builds the envsubst map the base/documentdb.yaml.template +// expects. Callers override individual entries for per-spec tweaks. 
+func baseVars(ns, name string) map[string]string { + docdbImg := defaultDocDBImage + if v := os.Getenv("DOCUMENTDB_IMAGE"); v != "" { + docdbImg = v + } + gwImg := defaultGatewayImage + if v := os.Getenv("GATEWAY_IMAGE"); v != "" { + gwImg = v + } + sSize := "1Gi" + if v := os.Getenv("E2E_STORAGE_SIZE"); v != "" { + sSize = v + } + sClass := "standard" + if v := os.Getenv("E2E_STORAGE_CLASS"); v != "" { + sClass = v + } + return map[string]string{ + "NAMESPACE": ns, + "NAME": name, + "INSTANCES": "1", + "STORAGE_SIZE": sSize, + "STORAGE_CLASS": sClass, + "DOCUMENTDB_IMAGE": docdbImg, + "GATEWAY_IMAGE": gwImg, + "CREDENTIAL_SECRET": credSecretName, + "EXPOSURE_TYPE": "ClusterIP", + "LOG_LEVEL": "info", + } +} + +// manifestsRoot returns the absolute path to test/e2e/manifests so the +// per-spec clusters can read the mixin templates without depending on +// the caller's working directory. +func manifestsRoot() string { + // tests/feature_gates/ → ../../manifests + return "../../manifests" +} + +// setupFreshCluster creates a namespace, credential secret, and a +// DocumentDB CR composed of the base template plus mixins, then waits +// for it to become healthy. It returns the live CR plus a cleanup func +// that deletes the namespace. Namespace + secret creation delegate to +// the fixtures helpers so ownership labels match the rest of the suite. 
+func setupFreshCluster( + ctx context.Context, + c client.Client, + name string, + mixins []string, + extraVars map[string]string, +) (*previewv1.DocumentDB, func()) { + ns := namespaces.NamespaceForSpec(e2e.FeatureLabel) + Expect(fixtures.CreateLabeledNamespace(ctx, c, ns, e2e.FeatureLabel)).To(Succeed()) + Expect(fixtures.CreateLabeledCredentialSecret(ctx, c, ns)).To(Succeed()) + vars := baseVars(ns, name) + for k, v := range extraVars { + vars[k] = v + } + _, err := documentdbutil.Create(ctx, c, ns, name, documentdbutil.CreateOptions{ + Base: "documentdb", + Mixins: mixins, + Vars: vars, + ManifestsRoot: manifestsRoot(), + }) + Expect(err).ToNot(HaveOccurred(), "create DocumentDB") + + Eventually(func() error { + return documentdbutil.WaitHealthy(ctx, c, + types.NamespacedName{Namespace: ns, Name: name}, + timeouts.For(timeouts.DocumentDBReady)) + }, timeouts.For(timeouts.DocumentDBReady)+30*time.Second, 10*time.Second). + Should(Succeed(), "DocumentDB %s/%s did not become healthy", ns, name) + + // Re-fetch to return the populated object. + live, err := documentdbutil.Get(ctx, c, client.ObjectKey{Namespace: ns, Name: name}) + Expect(err).ToNot(HaveOccurred()) + + cleanup := func() { + delCtx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + _ = c.Delete(delCtx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns}}) + } + return live, cleanup +} \ No newline at end of file diff --git a/test/e2e/tests/lifecycle/delete_reclaim_test.go b/test/e2e/tests/lifecycle/delete_reclaim_test.go new file mode 100644 index 00000000..9a1c26fc --- /dev/null +++ b/test/e2e/tests/lifecycle/delete_reclaim_test.go @@ -0,0 +1,98 @@ +package lifecycle + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" //nolint:revive + . 
"github.com/onsi/gomega" //nolint:revive + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +var _ = Describe("DocumentDB lifecycle — delete with Retain reclaim", + Label(e2e.LifecycleLabel, e2e.DestructiveLabel), e2e.MediumLevelLabel, + func() { + const name = "lifecycle-delete-retain" + var ( + ctx context.Context + ns string + c client.Client + ) + + BeforeEach(func() { + e2e.SkipUnlessLevel(e2e.Medium) + ctx = context.Background() + c = e2e.SuiteEnv().Client + ns = namespaces.NamespaceForSpec(e2e.LifecycleLabel) + createNamespace(ctx, c, ns) + createCredentialSecret(ctx, c, ns, "documentdb-credentials") + }) + + It("preserves the underlying PersistentVolume after the CR is deleted", func() { + vars := baseVars("1Gi") + dd, err := documentdb.Create(ctx, c, ns, name, documentdb.CreateOptions{ + Base: "documentdb", + Mixins: []string{"reclaim_retain"}, + Vars: vars, + }) + Expect(err).ToNot(HaveOccurred()) + + key := types.NamespacedName{Namespace: ns, Name: name} + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.DocumentDBReady), + timeouts.PollInterval(timeouts.DocumentDBReady), + ).Should(Succeed()) + + // Capture the PV names currently bound to this + // namespace's PVCs so we can verify they survive + // DocumentDB deletion. 
+ var pvcs corev1.PersistentVolumeClaimList + Expect(c.List(ctx, &pvcs, client.InNamespace(ns))).To(Succeed()) + Expect(pvcs.Items).ToNot(BeEmpty(), "expected at least one PVC after Ready") + var pvNames []string + for i := range pvcs.Items { + if v := pvcs.Items[i].Spec.VolumeName; v != "" { + pvNames = append(pvNames, v) + } + } + Expect(pvNames).ToNot(BeEmpty(), "expected bound PVs; got only pending PVCs") + + // Delete the DocumentDB and wait for it to disappear. + Expect(documentdb.Delete(ctx, c, dd, 3*time.Minute)).To(Succeed()) + + // Retained PVs should remain in the API server; their + // phase transitions to Released (or stays Bound briefly) + // but the object itself must not be collected. + for _, pvName := range pvNames { + var pv corev1.PersistentVolume + Eventually(func() error { + return c.Get(ctx, types.NamespacedName{Name: pvName}, &pv) + }, 2*time.Minute, 5*time.Second).Should(Succeed(), + "PV %s should still exist under Retain policy", pvName) + Expect(pv.Spec.PersistentVolumeReclaimPolicy).To( + Equal(corev1.PersistentVolumeReclaimRetain), + "PV %s must have reclaimPolicy=Retain", pvName) + } + + // Manual cleanup: retained PVs will otherwise leak across + // test runs. Deleting them releases the underlying + // provisioner storage in kind's local-path driver. + DeferCleanup(func(ctx SpecContext) { + for _, pvName := range pvNames { + _ = c.Delete(ctx, &corev1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{Name: pvName}, + }) + } + }) + }) + }) diff --git a/test/e2e/tests/lifecycle/deploy_test.go b/test/e2e/tests/lifecycle/deploy_test.go new file mode 100644 index 00000000..734021e6 --- /dev/null +++ b/test/e2e/tests/lifecycle/deploy_test.go @@ -0,0 +1,79 @@ +package lifecycle + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" //nolint:revive + . 
 "github.com/onsi/gomega" //nolint:revive

	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/documentdb/documentdb-operator/test/e2e"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts"
)

// Smoke-level deploy spec: a minimal 1-instance cluster must reach Ready
// and the backing CNPG Cluster must carry an owner reference to the CR.
var _ = Describe("DocumentDB lifecycle — deploy",
	Label(e2e.LifecycleLabel, e2e.BasicLabel, e2e.SmokeLabel), e2e.MediumLevelLabel,
	func() {
		const name = "lifecycle-deploy"
		var (
			ctx context.Context
			ns  string
			c   client.Client
		)

		BeforeEach(func() {
			e2e.SkipUnlessLevel(e2e.Medium)
			ctx = context.Background()
			c = e2e.SuiteEnv().Client
			ns = namespaces.NamespaceForSpec(e2e.LifecycleLabel)
			createNamespace(ctx, c, ns)
			createCredentialSecret(ctx, c, ns, "documentdb-credentials")
		})

		It("brings a 1-instance cluster to Ready and wires owner refs on the backing CNPG Cluster", func() {
			dd, err := documentdb.Create(ctx, c, ns, name, documentdb.CreateOptions{
				Base: "documentdb",
				Vars: baseVars("1Gi"),
			})
			Expect(err).ToNot(HaveOccurred())
			DeferCleanup(func(ctx SpecContext) {
				_ = documentdb.Delete(ctx, c, dd, 3*time.Minute)
			})

			key := types.NamespacedName{Namespace: ns, Name: name}
			Eventually(assertions.AssertDocumentDBReady(ctx, c, key),
				timeouts.For(timeouts.DocumentDBReady),
				timeouts.PollInterval(timeouts.DocumentDBReady),
			).Should(Succeed())

			// CNPG Cluster backing this DocumentDB exists and has an
			// owner reference back to the DocumentDB CR — mirrors
			// what docs/designs/e2e-test-suite.md calls for. The
			// Cluster name equals the DocumentDB name for single-
			// cluster deployments (see assertions.clusterNameFor).
			var cluster cnpgv1.Cluster
			Eventually(func() error {
				return c.Get(ctx, key, &cluster)
			}, 2*time.Minute, 5*time.Second).Should(Succeed())

			// Refetch the CR so the UID compared below is live.
			current := getDD(ctx, ns, name)
			Expect(cluster.OwnerReferences).ToNot(BeEmpty(),
				"CNPG Cluster should be owned by the DocumentDB CR")
			// NOTE(review): matching Kind == "DocumentDB" assumes the CRD
			// kind string — confirm against api/preview registration.
			var found bool
			for _, o := range cluster.OwnerReferences {
				if o.UID == current.UID && o.Kind == "DocumentDB" {
					found = true
					break
				}
			}
			Expect(found).To(BeTrue(),
				"expected owner reference with UID=%s on CNPG Cluster %s", current.UID, key)
		})
	})
diff --git a/test/e2e/tests/lifecycle/helpers_test.go b/test/e2e/tests/lifecycle/helpers_test.go
new file mode 100644
index 00000000..d377c018
--- /dev/null
+++ b/test/e2e/tests/lifecycle/helpers_test.go
@@ -0,0 +1,109 @@
package lifecycle

import (
	"context"
	"errors"
	"os"

	. "github.com/onsi/ginkgo/v2" //nolint:revive
	. "github.com/onsi/gomega" //nolint:revive

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"

	previewv1 "github.com/documentdb/documentdb-operator/api/preview"
	"github.com/documentdb/documentdb-operator/test/e2e"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures"
)

var (
	// errPendingPVCs signals that no PVCs have been created yet.
	errPendingPVCs = errors.New("waiting for PVCs to appear")
	// errNotExpanded signals that at least one PVC has not reached
	// the requested capacity yet.
	errNotExpanded = errors.New("waiting for PVC expansion")
)

// baseVars returns the envsubst variables used by the lifecycle base
// template. Image overrides honour the same E2E-wide env vars the
// shared fixtures do; tests that need to mutate specific fields
// override the returned map before calling Create.
+func baseVars(size string) map[string]string { + // Leave DOCUMENTDB_IMAGE / GATEWAY_IMAGE empty by default so the + // operator picks its own defaults — the DocumentDB extension is + // mounted onto the CNPG pg18 base via the image-library mechanism + // and the gateway is a separate sidecar image. Setting a monolithic + // override here (e.g. documentdb-local:16) would point the CNPG + // cluster at a non-postgres image and break initdb. + ddImage := os.Getenv("DOCUMENTDB_IMAGE") + gwImage := os.Getenv("GATEWAY_IMAGE") + storageClass := "standard" + if v := os.Getenv("E2E_STORAGE_CLASS"); v != "" { + storageClass = v + } + if size == "" { + size = "1Gi" + } + return map[string]string{ + "INSTANCES": "1", + "STORAGE_SIZE": size, + "STORAGE_CLASS": storageClass, + "DOCUMENTDB_IMAGE": ddImage, + "GATEWAY_IMAGE": gwImage, + "CREDENTIAL_SECRET": fixtures.DefaultCredentialSecretName, + "EXPOSURE_TYPE": "ClusterIP", + "LOG_LEVEL": "info", + } +} + +// createNamespace creates ns (via fixtures.CreateLabeledNamespace so the +// ownership labels are stamped) and registers a DeferCleanup to remove +// it. The signature is preserved so update_storage_test.go — which is +// out of scope for this pass — continues to compile. +func createNamespace(ctx context.Context, c client.Client, ns string) { + if err := fixtures.CreateLabeledNamespace(ctx, c, ns, "lifecycle"); err != nil { + Fail("create namespace " + ns + ": " + err.Error()) + } + DeferCleanup(func(ctx SpecContext) { + _ = c.Delete(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns}}) + }) +} + +// createCredentialSecret seeds the default DocumentDB credential secret +// so the operator can finish the bootstrap bring-up. Name is accepted +// for signature compatibility with update_storage_test.go; when it +// matches DefaultCredentialSecretName the fixtures helper is used so +// ownership labels are stamped. 
+func createCredentialSecret(ctx context.Context, c client.Client, ns, name string) { + if name == fixtures.DefaultCredentialSecretName || name == "" { + if err := fixtures.CreateLabeledCredentialSecret(ctx, c, ns); err != nil { + Fail("create credential secret " + ns + "/" + name + ": " + err.Error()) + } + return + } + // Non-default secret name — fall back to an inline Create so callers + // can seed multiple named secrets in the same namespace. + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Type: corev1.SecretTypeOpaque, + StringData: map[string]string{ + "username": fixtures.DefaultCredentialUsername, + "password": fixtures.DefaultCredentialPassword, + }, + } + if err := c.Create(ctx, sec); err != nil { + Fail("create credential secret " + ns + "/" + name + ": " + err.Error()) + } +} + +// getDD is a convenience shortcut around documentdb.Get used by specs +// that need to refetch the CR after a patch. +func getDD(ctx context.Context, ns, name string) *previewv1.DocumentDB { + c := e2e.SuiteEnv().Client + dd, err := documentdb.Get(ctx, c, types.NamespacedName{Namespace: ns, Name: name}) + Expect(err).ToNot(HaveOccurred()) + return dd +} diff --git a/test/e2e/tests/lifecycle/lifecycle_suite_test.go b/test/e2e/tests/lifecycle/lifecycle_suite_test.go new file mode 100644 index 00000000..613023a3 --- /dev/null +++ b/test/e2e/tests/lifecycle/lifecycle_suite_test.go @@ -0,0 +1,57 @@ +// Package lifecycle hosts the DocumentDB E2E lifecycle area. See +// docs/designs/e2e-test-suite.md for the spec catalog. This file is +// the Ginkgo root for the area binary and shares bootstrap with the +// other area binaries via the exported helpers in package e2e. +package lifecycle + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +const operatorReadyTimeout = 2 * time.Minute + +func TestLifecycle(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E - Lifecycle", Label(e2e.LifecycleLabel)) +} + +var _ = SynchronizedBeforeSuite( + func(ctx SpecContext) []byte { + if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("lifecycle bootstrap: %v", err)) + } + return []byte{} + }, + func(_ SpecContext, _ []byte) { + if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("lifecycle worker bootstrap: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + func(ctx SpecContext) { + if err := e2e.TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "lifecycle teardown: %v\n", err) + } + }, + func(_ SpecContext) {}, +) + + +// BeforeEach in this area aborts the spec if the operator pod has +// drifted since SetupSuite (UID/name/restart-count change). Area +// tests/upgrade/ intentionally omits this hook because operator +// restarts are part of its scenario. +var _ = BeforeEach(func() { +Expect(e2e.CheckOperatorUnchanged()).To(Succeed(), +"operator health check failed — a previous spec or reconciler likely restarted the operator") +}) diff --git a/test/e2e/tests/lifecycle/update_image_test.go b/test/e2e/tests/lifecycle/update_image_test.go new file mode 100644 index 00000000..9a1f9fb6 --- /dev/null +++ b/test/e2e/tests/lifecycle/update_image_test.go @@ -0,0 +1,99 @@ +package lifecycle + +import ( + "context" + "os" + "time" + + . "github.com/onsi/ginkgo/v2" //nolint:revive + . 
 "github.com/onsi/gomega" //nolint:revive

	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"

	previewv1 "github.com/documentdb/documentdb-operator/api/preview"
	"github.com/documentdb/documentdb-operator/test/e2e"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts"
)

// The design doc calls the field `spec.documentDbVersion`; the CRD at
// operator/src/api/preview/documentdb_types.go names it DocumentDBVersion
// (JSON `documentDBVersion`) and also exposes DocumentDBImage / GatewayImage
// which take precedence when set. Because the base template provides
// DocumentDBImage (not Version), we exercise the rollout via the image
// field and assert against Status.DocumentDBImage — Phase 3 follow-up to
// parameterise this once the Version-only path is wired into manifests.
var _ = Describe("DocumentDB lifecycle — update documentDBImage",
	Label(e2e.LifecycleLabel, e2e.DisruptiveLabel), e2e.MediumLevelLabel,
	func() {
		const name = "lifecycle-update-image"
		var (
			ctx context.Context
			ns  string
			c   client.Client
		)

		BeforeEach(func() {
			e2e.SkipUnlessLevel(e2e.Medium)
			ctx = context.Background()
			c = e2e.SuiteEnv().Client
			ns = namespaces.NamespaceForSpec(e2e.LifecycleLabel)
			createNamespace(ctx, c, ns)
			createCredentialSecret(ctx, c, ns, "documentdb-credentials")
		})

		It("rolls out a new image tag and reflects it in Status.DocumentDBImage", func() {
			vars := baseVars("1Gi")
			startImage := vars["DOCUMENTDB_IMAGE"]
			// Both env vars are required; without an explicit start and a
			// distinct target the spec cannot prove a real rollout.
			if startImage == "" {
				Skip("DOCUMENTDB_IMAGE env var must be set for the image-update spec — " +
					"it needs an explicit starting tag to roll off of. Set DOCUMENTDB_IMAGE " +
					"and optionally E2E_DOCUMENTDB_IMAGE_NEXT to exercise this path.")
			}
			// The target image override must be an explicit
			// different tag; without it the patch would be a no-op
			// (same image as startImage) and the Eventually below
			// would trivially pass, producing a false positive.
			targetImage := os.Getenv("E2E_DOCUMENTDB_IMAGE_NEXT")
			if targetImage == "" || targetImage == startImage {
				Skip("E2E_DOCUMENTDB_IMAGE_NEXT must be set to a different image than " +
					"DOCUMENTDB_IMAGE to exercise a real rollout — skipping to avoid a no-op.")
			}

			dd, err := documentdb.Create(ctx, c, ns, name, documentdb.CreateOptions{
				Base: "documentdb",
				Vars: vars,
			})
			Expect(err).ToNot(HaveOccurred())
			DeferCleanup(func(ctx SpecContext) {
				_ = documentdb.Delete(ctx, c, dd, 3*time.Minute)
			})

			key := types.NamespacedName{Namespace: ns, Name: name}
			Eventually(assertions.AssertDocumentDBReady(ctx, c, key),
				timeouts.For(timeouts.DocumentDBReady),
				timeouts.PollInterval(timeouts.DocumentDBReady),
			).Should(Succeed())

			// Refetch for a fresh resourceVersion before patching.
			fresh := getDD(ctx, ns, name)
			Expect(documentdb.PatchSpec(ctx, c, fresh, func(s *previewv1.DocumentDBSpec) {
				s.DocumentDBImage = targetImage
			})).To(Succeed())

			// Status must eventually report the new image tag.
			Eventually(func() string {
				current := getDD(ctx, ns, name)
				return current.Status.DocumentDBImage
			}, timeouts.For(timeouts.DocumentDBUpgrade),
				timeouts.PollInterval(timeouts.DocumentDBUpgrade),
			).Should(Equal(targetImage))

			// And the cluster must settle back to Ready after the rollout.
			Eventually(assertions.AssertDocumentDBReady(ctx, c, key),
				timeouts.For(timeouts.DocumentDBUpgrade),
				timeouts.PollInterval(timeouts.DocumentDBUpgrade),
			).Should(Succeed())
		})
	})
diff --git a/test/e2e/tests/lifecycle/update_loglevel_test.go b/test/e2e/tests/lifecycle/update_loglevel_test.go
new file mode 100644
index 00000000..ddb2c2d3
--- /dev/null
+++ b/test/e2e/tests/lifecycle/update_loglevel_test.go
@@ -0,0 +1,79 @@
package lifecycle

import (
	"context"
	"time"

	. "github.com/onsi/ginkgo/v2" //nolint:revive
	. "github.com/onsi/gomega" //nolint:revive

	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"

	previewv1 "github.com/documentdb/documentdb-operator/api/preview"
	"github.com/documentdb/documentdb-operator/test/e2e"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts"
)

// Non-disruptive update path: a log-level change must round-trip through
// the API server without breaking readiness.
var _ = Describe("DocumentDB lifecycle — update logLevel",
	Label(e2e.LifecycleLabel, e2e.BasicLabel), e2e.MediumLevelLabel,
	func() {
		const name = "lifecycle-update-loglevel"
		var (
			ctx context.Context
			ns  string
			c   client.Client
		)

		BeforeEach(func() {
			e2e.SkipUnlessLevel(e2e.Medium)
			ctx = context.Background()
			c = e2e.SuiteEnv().Client
			ns = namespaces.NamespaceForSpec(e2e.LifecycleLabel)
			createNamespace(ctx, c, ns)
			createCredentialSecret(ctx, c, ns, "documentdb-credentials")
		})

		It("propagates a spec.logLevel patch to the live CR", func() {
			vars := baseVars("1Gi")
			vars["LOG_LEVEL"] = "info"

			dd, err := documentdb.Create(ctx, c, ns, name, documentdb.CreateOptions{
				Base: "documentdb",
				Vars: vars,
			})
			Expect(err).ToNot(HaveOccurred())
			DeferCleanup(func(ctx SpecContext) {
				_ = documentdb.Delete(ctx, c, dd, 3*time.Minute)
			})

			key := types.NamespacedName{Namespace: ns, Name: name}
			Eventually(assertions.AssertDocumentDBReady(ctx, c, key),
				timeouts.For(timeouts.DocumentDBReady),
				timeouts.PollInterval(timeouts.DocumentDBReady),
			).Should(Succeed())

			// Patch spec.logLevel; field is exported verbatim as
			// LogLevel in api/preview/documentdb_types.go.
			fresh := getDD(ctx, ns, name)
			Expect(fresh.Spec.LogLevel).To(Equal("info"))
			Expect(documentdb.PatchSpec(ctx, c, fresh, func(s *previewv1.DocumentDBSpec) {
				s.LogLevel = "debug"
			})).To(Succeed())

			Eventually(func() string {
				current := getDD(ctx, ns, name)
				return current.Spec.LogLevel
			}, 1*time.Minute, 2*time.Second).Should(Equal("debug"),
				"patched spec.logLevel should reach the API server")

			// Reconciliation should not disrupt readiness while the
			// only change is a log-level string.
			Consistently(assertions.AssertDocumentDBReady(ctx, c, key),
				30*time.Second, 5*time.Second,
			).Should(Succeed())
		})
	})
diff --git a/test/e2e/tests/lifecycle/update_storage_test.go b/test/e2e/tests/lifecycle/update_storage_test.go
new file mode 100644
index 00000000..019acb16
--- /dev/null
+++ b/test/e2e/tests/lifecycle/update_storage_test.go
@@ -0,0 +1,108 @@
package lifecycle

import (
	"context"
	"time"

	. "github.com/onsi/ginkgo/v2" //nolint:revive
	.
 "github.com/onsi/gomega" //nolint:revive

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"

	previewv1 "github.com/documentdb/documentdb-operator/api/preview"
	"github.com/documentdb/documentdb-operator/test/e2e"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/clusterprobe"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts"
)

// The CRD nests storage as Spec.Resource.Storage.PvcSize (see
// operator/src/api/preview/documentdb_types.go). The design doc wording
// "spec.resource or spec.persistentVolumeClaim" is ambiguous — the real
// field is `spec.resource.storage.pvcSize`, patched below.
var _ = Describe("DocumentDB lifecycle — update storage.pvcSize",
	Label(e2e.LifecycleLabel, e2e.DisruptiveLabel, e2e.NeedsCSIResizeLabel),
	e2e.MediumLevelLabel,
	func() {
		const name = "lifecycle-update-storage"
		var (
			ctx context.Context
			ns  string
			c   client.Client
		)

		BeforeEach(func() {
			e2e.SkipUnlessLevel(e2e.Medium)
			ctx = context.Background()
			c = e2e.SuiteEnv().Client
			// Runtime capability probe: PVC resize silently falls over
			// on StorageClasses without AllowVolumeExpansion=true. The
			// NeedsCSIResizeLabel only gates invocation; this probe
			// gives a clear Skip when the backing class cannot expand.
			scName := baseVars("1Gi")["STORAGE_CLASS"]
			canExpand, err := clusterprobe.StorageClassAllowsExpansion(ctx, c, scName)
			Expect(err).NotTo(HaveOccurred(), "probe StorageClass %q expansion", scName)
			if !canExpand {
				Skip("StorageClass " + scName + " does not allow volume expansion — skipping PVC resize spec")
			}
			ns = namespaces.NamespaceForSpec(e2e.LifecycleLabel)
			createNamespace(ctx, c, ns)
			createCredentialSecret(ctx, c, ns, "documentdb-credentials")
		})

		// NOTE(review): the It title claims "without rotating the primary"
		// but no assertion below pins the primary pod's identity across the
		// resize — confirm whether a pod-UID check should be added.
		It("expands PVCs from 1Gi to 2Gi without rotating the primary", func() {
			dd, err := documentdb.Create(ctx, c, ns, name, documentdb.CreateOptions{
				Base: "documentdb",
				Vars: baseVars("1Gi"),
			})
			Expect(err).ToNot(HaveOccurred())
			DeferCleanup(func(ctx SpecContext) {
				_ = documentdb.Delete(ctx, c, dd, 3*time.Minute)
			})

			key := types.NamespacedName{Namespace: ns, Name: name}
			Eventually(assertions.AssertDocumentDBReady(ctx, c, key),
				timeouts.For(timeouts.DocumentDBReady),
				timeouts.PollInterval(timeouts.DocumentDBReady),
			).Should(Succeed())

			// Patch the storage size.
			fresh := getDD(ctx, ns, name)
			Expect(documentdb.PatchSpec(ctx, c, fresh, func(s *previewv1.DocumentDBSpec) {
				s.Resource.Storage.PvcSize = "2Gi"
			})).To(Succeed())

			// PVC capacity should eventually be updated across all
			// backing claims. List PVCs in the namespace; in kind
			// with a single-instance cluster there is one data PVC.
			want := resource.MustParse("2Gi")
			Eventually(func() error {
				var pvcs corev1.PersistentVolumeClaimList
				if err := c.List(ctx, &pvcs, client.InNamespace(ns)); err != nil {
					return err
				}
				if len(pvcs.Items) == 0 {
					return errPendingPVCs
				}
				for i := range pvcs.Items {
					// Cmp < 0 means the claim is still below the target.
					got := pvcs.Items[i].Status.Capacity[corev1.ResourceStorage]
					if got.Cmp(want) < 0 {
						return errNotExpanded
					}
				}
				return nil
			}, timeouts.For(timeouts.PVCResize),
				timeouts.PollInterval(timeouts.PVCResize),
			).Should(Succeed())

			// Cluster still healthy after the resize.
			Eventually(assertions.AssertDocumentDBReady(ctx, c, key),
				1*time.Minute, 5*time.Second,
			).Should(Succeed())
		})
	})
diff --git a/test/e2e/tests/performance/perf_aggregation_test.go b/test/e2e/tests/performance/perf_aggregation_test.go
new file mode 100644
index 00000000..b723dd8c
--- /dev/null
+++ b/test/e2e/tests/performance/perf_aggregation_test.go
@@ -0,0 +1,87 @@
package performance

import (
	"time"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	"go.mongodb.org/mongo-driver/v2/bson"

	"github.com/documentdb/documentdb-operator/test/e2e"
	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/seed"
)

// Aggregation threshold rationale
//
// A $group pipeline over a few thousand small documents on an indexed
// field is dominated by gateway + wire overhead, not planner work. On
// kind-on-laptop the pipeline completes in ~1–3s; the 45s budget is a
// generous upper bound that will only fail on a hard regression (e.g.,
// unexpected collection-scan fallback or planner bug).
var _ = Describe("DocumentDB performance — aggregation pipeline",
	Label(e2e.PerformanceLabel, e2e.SlowLabel), e2e.HighLevelLabel,
	Ordered, Serial, func() {

		const (
			copies    = 40 // seed.AggDataset * copies = 2,000 docs
			aggBudget = 45 * time.Second
			batchWrite = 500
		)

		BeforeEach(func() { e2e.SkipUnlessLevel(e2e.High) })

		It("runs a $group aggregation within the smoke threshold", func(ctx SpecContext) {
			conn := connectSharedRO(ctx)
			DeferCleanup(conn.Stop)

			coll := conn.Client.Database(conn.DB).Collection("agg")

			// Replicate the canonical AggDataset so we stay within a
			// deterministic shape while reaching non-trivial size.
+ base := seed.AggDataset() + buf := make([]any, 0, batchWrite) + id := 1 + flush := func() { + if len(buf) == 0 { + return + } + _, err := coll.InsertMany(ctx, buf) + Expect(err).NotTo(HaveOccurred(), "seed agg") + buf = buf[:0] + } + for c := 0; c < copies; c++ { + for _, d := range base { + cp := bson.M{} + for k, v := range d { + cp[k] = v + } + cp["_id"] = id + id++ + buf = append(buf, cp) + if len(buf) >= batchWrite { + flush() + } + } + } + flush() + + pipeline := []bson.M{ + {"$group": bson.M{"_id": "$category", "total": bson.M{"$sum": "$value"}, "n": bson.M{"$sum": 1}}}, + {"$sort": bson.M{"_id": 1}}, + } + + start := time.Now() + cur, err := coll.Aggregate(ctx, pipeline) + Expect(err).NotTo(HaveOccurred(), "Aggregate") + var out []bson.M + Expect(cur.All(ctx, &out)).To(Succeed()) + elapsed := time.Since(start) + logLatency("aggregate-group", elapsed) + + Expect(out).To(HaveLen(seed.AggDatasetGroups), + "each AggDataset category should appear once") + Expect(elapsed).To(BeNumerically("<", aggBudget), + "$group pipeline should complete within %s", aggBudget) + }) + }) diff --git a/test/e2e/tests/performance/perf_count_range_test.go b/test/e2e/tests/performance/perf_count_range_test.go new file mode 100644 index 00000000..1369f20b --- /dev/null +++ b/test/e2e/tests/performance/perf_count_range_test.go @@ -0,0 +1,66 @@ +package performance + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +// Count/range threshold rationale +// +// After seeding 5,000 documents and creating an index on `value`, a +// half-range query (value >= midpoint) should hit the index and return +// ~2,500 documents quickly — well under a second on a hot kind cluster. 
+// We allow 30s to absorb port-forward warmup + cold-cache index +// traversal on busy CI nodes. Any regression past 30s likely means the +// planner stopped using the index. +var _ = Describe("DocumentDB performance — count with range + index", + Label(e2e.PerformanceLabel, e2e.SlowLabel), e2e.HighLevelLabel, + Ordered, Serial, func() { + + const ( + docCount = 5_000 + countBudget = 30 * time.Second + ) + + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.High) }) + + It("counts half the range using an index within the smoke threshold", func(ctx SpecContext) { + conn := connectSharedRO(ctx) + DeferCleanup(conn.Stop) + + coll := conn.Client.Database(conn.DB).Collection("range_count") + + docs := make([]any, docCount) + for i := 0; i < docCount; i++ { + docs[i] = bson.M{"_id": i + 1, "value": i + 1} + } + _, err := coll.InsertMany(ctx, docs) + Expect(err).NotTo(HaveOccurred(), "seed range_count") + + _, err = coll.Indexes().CreateOne(ctx, mongo.IndexModel{ + Keys: bson.D{{Key: "value", Value: 1}}, + Options: options.Index().SetName("idx_value"), + }) + Expect(err).NotTo(HaveOccurred(), "create value index") + + filter := bson.M{"value": bson.M{"$gte": docCount / 2}} + + start := time.Now() + n, err := coll.CountDocuments(ctx, filter) + elapsed := time.Since(start) + logLatency("count-range", elapsed) + + Expect(err).NotTo(HaveOccurred(), "CountDocuments range") + Expect(n).To(BeEquivalentTo(docCount/2 + 1)) + Expect(elapsed).To(BeNumerically("<", countBudget), + "indexed range count should complete within %s", countBudget) + }) + }) diff --git a/test/e2e/tests/performance/perf_delete_drop_test.go b/test/e2e/tests/performance/perf_delete_drop_test.go new file mode 100644 index 00000000..ab667bf5 --- /dev/null +++ b/test/e2e/tests/performance/perf_delete_drop_test.go @@ -0,0 +1,68 @@ +package performance + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "go.mongodb.org/mongo-driver/v2/bson" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +// Delete+drop threshold rationale +// +// DeleteMany of half a collection followed by a Collection.Drop() is a +// cheap metadata-bounded path on the DocumentDB gateway. Kind-on-laptop +// baseline is ~1–3s for both combined; 60s is a generous 20x guard +// aimed at catching pathologic regressions such as tombstone fanout or +// table-rewrite fallback. +var _ = Describe("DocumentDB performance — bulk delete and drop", + Label(e2e.PerformanceLabel, e2e.SlowLabel), e2e.HighLevelLabel, + Ordered, Serial, func() { + + const ( + docCount = 5_000 + deleteBudget = 60 * time.Second + ) + + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.High) }) + + It("bulk-deletes half the collection and drops it within the smoke threshold", func(ctx SpecContext) { + conn := connectSharedRO(ctx) + DeferCleanup(conn.Stop) + + coll := conn.Client.Database(conn.DB).Collection("delete_drop") + + docs := make([]any, docCount) + for i := 0; i < docCount; i++ { + // Even-ids are deletable, odd-ids are survivors. This + // exercises a real matching predicate rather than a + // fast-path {} delete. + docs[i] = bson.M{"_id": i + 1, "even": (i+1)%2 == 0} + } + _, err := coll.InsertMany(ctx, docs) + Expect(err).NotTo(HaveOccurred(), "seed delete_drop") + + start := time.Now() + delRes, err := coll.DeleteMany(ctx, bson.M{"even": true}) + Expect(err).NotTo(HaveOccurred(), "DeleteMany") + Expect(delRes.DeletedCount).To(BeEquivalentTo(docCount / 2)) + + // Drop the collection — the operation should complete + // quickly even on a large collection because it is a + // metadata-only truncate on the server. 
+ Expect(coll.Drop(ctx)).To(Succeed(), "Drop collection") + elapsed := time.Since(start) + logLatency("delete-drop", elapsed) + + Expect(elapsed).To(BeNumerically("<", deleteBudget), + "delete + drop should complete within %s", deleteBudget) + + n, err := coll.CountDocuments(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred(), + "CountDocuments on a dropped collection should return 0, not error") + Expect(n).To(BeEquivalentTo(0)) + }) + }) diff --git a/test/e2e/tests/performance/perf_helpers_test.go b/test/e2e/tests/performance/perf_helpers_test.go new file mode 100644 index 00000000..34aca3b4 --- /dev/null +++ b/test/e2e/tests/performance/perf_helpers_test.go @@ -0,0 +1,68 @@ +package performance + +import ( + "context" + "fmt" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + driver "go.mongodb.org/mongo-driver/v2/mongo" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" +) + +// perfConn bundles everything a perf spec needs to drive mongo traffic +// against the shared RO fixture: a connected client, an isolated DB +// name, and a cleanup hook that drops the DB and tears down the +// port-forward. +type perfConn struct { + Client *driver.Client + DB string + Stop func() +} + +// connectSharedRO provisions the SharedRO fixture (lazily on first +// call) and returns a connected mongo client scoped to a per-spec +// database name derived from CurrentSpecReport().FullText(). The +// returned Stop drops the spec's database and tears down the +// forward/client. +// +// The mechanics (port-forward, credential resolution, retry on +// forwarder bind) are delegated to mongo.NewFromDocumentDB so that all +// suites share a single connect path — we just wrap it to preserve the +// per-spec DB-drop cleanup contract the perf specs rely on. 
+func connectSharedRO(ctx context.Context) *perfConn { + GinkgoHelper() + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "SuiteEnv must be initialized") + + handle, err := fixtures.GetOrCreateSharedRO(ctx, env.Client) + Expect(err).NotTo(HaveOccurred(), "provision SharedRO fixture") + Expect(handle).NotTo(BeNil()) + + h, err := mongo.NewFromDocumentDB(ctx, env, handle.Namespace(), handle.Name()) + Expect(err).NotTo(HaveOccurred(), "open mongo connection to SharedRO") + + db := fixtures.DBNameFor(CurrentSpecReport().FullText()) + c := h.Client() + + stop := func() { + dropCtx, dropCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer dropCancel() + _ = mongo.DropDatabase(dropCtx, c, db) + closeCtx, closeCancel := context.WithTimeout(context.Background(), 15*time.Second) + defer closeCancel() + _ = h.Close(closeCtx) + } + return &perfConn{Client: c, DB: db, Stop: stop} +} + +// logLatency is a small convenience so every spec reports its measured +// duration in a uniform format that CI log scrapers can grep. +func logLatency(op string, elapsed time.Duration) { + fmt.Fprintf(GinkgoWriter, "perf[%s]: %s\n", op, elapsed) +} diff --git a/test/e2e/tests/performance/perf_insert_test.go b/test/e2e/tests/performance/perf_insert_test.go new file mode 100644 index 00000000..496ef898 --- /dev/null +++ b/test/e2e/tests/performance/perf_insert_test.go @@ -0,0 +1,76 @@ +package performance + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "go.mongodb.org/mongo-driver/v2/bson" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +// Insert threshold rationale +// +// Seeding 10,000 small documents via InsertMany is the bulk-write +// tripwire. On a kind-on-laptop baseline the operation typically +// completes in 10–20s; CI nodes add variance. 
The 2-minute bound is a +// generous ~8x multiplier intended to catch catastrophic regressions +// (e.g., accidental per-document round-trips, gateway CPU starvation) +// rather than to grade performance. +var _ = Describe("DocumentDB performance — bulk insert", + Label(e2e.PerformanceLabel, e2e.SlowLabel), e2e.HighLevelLabel, + Ordered, Serial, func() { + + const ( + docCount = 10_000 + insertBudget = 2 * time.Minute + perInsertBatch = 1_000 + ) + + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.High) }) + + It("bulk-inserts 10k documents within the smoke threshold", func(ctx SpecContext) { + conn := connectSharedRO(ctx) + DeferCleanup(conn.Stop) + + coll := conn.Client.Database(conn.DB).Collection("bulk_insert") + + // Build the payload outside the timed region so we measure + // server-side insert latency rather than Go allocations. + batches := make([][]any, 0, docCount/perInsertBatch) + for b := 0; b < docCount/perInsertBatch; b++ { + docs := make([]any, perInsertBatch) + base := b * perInsertBatch + for i := 0; i < perInsertBatch; i++ { + n := base + i + 1 + docs[i] = bson.M{ + "_id": n, + "kind": "perf", + "value": n, + } + } + batches = append(batches, docs) + } + + opCtx, cancel := context.WithTimeout(ctx, insertBudget) + defer cancel() + + start := time.Now() + for _, batch := range batches { + _, err := coll.InsertMany(opCtx, batch) + Expect(err).NotTo(HaveOccurred(), "InsertMany") + } + elapsed := time.Since(start) + logLatency("insert-10k", elapsed) + + Expect(elapsed).To(BeNumerically("<", insertBudget), + "bulk insert of %d docs should complete within %s", docCount, insertBudget) + + n, err := coll.CountDocuments(ctx, bson.M{}) + Expect(err).NotTo(HaveOccurred()) + Expect(n).To(BeEquivalentTo(docCount)) + }) + }) diff --git a/test/e2e/tests/performance/perf_sort_test.go b/test/e2e/tests/performance/perf_sort_test.go new file mode 100644 index 00000000..8d0655c6 --- /dev/null +++ b/test/e2e/tests/performance/perf_sort_test.go @@ -0,0 +1,86 @@ +package 
performance + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +// Sort threshold rationale +// +// With an ascending index on the sort field, a full collection sort +// over 5,000 docs is effectively a scan-of-index + cursor drain. On +// kind-on-laptop this completes in ~2s; the 60s cap absorbs cold index +// loads, port-forward warmup, and CI noise. A regression past the cap +// almost always means the sort fell back to in-memory post-processing. +var _ = Describe("DocumentDB performance — indexed sort", + Label(e2e.PerformanceLabel, e2e.SlowLabel), e2e.HighLevelLabel, + Ordered, Serial, func() { + + const ( + docCount = 5_000 + sortBudget = 60 * time.Second + ) + + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.High) }) + + It("drains a sorted cursor using an index within the smoke threshold", func(ctx SpecContext) { + conn := connectSharedRO(ctx) + DeferCleanup(conn.Stop) + + coll := conn.Client.Database(conn.DB).Collection("sorted") + + // Seed in reverse order so a naive collection-scan sort + // would be slower than an index-assisted one — makes the + // index actually useful for the assertion. 
+ docs := make([]any, docCount) + for i := 0; i < docCount; i++ { + docs[i] = bson.M{"_id": i + 1, "score": docCount - i} + } + _, err := coll.InsertMany(ctx, docs) + Expect(err).NotTo(HaveOccurred(), "seed sorted") + + _, err = coll.Indexes().CreateOne(ctx, mongo.IndexModel{ + Keys: bson.D{{Key: "score", Value: 1}}, + Options: options.Index().SetName("idx_score"), + }) + Expect(err).NotTo(HaveOccurred(), "create score index") + + findOpts := options.Find().SetSort(bson.D{{Key: "score", Value: 1}}) + + start := time.Now() + cur, err := coll.Find(ctx, bson.M{}, findOpts) + Expect(err).NotTo(HaveOccurred(), "Find with sort") + var last int32 + first := true + count := 0 + for cur.Next(ctx) { + var d struct { + Score int32 `bson:"score"` + } + Expect(cur.Decode(&d)).To(Succeed()) + if !first { + Expect(d.Score).To(BeNumerically(">=", last), + "sort output must be non-decreasing") + } + last = d.Score + first = false + count++ + } + Expect(cur.Err()).NotTo(HaveOccurred()) + Expect(cur.Close(ctx)).To(Succeed()) + elapsed := time.Since(start) + logLatency("sort-index", elapsed) + + Expect(count).To(Equal(docCount)) + Expect(elapsed).To(BeNumerically("<", sortBudget), + "indexed sort should complete within %s", sortBudget) + }) + }) diff --git a/test/e2e/tests/performance/perf_update_test.go b/test/e2e/tests/performance/perf_update_test.go new file mode 100644 index 00000000..03beecc2 --- /dev/null +++ b/test/e2e/tests/performance/perf_update_test.go @@ -0,0 +1,63 @@ +package performance + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "go.mongodb.org/mongo-driver/v2/bson" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +// Update threshold rationale +// +// A single UpdateMany over 5,000 docs that sets a new field on every +// document is one round-trip per call, so the wall-clock cost is +// dominated by server-side write amplification + WAL. 
Kind-on-laptop +// baseline is ~2–5s; 90s is a generous tripwire that catches pathologic +// regressions (e.g., accidentally rewriting $set as per-doc upserts). +var _ = Describe("DocumentDB performance — bulk update", + Label(e2e.PerformanceLabel, e2e.SlowLabel), e2e.HighLevelLabel, + Ordered, Serial, func() { + + const ( + docCount = 5_000 + updateBudget = 90 * time.Second + ) + + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.High) }) + + It("bulk-updates every document within the smoke threshold", func(ctx SpecContext) { + conn := connectSharedRO(ctx) + DeferCleanup(conn.Stop) + + coll := conn.Client.Database(conn.DB).Collection("bulk_update") + + docs := make([]any, docCount) + for i := 0; i < docCount; i++ { + docs[i] = bson.M{"_id": i + 1, "touched": false, "value": i} + } + _, err := coll.InsertMany(ctx, docs) + Expect(err).NotTo(HaveOccurred(), "seed bulk_update") + + start := time.Now() + res, err := coll.UpdateMany(ctx, + bson.M{"touched": false}, + bson.M{"$set": bson.M{"touched": true, "stamp": "perf"}}, + ) + elapsed := time.Since(start) + logLatency("update-5k", elapsed) + + Expect(err).NotTo(HaveOccurred(), "UpdateMany") + Expect(res.MatchedCount).To(BeEquivalentTo(docCount)) + Expect(res.ModifiedCount).To(BeEquivalentTo(docCount)) + Expect(elapsed).To(BeNumerically("<", updateBudget), + "bulk update should complete within %s", updateBudget) + + remaining, err := coll.CountDocuments(ctx, bson.M{"touched": false}) + Expect(err).NotTo(HaveOccurred()) + Expect(remaining).To(BeEquivalentTo(0)) + }) + }) diff --git a/test/e2e/tests/performance/performance_suite_test.go b/test/e2e/tests/performance/performance_suite_test.go new file mode 100644 index 00000000..c05318da --- /dev/null +++ b/test/e2e/tests/performance/performance_suite_test.go @@ -0,0 +1,56 @@ +// Package performance hosts the DocumentDB E2E performance area. See +// docs/designs/e2e-test-suite.md for the spec catalog. 
This file is +// the Ginkgo root for the area binary and shares bootstrap with the +// other area binaries via the exported helpers in package e2e. +package performance + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +const operatorReadyTimeout = 2 * time.Minute + +func TestPerformance(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E - Performance", Label(e2e.PerformanceLabel)) +} + +var _ = SynchronizedBeforeSuite( + func(ctx SpecContext) []byte { + if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("performance bootstrap: %v", err)) + } + return []byte{} + }, + func(_ SpecContext, _ []byte) { + if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("performance worker bootstrap: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + func(ctx SpecContext) { + if err := e2e.TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "performance teardown: %v\n", err) + } + }, + func(_ SpecContext) {}, +) + +// BeforeEach in this area aborts the spec if the operator pod has +// drifted since SetupSuite (UID/name/restart-count change). Area +// tests/upgrade/ intentionally omits this hook because operator +// restarts are part of its scenario. +var _ = BeforeEach(func() { +Expect(e2e.CheckOperatorUnchanged()).To(Succeed(), +"operator health check failed — a previous spec or reconciler likely restarted the operator") +}) diff --git a/test/e2e/tests/scale/scale_down_test.go b/test/e2e/tests/scale/scale_down_test.go new file mode 100644 index 00000000..e2b0ffa4 --- /dev/null +++ b/test/e2e/tests/scale/scale_down_test.go @@ -0,0 +1,117 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package scale + +import ( + "context" + "fmt" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + cnpgclusterutils "github.com/cloudnative-pg/cloudnative-pg/tests/utils/clusterutils" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +var _ = Describe("DocumentDB scale — down", + Ordered, + Label(e2e.ScaleLabel, e2e.BasicLabel), + e2e.MediumLevelLabel, + func() { + var ( + handle *fixtures.SharedScaleHandle + c client.Client + ctx context.Context + key client.ObjectKey + ) + + BeforeAll(func() { + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "SuiteEnv not initialized") + ctx = env.Ctx + if ctx == nil { + ctx = context.Background() + } + c = env.Client + + h, err := fixtures.GetOrCreateSharedScale(ctx, c) + Expect(err).NotTo(HaveOccurred(), "get-or-create shared-scale fixture") + handle = h + key = client.ObjectKey{Namespace: handle.Namespace(), Name: handle.Name()} + }) + + AfterEach(func() { + Expect(handle.ResetToTwoInstances(ctx, c)).To(Succeed(), + "reset shared-scale fixture to 2 instances") + }) + + It("scales 3 → 2 instances", func() { + e2e.SkipUnlessLevel(e2e.Medium) + + // Grow to 3 first so we can assert a genuine 3→2 scale-down. + Expect(documentdb.PatchInstances(ctx, c, key.Namespace, key.Name, 3)).To(Succeed()) + Eventually(assertions.AssertInstanceCount(ctx, c, key, 3), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). 
+ Should(Succeed(), "CNPG Cluster should converge to readyInstances=3 before scale-down") + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed()) + + Expect(documentdb.PatchInstances(ctx, c, key.Namespace, key.Name, 2)).To(Succeed()) + + Eventually(assertions.AssertInstanceCount(ctx, c, key, 2), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "CNPG Cluster should converge to readyInstances=2") + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "DocumentDB should be Ready at 2 instances") + }) + + It("scales 2 → 1 instance and stays healthy after primary re-election", func() { + e2e.SkipUnlessLevel(e2e.Medium) + + primary, err := cnpgclusterutils.GetPrimary(ctx, c, key.Namespace, key.Name) + Expect(err).NotTo(HaveOccurred(), "fetch initial primary") + Expect(primary).NotTo(BeNil()) + GinkgoLogr.Info("initial primary before 2→1 scale-down", "pod", primary.Name) + + Expect(documentdb.PatchInstances(ctx, c, key.Namespace, key.Name, 1)).To(Succeed()) + + Eventually(assertions.AssertInstanceCount(ctx, c, key, 1), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "CNPG Cluster should converge to readyInstances=1") + + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "DocumentDB should be Ready after scaling to 1 instance") + + // After scale-down, a primary must still exist — but its + // identity may legitimately have changed via re-election, + // so we do not assert pod-name equality here. 
+ Eventually(func() error { + cl := &cnpgv1.Cluster{} + if err := c.Get(ctx, key, cl); err != nil { + return fmt.Errorf("get CNPG cluster: %w", err) + } + if cl.Status.CurrentPrimary == "" { + return fmt.Errorf("CNPG cluster %s has no currentPrimary", key) + } + return nil + }, timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "CNPG Cluster should report a currentPrimary after re-election") + }) + }) diff --git a/test/e2e/tests/scale/scale_suite_test.go b/test/e2e/tests/scale/scale_suite_test.go new file mode 100644 index 00000000..d396f698 --- /dev/null +++ b/test/e2e/tests/scale/scale_suite_test.go @@ -0,0 +1,56 @@ +// Package scale hosts the DocumentDB E2E scale area. See +// docs/designs/e2e-test-suite.md for the spec catalog. This file is +// the Ginkgo root for the area binary and shares bootstrap with the +// other area binaries via the exported helpers in package e2e. +package scale + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +const operatorReadyTimeout = 2 * time.Minute + +func TestScale(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E - Scale", Label(e2e.ScaleLabel)) +} + +var _ = SynchronizedBeforeSuite( + func(ctx SpecContext) []byte { + if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("scale bootstrap: %v", err)) + } + return []byte{} + }, + func(_ SpecContext, _ []byte) { + if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("scale worker bootstrap: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + func(ctx SpecContext) { + if err := e2e.TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "scale teardown: %v\n", err) + } + }, + func(_ SpecContext) {}, +) + +// BeforeEach in this area aborts the spec if the operator pod has +// drifted since SetupSuite (UID/name/restart-count change). Area +// tests/upgrade/ intentionally omits this hook because operator +// restarts are part of its scenario. +var _ = BeforeEach(func() { +Expect(e2e.CheckOperatorUnchanged()).To(Succeed(), +"operator health check failed — a previous spec or reconciler likely restarted the operator") +}) diff --git a/test/e2e/tests/scale/scale_up_test.go b/test/e2e/tests/scale/scale_up_test.go new file mode 100644 index 00000000..3c311375 --- /dev/null +++ b/test/e2e/tests/scale/scale_up_test.go @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package scale + +import ( + "context" + + cnpgclusterutils "github.com/cloudnative-pg/cloudnative-pg/tests/utils/clusterutils" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +var _ = Describe("DocumentDB scale — up", + Ordered, + Label(e2e.ScaleLabel, e2e.BasicLabel), + e2e.MediumLevelLabel, + func() { + var ( + handle *fixtures.SharedScaleHandle + c client.Client + ctx context.Context + key client.ObjectKey + ) + + BeforeAll(func() { + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "SuiteEnv not initialized") + ctx = env.Ctx + if ctx == nil { + ctx = context.Background() + } + c = env.Client + + h, err := fixtures.GetOrCreateSharedScale(ctx, c) + Expect(err).NotTo(HaveOccurred(), "get-or-create shared-scale fixture") + handle = h + key = client.ObjectKey{Namespace: handle.Namespace(), Name: handle.Name()} + }) + + AfterEach(func() { + Expect(handle.ResetToTwoInstances(ctx, c)).To(Succeed(), + "reset shared-scale fixture to 2 instances") + }) + + It("scales 2 → 3 instances while keeping the primary pod stable", func() { + e2e.SkipUnlessLevel(e2e.Medium) + + primary, err := cnpgclusterutils.GetPrimary(ctx, c, key.Namespace, key.Name) + Expect(err).NotTo(HaveOccurred(), "fetch initial primary") + Expect(primary).NotTo(BeNil()) + initialPrimary := primary.Name + + Expect(documentdb.PatchInstances(ctx, c, key.Namespace, key.Name, 3)).To(Succeed()) + + Eventually(assertions.AssertInstanceCount(ctx, c, key, 3), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). 
+ Should(Succeed(), "CNPG Cluster should report readyInstances=3") + + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "DocumentDB status should be Ready") + + Expect(assertions.AssertPrimaryUnchanged(ctx, c, key, initialPrimary)()). + To(Succeed(), "scaling up must not change the primary") + }) + + It("scales 1 → 2 instances after first scaling down to 1", func() { + e2e.SkipUnlessLevel(e2e.Medium) + + Expect(documentdb.PatchInstances(ctx, c, key.Namespace, key.Name, 1)).To(Succeed()) + Eventually(assertions.AssertInstanceCount(ctx, c, key, 1), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "CNPG Cluster should converge to readyInstances=1") + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "DocumentDB should be Ready at 1 instance") + + primary, err := cnpgclusterutils.GetPrimary(ctx, c, key.Namespace, key.Name) + Expect(err).NotTo(HaveOccurred(), "fetch primary before scale-up") + Expect(primary).NotTo(BeNil()) + initialPrimary := primary.Name + + Expect(documentdb.PatchInstances(ctx, c, key.Namespace, key.Name, 2)).To(Succeed()) + + Eventually(assertions.AssertInstanceCount(ctx, c, key, 2), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "CNPG Cluster should converge to readyInstances=2") + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.InstanceScale), + timeouts.PollInterval(timeouts.InstanceScale)). + Should(Succeed(), "DocumentDB should be Ready at 2 instances") + + Expect(assertions.AssertPrimaryUnchanged(ctx, c, key, initialPrimary)()). 
+				To(Succeed(), "scaling up 1→2 must not change the primary")
+		})
+	})
diff --git a/test/e2e/tests/status/connection_string_test.go b/test/e2e/tests/status/connection_string_test.go
new file mode 100644
index 00000000..2358aad3
--- /dev/null
+++ b/test/e2e/tests/status/connection_string_test.go
@@ -0,0 +1,68 @@
+package status
+
+import (
+	"context"
+	"net/url"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/documentdb/documentdb-operator/test/e2e"
+	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions"
+	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures"
+	"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts"
+)
+
+// DocumentDB status — ConnectionString.
+//
+// The operator publishes a mongodb:// URI in status.connectionString once
+// the gateway Service and credential secret are ready. We:
+//  1. assert the string matches the expected "^mongodb://" shape
+//     (scheme + auth + host segment);
+//  2. parse it with net/url and sanity-check the scheme and host.
+//
+// This spec runs against the session-scoped shared RO fixture so it
+// adds negligible time to the suite.
+var _ = Describe("DocumentDB status — connectionString",
+	Label(e2e.StatusLabel), e2e.MediumLevelLabel,
+	func() {
+		BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) })
+
+		It("publishes a valid mongodb:// URI", func() {
+			env := e2e.SuiteEnv()
+			Expect(env).ToNot(BeNil())
+			c := env.Client
+
+			ctx, cancel := context.WithTimeout(context.Background(), 8*time.Minute)
+			DeferCleanup(cancel)
+
+			handle, err := fixtures.GetOrCreateSharedRO(ctx, c)
+			Expect(err).ToNot(HaveOccurred())
+
+			key := client.ObjectKey{Namespace: handle.Namespace(), Name: handle.Name()}
+
+			// 1. Regex assertion via the shared helper.
+ Eventually( + assertions.AssertConnectionStringMatches(ctx, c, key, `^mongodb://`), + timeouts.For(timeouts.DocumentDBReady), + timeouts.PollInterval(timeouts.DocumentDBReady), + ).Should(Succeed()) + + // 2. Parse + structural sanity. + dd, err := handle.GetCR(ctx, c) + Expect(err).ToNot(HaveOccurred()) + Expect(dd.Status.ConnectionString).ToNot(BeEmpty(), + "status.connectionString must be populated on a Ready DocumentDB") + + u, err := url.Parse(dd.Status.ConnectionString) + Expect(err).ToNot(HaveOccurred(), + "status.connectionString must parse as a URL: %q", dd.Status.ConnectionString) + Expect(u.Scheme).To(Equal("mongodb"), + "connection string scheme must be mongodb; got %q", u.Scheme) + Expect(u.Host).ToNot(BeEmpty(), + "connection string must carry a host component") + }) + }) diff --git a/test/e2e/tests/status/mount_options_test.go b/test/e2e/tests/status/mount_options_test.go new file mode 100644 index 00000000..0e8fef50 --- /dev/null +++ b/test/e2e/tests/status/mount_options_test.go @@ -0,0 +1,95 @@ +package status + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" +) + +// DocumentDB mount options — CRD discrepancy note. +// +// The task brief asked the spec to inspect `status.mountOptions` but no +// such field exists on `DocumentDBStatus`. Mount configuration for a +// DocumentDB cluster lives on the backing CloudNative-PG Postgres pods +// — concretely, CNPG mounts the PGDATA volume at +// `/var/lib/postgresql/data` (see CNPG's pkg/specs/volumes.go). +// +// We therefore verify the observable contract by listing the pods CNPG +// owns (label `cnpg.io/cluster=`) and asserting that at least one +// container mounts `/var/lib/postgresql/data`. 
+const pgdataMountPath = "/var/lib/postgresql/data" + +var _ = Describe("DocumentDB mount options — PGDATA volume mount", + Label(e2e.StatusLabel), e2e.MediumLevelLabel, + func() { + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) + + It("mounts the PGDATA volume at /var/lib/postgresql/data", func() { + env := e2e.SuiteEnv() + Expect(env).ToNot(BeNil()) + c := env.Client + + ctx, cancel := context.WithTimeout(context.Background(), 8*time.Minute) + DeferCleanup(cancel) + + handle, err := fixtures.GetOrCreateSharedRO(ctx, c) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func() error { + pods := &corev1.PodList{} + if err := c.List(ctx, pods, + client.InNamespace(handle.Namespace()), + client.MatchingLabels{"cnpg.io/cluster": handle.Name()}, + ); err != nil { + return err + } + if len(pods.Items) == 0 { + return &noCNPGPodsErr{ + namespace: handle.Namespace(), name: handle.Name(), + } + } + for i := range pods.Items { + if hasPGDATAMount(&pods.Items[i]) { + return nil + } + } + return &noPGDATAMountErr{namespace: handle.Namespace(), name: handle.Name()} + }, 3*time.Minute, 5*time.Second).Should(Succeed()) + }) + }) + +func hasPGDATAMount(pod *corev1.Pod) bool { + for i := range pod.Spec.Containers { + for _, vm := range pod.Spec.Containers[i].VolumeMounts { + if vm.MountPath == pgdataMountPath { + return true + } + } + } + return false +} + +type noCNPGPodsErr struct { + namespace, name string +} + +func (e *noCNPGPodsErr) Error() string { + return "no CNPG pods labelled cnpg.io/cluster=" + e.name + " in " + e.namespace +} + +type noPGDATAMountErr struct { + namespace, name string +} + +func (e *noPGDATAMountErr) Error() string { + return "no CNPG pod in " + e.namespace + "/" + e.name + + " mounts " + pgdataMountPath +} diff --git a/test/e2e/tests/status/pv_name_test.go b/test/e2e/tests/status/pv_name_test.go new file mode 100644 index 00000000..7a7e75c4 --- /dev/null +++ b/test/e2e/tests/status/pv_name_test.go @@ -0,0 +1,86 @@ +package status + +import ( 
+ "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" +) + +// DocumentDB persistent volume — CRD discrepancy note. +// +// The task brief asked the spec to inspect `status.persistentVolumeClaim` +// but the `DocumentDBStatus` type in operator/src/api/preview/documentdb_types.go +// does not expose such a field. The authoritative ownership of a +// DocumentDB's data volumes sits on the backing CloudNative-PG Cluster, +// which labels each PVC with `cnpg.io/cluster=`. +// +// We therefore verify the operator's observable contract by listing +// PersistentVolumeClaims in the DocumentDB's namespace filtered by that +// CNPG label and asserting: +// - at least one PVC exists (one per Postgres instance); +// - every returned PVC has reached phase Bound. +// +// If `status.persistentVolumeClaim` is added to the CRD in the future, +// this spec should grow an additional assertion that correlates the +// status field with the live PVC list. 
+var _ = Describe("DocumentDB persistent volume — CNPG PVC discovery", + Label(e2e.StatusLabel), e2e.MediumLevelLabel, + func() { + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) + + It("provisions Bound PVCs labelled with cnpg.io/cluster", func() { + env := e2e.SuiteEnv() + Expect(env).ToNot(BeNil()) + c := env.Client + + ctx, cancel := context.WithTimeout(context.Background(), 8*time.Minute) + DeferCleanup(cancel) + + handle, err := fixtures.GetOrCreateSharedRO(ctx, c) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func() error { + pvcList := &corev1.PersistentVolumeClaimList{} + if err := c.List(ctx, pvcList, + client.InNamespace(handle.Namespace()), + client.MatchingLabels{"cnpg.io/cluster": handle.Name()}, + ); err != nil { + return err + } + if len(pvcList.Items) == 0 { + return &noPVCErr{namespace: handle.Namespace(), name: handle.Name()} + } + for i := range pvcList.Items { + p := &pvcList.Items[i] + if p.Status.Phase != corev1.ClaimBound { + return &pvcNotBoundErr{name: p.Name, phase: string(p.Status.Phase)} + } + } + return nil + }, 3*time.Minute, 5*time.Second).Should(Succeed()) + }) + }) + +type noPVCErr struct { + namespace, name string +} + +func (e *noPVCErr) Error() string { + return "no PVCs labelled cnpg.io/cluster=" + e.name + " in " + e.namespace +} + +type pvcNotBoundErr struct { + name, phase string +} + +func (e *pvcNotBoundErr) Error() string { + return "PVC " + e.name + " is not Bound (phase=" + e.phase + ")" +} diff --git a/test/e2e/tests/status/status_suite_test.go b/test/e2e/tests/status/status_suite_test.go new file mode 100644 index 00000000..ea3cfa7f --- /dev/null +++ b/test/e2e/tests/status/status_suite_test.go @@ -0,0 +1,56 @@ +// Package status hosts the DocumentDB E2E status area. See +// docs/designs/e2e-test-suite.md for the spec catalog. This file is +// the Ginkgo root for the area binary and shares bootstrap with the +// other area binaries via the exported helpers in package e2e. 
+package status + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +const operatorReadyTimeout = 2 * time.Minute + +func TestStatus(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E - Status", Label(e2e.StatusLabel)) +} + +var _ = SynchronizedBeforeSuite( + func(ctx SpecContext) []byte { + if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("status bootstrap: %v", err)) + } + return []byte{} + }, + func(_ SpecContext, _ []byte) { + if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("status worker bootstrap: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + func(ctx SpecContext) { + if err := e2e.TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "status teardown: %v\n", err) + } + }, + func(_ SpecContext) {}, +) + +// BeforeEach in this area aborts the spec if the operator pod has +// drifted since SetupSuite (UID/name/restart-count change). Area +// tests/upgrade/ intentionally omits this hook because operator +// restarts are part of its scenario. +var _ = BeforeEach(func() { +Expect(e2e.CheckOperatorUnchanged()).To(Succeed(), +"operator health check failed — a previous spec or reconciler likely restarted the operator") +}) diff --git a/test/e2e/tests/tls/helpers_test.go b/test/e2e/tests/tls/helpers_test.go new file mode 100644 index 00000000..bb947234 --- /dev/null +++ b/test/e2e/tests/tls/helpers_test.go @@ -0,0 +1,174 @@ +package tls + +import ( + "context" + "fmt" + "net" + "os" + "path/filepath" + "runtime" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" + + "github.com/documentdb/documentdb-operator/test/e2e" + ddbutil "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/portforward" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// Shared per-spec setup for the TLS area. Each TLS spec uses the same +// base DocumentDB template plus a single mixin describing the TLS +// mode under test. +// +// tlsCredentialSecret is intentionally distinct from +// fixtures.DefaultCredentialSecretName so specs can exercise a custom +// secret name on the CR spec path; the credentials themselves reuse +// fixtures.DefaultCredentialUsername / DefaultCredentialPassword so a +// future rotation stays a one-file edit. +const ( + tlsCredentialSecret = "tls-e2e-credentials" + tlsCredentialUser = fixtures.DefaultCredentialUsername + tlsCredentialPassword = fixtures.DefaultCredentialPassword //nolint:gosec // fixture-only + tlsDocumentDBName = "tls-e2e" + tlsDefaultStorageSize = "1Gi" + tlsDefaultStorageCls = "standard" + tlsDefaultDDBImage = "" + tlsDefaultGatewayImage = "" +) + +// clusterSetup holds the artefacts returned by provisionCluster. +type clusterSetup struct { + NamespaceName string + DD *previewv1.DocumentDB +} + +// provisionCluster builds a TLS-configured DocumentDB from the base +// template + supplied mixin, waits for it to become healthy, and +// registers DeferCleanup hooks to tear it down. 
extraVars are merged +// on top of the baseline variable map so specs can inject +// mode-specific values (e.g., TLS_SECRET_NAME for Provided mode). +func provisionCluster( + ctx context.Context, + c client.Client, + area, mixin string, + extraVars map[string]string, +) *clusterSetup { + GinkgoHelper() + + nsName := namespaces.NamespaceForSpec(area) + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: nsName}} + Expect(createIdempotent(ctx, c, ns)).To(Succeed(), "create namespace %s", nsName) + + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: tlsCredentialSecret, Namespace: nsName}, + Type: corev1.SecretTypeOpaque, + StringData: map[string]string{ + "username": fixtures.DefaultCredentialUsername, + "password": fixtures.DefaultCredentialPassword, + }, + } + Expect(createIdempotent(ctx, c, sec)).To(Succeed(), "create credential secret") + + vars := map[string]string{ + "STORAGE_SIZE": envDefault("E2E_STORAGE_SIZE", tlsDefaultStorageSize), + "STORAGE_CLASS": envDefault("E2E_STORAGE_CLASS", tlsDefaultStorageCls), + "DOCUMENTDB_IMAGE": envDefault("DOCUMENTDB_IMAGE", tlsDefaultDDBImage), + "GATEWAY_IMAGE": envDefault("GATEWAY_IMAGE", tlsDefaultGatewayImage), + "CREDENTIAL_SECRET": tlsCredentialSecret, + "INSTANCES": "1", + "EXPOSURE_TYPE": "ClusterIP", + "LOG_LEVEL": "info", + } + for k, v := range extraVars { + vars[k] = v + } + + dd, err := ddbutil.Create(ctx, c, nsName, tlsDocumentDBName, ddbutil.CreateOptions{ + Base: "documentdb", + Mixins: []string{mixin}, + Vars: vars, + ManifestsRoot: manifestsRoot(), + }) + Expect(err).NotTo(HaveOccurred(), "render/create documentdb with mixin %q", mixin) + + DeferCleanup(func(ctx SpecContext) { + // Best-effort namespace deletion — this also garbage-collects + // the DocumentDB CR and any child objects via ownerRefs. 
+ _ = c.Delete(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: nsName}}) + }) + + key := types.NamespacedName{Namespace: nsName, Name: tlsDocumentDBName} + Expect(ddbutil.WaitHealthy(ctx, c, key, timeouts.For(timeouts.DocumentDBReady))). + To(Succeed(), "documentdb did not become healthy within %s", timeouts.For(timeouts.DocumentDBReady)) + + return &clusterSetup{NamespaceName: nsName, DD: dd} +} + +// openGatewayForward opens a port-forward to the gateway Service of +// dd on a locally-reserved port and returns (host, port, stop). The +// caller defers stop; the host is always "127.0.0.1". +func openGatewayForward(ctx context.Context, dd *previewv1.DocumentDB) (string, string, func()) { + GinkgoHelper() + port := pickFreeLocalPort() + stop, err := portforward.Open(ctx, e2e.SuiteEnv(), dd, port) + Expect(err).NotTo(HaveOccurred(), "open port-forward to gateway service") + // Give the forwarder a beat to bind the local listener before + // the first connect attempt on slow CI nodes. + time.Sleep(250 * time.Millisecond) + return "127.0.0.1", fmt.Sprintf("%d", port), stop +} + +// pickFreeLocalPort binds :0 to discover an unused TCP port, closes +// the listener, and returns the port. A narrow race exists between +// close and the forwarder's bind; it matches how controller-runtime +// envtest picks its local API server port and is benign on CI hosts +// without adversarial workloads. +func pickFreeLocalPort() int { + GinkgoHelper() + l, err := net.Listen("tcp", "127.0.0.1:0") + Expect(err).NotTo(HaveOccurred(), "reserve a free local TCP port") + addr := l.Addr().(*net.TCPAddr).Port + _ = l.Close() + return addr +} + +// createIdempotent wraps c.Create so tests that re-enter on retry +// don't trip over AlreadyExists. 
+func createIdempotent(ctx context.Context, c client.Client, obj client.Object) error { + if err := c.Create(ctx, obj); err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + return nil +} + +// envDefault returns os.Getenv(k) when set, otherwise def. +func envDefault(k, def string) string { + if v := os.Getenv(k); v != "" { + return v + } + return def +} + +// manifestsRoot returns the absolute path of the shared manifests +// directory. Uses runtime.Caller so go test invocations from any +// working directory still find the templates. +func manifestsRoot() string { + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + return filepath.Join(".", "..", "..", "manifests") + } + // test/e2e/tests/tls/ -> test/e2e/manifests + return filepath.Join(filepath.Dir(thisFile), "..", "..", "manifests") +} diff --git a/test/e2e/tests/tls/tls_certmanager_test.go b/test/e2e/tests/tls/tls_certmanager_test.go new file mode 100644 index 00000000..ce7a34b6 --- /dev/null +++ b/test/e2e/tests/tls/tls_certmanager_test.go @@ -0,0 +1,182 @@ +package tls + +import ( + "context" + "crypto/x509" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + + "github.com/documentdb/documentdb-operator/test/e2e" + ddbutil "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + mongohelper "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// CertManager mode delegates certificate issuance to cert-manager via +// an IssuerRef on the DocumentDB CR. 
This spec creates a minimal +// self-signed Issuer in the test namespace, points the CR at it, and +// verifies the gateway serves a TLS connection that validates against +// the CA material cert-manager stored in the issued Secret. This +// matters because InsecureSkipVerify would mask missing CA wiring; the +// real invariant the operator promises is "the secret named in +// status.tls.secretName contains a chain that the gateway serves". +// +// The spec is skipped automatically when cert-manager is not installed +// on the target cluster, detected by the absence of the Issuer CRD. +var _ = Describe("DocumentDB TLS — cert-manager", + Label(e2e.TLSLabel, e2e.NeedsCertManagerLabel), e2e.MediumLevelLabel, + func() { + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) + + It("provisions certificates through a cert-manager Issuer", func(sctx SpecContext) { + ctx, cancel := context.WithTimeout(sctx, 10*time.Minute) + defer cancel() + + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "suite env not initialised") + + skipIfCertManagerMissing(ctx) + + // Pre-create the namespace and a self-signed Issuer in it + // so the gateway reconcile can resolve the IssuerRef on + // its first pass. provisionCluster treats the namespace + // as idempotent and reuses it. + nsName := namespaces.NamespaceForSpec(e2e.TLSLabel) + Expect(createIdempotent(ctx, env.Client, + &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: nsName}})). + To(Succeed(), "create namespace %s", nsName) + + issuerName := "tls-e2e-selfsigned" + issuer := &unstructured.Unstructured{} + issuer.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "cert-manager.io", Version: "v1", Kind: "Issuer", + }) + issuer.SetName(issuerName) + issuer.SetNamespace(nsName) + // spec.selfSigned is an empty object per cert-manager schema. + Expect(unstructured.SetNestedMap(issuer.Object, map[string]any{}, + "spec", "selfSigned")).To(Succeed(), "set spec.selfSigned") + Expect(createIdempotent(ctx, env.Client, issuer)). 
+ To(Succeed(), "create selfSigned Issuer") + + cluster := provisionCluster(ctx, env.Client, e2e.TLSLabel, + "tls_certmanager", map[string]string{ + "ISSUER_NAME": issuerName, + "ISSUER_KIND": "Issuer", + }) + Expect(cluster.NamespaceName).To(Equal(nsName), + "provisionCluster must reuse the pre-created namespace") + + key := types.NamespacedName{Namespace: cluster.NamespaceName, Name: cluster.DD.Name} + var tlsSecretName string + Eventually(func(g Gomega) bool { + dd, err := ddbutil.Get(ctx, env.Client, key) + g.Expect(err).NotTo(HaveOccurred()) + if dd.Status.TLS == nil { + return false + } + tlsSecretName = dd.Status.TLS.SecretName + return dd.Status.TLS.Ready + }, timeouts.For(timeouts.DocumentDBReady), timeouts.PollInterval(timeouts.DocumentDBReady)). + Should(BeTrue(), "status.tls.ready did not flip true with cert-manager issuer") + Expect(tlsSecretName).NotTo(BeEmpty(), + "status.tls.secretName must be populated once ready") + + // Read the cert-manager-issued secret and extract the CA + // (ca.crt for self-signed issuer; fall back to tls.crt + // when the issuer didn't populate ca.crt because the + // self-signed issuer doubles as its own CA). + caPEM := readCAFromSecret(ctx, cluster.NamespaceName, tlsSecretName) + + host, port, stop := openGatewayForward(ctx, cluster.DD) + defer stop() + + connectCtx, cancelConnect := context.WithTimeout(ctx, timeouts.For(timeouts.MongoConnect)) + defer cancelConnect() + + pool := x509.NewCertPool() + Expect(pool.AppendCertsFromPEM(caPEM)). + To(BeTrue(), "parse CA PEM from cert-manager secret") + + // The gateway certificate is issued for the Service DNS + // name; override SNI to match one of its SANs so + // hostname-verification through the 127.0.0.1 forward + // succeeds. Keep the primary Service FQDN matching + // mixins/tls_certmanager issue. + sni := "documentdb-service-" + tlsDocumentDBName + "." 
+ cluster.NamespaceName + ".svc" + + client, err := mongohelper.NewClient(connectCtx, mongohelper.ClientOptions{ + Host: host, + Port: port, + User: tlsCredentialUser, + Password: tlsCredentialPassword, + TLS: true, + RootCAs: pool, + ServerName: sni, + }) + Expect(err).NotTo(HaveOccurred(), "TLS connect via cert-manager issuer") + defer func() { _ = client.Disconnect(ctx) }() + + Eventually(func() error { + return mongohelper.Ping(connectCtx, client) + }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). + Should(Succeed(), "ping via cert-manager-issued cert should succeed with CA verification") + }) + }, +) + +// readCAFromSecret fetches the issued TLS secret and returns the CA +// bundle bytes. Cert-manager's self-signed Issuer populates ca.crt; +// some issuer types leave it empty and rely on tls.crt being a +// self-contained self-signed leaf, so we fall back to tls.crt when +// ca.crt is missing or empty. +func readCAFromSecret(ctx context.Context, ns, name string) []byte { + GinkgoHelper() + env := e2e.SuiteEnv() + sec := &corev1.Secret{} + Expect(env.Client.Get(ctx, types.NamespacedName{Namespace: ns, Name: name}, sec)). + To(Succeed(), "get issued TLS secret %s/%s", ns, name) + if ca := sec.Data[corev1.ServiceAccountRootCAKey]; len(ca) > 0 { + return ca + } + if crt := sec.Data[corev1.TLSCertKey]; len(crt) > 0 { + return crt + } + Fail("issued TLS secret " + ns + "/" + name + " contains neither ca.crt nor tls.crt") + return nil +} + +// skipIfCertManagerMissing probes for the cert-manager Issuer CRD via +// a no-op List on the v1 kind and calls Skip when the resource is not +// registered. Using a discovery-driven List avoids pulling in the +// apiextensions client for a single check. 
+func skipIfCertManagerMissing(ctx context.Context) { + GinkgoHelper() + env := e2e.SuiteEnv() + list := &unstructured.UnstructuredList{} + list.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "cert-manager.io", Version: "v1", Kind: "IssuerList", + }) + err := env.Client.List(ctx, list) + if err == nil { + return + } + // apimeta.IsNoMatchError matches the REST-mapper error when the + // CRD is not registered; NotFound covers servers that return 404 + // on the discovery round-trip. + if apimeta.IsNoMatchError(err) || apierrors.IsNotFound(err) { + Skip("cert-manager is not installed on the target cluster") + } + Expect(err).NotTo(HaveOccurred(), "unexpected error probing for cert-manager") +} diff --git a/test/e2e/tests/tls/tls_disabled_test.go b/test/e2e/tests/tls/tls_disabled_test.go new file mode 100644 index 00000000..5b5a370b --- /dev/null +++ b/test/e2e/tests/tls/tls_disabled_test.go @@ -0,0 +1,57 @@ +package tls + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" + mongohelper "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// TLS-disabled mode corresponds to spec.tls.gateway.mode=Disabled. +// The gateway still listens but accepts plain-text mongo wire +// protocol. This spec verifies the happy-path: a freshly-created +// DocumentDB with TLS disabled accepts an unencrypted connection +// from the mongo driver. 
+var _ = Describe("DocumentDB TLS — disabled", + Label(e2e.TLSLabel), e2e.MediumLevelLabel, + func() { + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) + + It("accepts plaintext mongo connections", func(sctx SpecContext) { + ctx, cancel := context.WithTimeout(sctx, 10*time.Minute) + defer cancel() + + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "suite env not initialised") + + cluster := provisionCluster(ctx, env.Client, e2e.TLSLabel, + "tls_disabled", nil) + + host, port, stop := openGatewayForward(ctx, cluster.DD) + defer stop() + + connectCtx, cancelConnect := context.WithTimeout(ctx, timeouts.For(timeouts.MongoConnect)) + defer cancelConnect() + + client, err := mongohelper.NewClient(connectCtx, mongohelper.ClientOptions{ + Host: host, + Port: port, + User: tlsCredentialUser, + Password: tlsCredentialPassword, + TLS: false, + }) + Expect(err).NotTo(HaveOccurred(), "connect to gateway without TLS") + defer func() { _ = client.Disconnect(ctx) }() + + Eventually(func() error { + return mongohelper.Ping(connectCtx, client) + }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). + Should(Succeed(), "plaintext ping should succeed when TLS is disabled") + }) + }, +) diff --git a/test/e2e/tests/tls/tls_provided_test.go b/test/e2e/tests/tls/tls_provided_test.go new file mode 100644 index 00000000..b1958dca --- /dev/null +++ b/test/e2e/tests/tls/tls_provided_test.go @@ -0,0 +1,113 @@ +package tls + +import ( + "context" + "crypto/x509" + "net" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/documentdb/documentdb-operator/test/e2e" + mongohelper "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/tlscerts" +) + +// Provided mode points the gateway at a user-supplied kubernetes.io/tls +// Secret that contains the full certificate chain and private key. +// This spec mints a throwaway CA + server cert with the tlscerts +// helper, materialises it as a Secret with the canonical data keys +// (tls.crt, tls.key, ca.crt), wires the DocumentDB CR at it, and +// verifies a TLS mongo connection succeeds while validating the server +// certificate against the locally generated CA. +// +// Because the client connects through a port-forward (SNI = 127.0.0.1), +// we explicitly override ServerName to "localhost" — one of the SANs +// baked into the issued server cert — so hostname verification passes. +// The invariants covered: (a) operator accepts the Provided Secret +// reference, (b) the gateway serves exactly the cert we handed it, and +// (c) the cert's chain validates against the CA bytes we planted. +var _ = Describe("DocumentDB TLS — provided", + Label(e2e.TLSLabel), e2e.MediumLevelLabel, + func() { + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) + + It("uses a user-provided TLS secret", func(sctx SpecContext) { + ctx, cancel := context.WithTimeout(sctx, 10*time.Minute) + defer cancel() + + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "suite env not initialised") + + nsName := namespaces.NamespaceForSpec(e2e.TLSLabel) + Expect(createIdempotent(ctx, env.Client, + &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: nsName}})). 
+ To(Succeed(), "create namespace %s", nsName) + + bundle, err := tlscerts.Generate(tlscerts.GenerateOptions{ + CommonName: "documentdb-e2e", + DNSNames: []string{ + "localhost", + "documentdb-service-" + tlsDocumentDBName, + "documentdb-service-" + tlsDocumentDBName + "." + nsName + ".svc", + "documentdb-service-" + tlsDocumentDBName + "." + nsName + ".svc.cluster.local", + }, + IPAddresses: []net.IP{net.ParseIP("127.0.0.1")}, + Validity: 1 * time.Hour, + }) + Expect(err).NotTo(HaveOccurred(), "generate TLS bundle") + + secretName := "tls-e2e-provided-cert" + tlsSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: secretName, Namespace: nsName}, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + corev1.TLSCertKey: bundle.ServerCertPEM, + corev1.TLSPrivateKeyKey: bundle.ServerKeyPEM, + corev1.ServiceAccountRootCAKey: bundle.CACertPEM, // "ca.crt" + }, + } + Expect(createIdempotent(ctx, env.Client, tlsSecret)). + To(Succeed(), "create provided TLS secret") + + cluster := provisionCluster(ctx, env.Client, e2e.TLSLabel, + "tls_provided", map[string]string{ + "TLS_SECRET_NAME": secretName, + }) + Expect(cluster.NamespaceName).To(Equal(nsName)) + + host, port, stop := openGatewayForward(ctx, cluster.DD) + defer stop() + + connectCtx, cancelConnect := context.WithTimeout(ctx, timeouts.For(timeouts.MongoConnect)) + defer cancelConnect() + + pool := x509.NewCertPool() + Expect(pool.AppendCertsFromPEM(bundle.CACertPEM)). 
+ To(BeTrue(), "parse self-minted CA PEM") + + client, err := mongohelper.NewClient(connectCtx, mongohelper.ClientOptions{ + Host: host, + Port: port, + User: tlsCredentialUser, + Password: tlsCredentialPassword, + TLS: true, + RootCAs: pool, + ServerName: "localhost", // matches a SAN in the issued server cert + }) + Expect(err).NotTo(HaveOccurred(), "TLS connect with provided cert") + defer func() { _ = client.Disconnect(ctx) }() + + Eventually(func() error { + return mongohelper.Ping(connectCtx, client) + }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). + Should(Succeed(), "ping via provided cert should succeed under CA verification") + }) + }, +) diff --git a/test/e2e/tests/tls/tls_selfsigned_test.go b/test/e2e/tests/tls/tls_selfsigned_test.go new file mode 100644 index 00000000..638e6870 --- /dev/null +++ b/test/e2e/tests/tls/tls_selfsigned_test.go @@ -0,0 +1,89 @@ +package tls + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "k8s.io/apimachinery/pkg/types" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" + ddbutil "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + mongohelper "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// Self-signed mode corresponds to spec.tls.gateway.mode=SelfSigned. +// The operator mints an in-cluster CA and server certificate and +// projects them into a per-DocumentDB Secret. Clients outside the +// cluster can't practically obtain that CA, so the spec connects +// with InsecureSkipVerify=true — the goal here is to prove that +// enabling TLS doesn't break the happy path, not to validate the +// chain. 
+var _ = Describe("DocumentDB TLS — self-signed", + Label(e2e.TLSLabel), e2e.MediumLevelLabel, + func() { + BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) + + It("deploys with self-signed certs and accepts TLS connections", func(sctx SpecContext) { + ctx, cancel := context.WithTimeout(sctx, 10*time.Minute) + defer cancel() + + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "suite env not initialised") + + cluster := provisionCluster(ctx, env.Client, e2e.TLSLabel, + "tls_selfsigned", nil) + + // Wait for the operator-published TLS status to name a + // secret and advertise Ready. The secret name is chosen + // by the operator; we don't assert a specific value — we + // only fetch whatever the status reports. + key := types.NamespacedName{Namespace: cluster.NamespaceName, Name: cluster.DD.Name} + Eventually(func(g Gomega) string { + dd, err := ddbutil.Get(ctx, env.Client, key) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(dd.Status.TLS).NotTo(BeNil(), "status.tls not populated yet") + g.Expect(dd.Status.TLS.Ready).To(BeTrue(), "status.tls.ready false") + return dd.Status.TLS.SecretName + }, timeouts.For(timeouts.DocumentDBReady), timeouts.PollInterval(timeouts.DocumentDBReady)). + ShouldNot(BeEmpty(), "operator did not publish TLS secret name") + + // Assert the projected secret looks like a TLS secret. + Eventually(func() error { + dd, err := ddbutil.Get(ctx, env.Client, key) + if err != nil { + return err + } + return assertions.AssertTLSSecretReady(ctx, env.Client, + cluster.NamespaceName, dd.Status.TLS.SecretName)() + }, timeouts.For(timeouts.DocumentDBReady), timeouts.PollInterval(timeouts.DocumentDBReady)). 
+ Should(Succeed()) + + host, port, stop := openGatewayForward(ctx, cluster.DD) + defer stop() + + connectCtx, cancelConnect := context.WithTimeout(ctx, timeouts.For(timeouts.MongoConnect)) + defer cancelConnect() + + client, err := mongohelper.NewClient(connectCtx, mongohelper.ClientOptions{ + Host: host, + Port: port, + User: tlsCredentialUser, + Password: tlsCredentialPassword, + TLS: true, + TLSInsecure: true, + }) + Expect(err).NotTo(HaveOccurred(), "connect with insecure TLS") + defer func() { _ = client.Disconnect(ctx) }() + + Eventually(func() error { + return mongohelper.Ping(connectCtx, client) + }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). + Should(Succeed(), "TLS ping with insecure verify should succeed") + }) + }, +) diff --git a/test/e2e/tests/tls/tls_suite_test.go b/test/e2e/tests/tls/tls_suite_test.go new file mode 100644 index 00000000..d05e13aa --- /dev/null +++ b/test/e2e/tests/tls/tls_suite_test.go @@ -0,0 +1,56 @@ +// Package tls hosts the DocumentDB E2E tls area. See +// docs/designs/e2e-test-suite.md for the spec catalog. This file is +// the Ginkgo root for the area binary and shares bootstrap with the +// other area binaries via the exported helpers in package e2e. +package tls + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega"
+
+	"github.com/documentdb/documentdb-operator/test/e2e"
+)
+
+const operatorReadyTimeout = 2 * time.Minute
+
+func TestTLS(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "DocumentDB E2E - TLS", Label(e2e.TLSLabel))
+}
+
+var _ = SynchronizedBeforeSuite(
+	func(ctx SpecContext) []byte {
+		if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil {
+			Fail(fmt.Sprintf("tls bootstrap: %v", err))
+		}
+		return []byte{}
+	},
+	func(_ SpecContext, _ []byte) {
+		if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil {
+			Fail(fmt.Sprintf("tls worker bootstrap: %v", err))
+		}
+	},
+)
+
+var _ = SynchronizedAfterSuite(
+	func(ctx SpecContext) {
+		if err := e2e.TeardownSuite(ctx); err != nil {
+			fmt.Fprintf(GinkgoWriter, "tls teardown: %v\n", err)
+		}
+	},
+	func(_ SpecContext) {},
+)
+
+// BeforeEach in this area aborts the spec if the operator pod has
+// drifted since SetupSuite (UID/name/restart-count change). Area
+// tests/upgrade/ intentionally omits this hook because operator
+// restarts are part of its scenario.
+var _ = BeforeEach(func() {
+	Expect(e2e.CheckOperatorUnchanged()).To(Succeed(),
+		"operator health check failed — a previous spec or reconciler likely restarted the operator")
+})
diff --git a/test/e2e/tests/upgrade/helpers_test.go b/test/e2e/tests/upgrade/helpers_test.go
new file mode 100644
index 00000000..1cda61ca
--- /dev/null
+++ b/test/e2e/tests/upgrade/helpers_test.go
@@ -0,0 +1,163 @@
+package upgrade
+
+import (
+	"context"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2" //nolint:revive
+
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// Environment variables that gate and parameterize the upgrade area.
+const ( + envEnable = "E2E_UPGRADE" + envPreviousChart = "E2E_UPGRADE_PREVIOUS_CHART" + envPreviousVersion = "E2E_UPGRADE_PREVIOUS_VERSION" + envCurrentChart = "E2E_UPGRADE_CURRENT_CHART" + envCurrentVersion = "E2E_UPGRADE_CURRENT_VERSION" + envReleaseName = "E2E_UPGRADE_RELEASE" + envOperatorNamespace = "E2E_UPGRADE_OPERATOR_NS" + + envOldDocumentDBImage = "E2E_UPGRADE_OLD_DOCUMENTDB_IMAGE" + envNewDocumentDBImage = "E2E_UPGRADE_NEW_DOCUMENTDB_IMAGE" + + // Optional gateway image overrides for the image-upgrade spec. + // When unset the spec patches only spec.documentDBImage and leaves + // spec.gatewayImage as-is (operator uses its default gateway). The + // gateway image has an independent release cadence from the + // extension image; setting these to the same value as the + // documentdb env vars is INCORRECT under the layered-image + // architecture (CNPG pg18 + extension image-library + gateway + // sidecar). + envOldGatewayImage = "E2E_UPGRADE_OLD_GATEWAY_IMAGE" + envNewGatewayImage = "E2E_UPGRADE_NEW_GATEWAY_IMAGE" +) + +// Defaults applied when the env vars above are not set. The chart +// references intentionally fail-closed — specs skip themselves instead +// of installing a hard-coded "latest" chart from the internet. +const ( + defaultReleaseName = "documentdb-operator" + defaultOperatorNamespace = "documentdb-operator" + + controlPlaneUpgradeTimeout = 15 * time.Minute + imageRolloutTimeout = 15 * time.Minute +) + +// skipUnlessUpgradeEnabled skips the current spec when the upgrade +// area is not explicitly enabled. Called from BeforeEach in every +// spec below so Ginkgo reports a clear "skipped" message. +func skipUnlessUpgradeEnabled() { + if os.Getenv(envEnable) != "1" { + Skip("upgrade specs require E2E_UPGRADE=1") + } + if _, err := exec.LookPath("helm"); err != nil { + Skip("upgrade specs require the `helm` CLI on PATH: " + err.Error()) + } +} + +// requireEnv returns the value of name, or Skip()s the spec when the +// variable is unset. 
Used for chart path / image references that must +// be supplied by the CI job — specs fail-closed rather than guess. +func requireEnv(name, reason string) string { + v := os.Getenv(name) + if v == "" { + Skip("upgrade spec skipped: " + name + " is required (" + reason + ")") + } + return v +} + +// envOr returns the value of name, or fallback when unset. +func envOr(name, fallback string) string { + if v := os.Getenv(name); v != "" { + return v + } + return fallback +} + +// credentialSecretName is the default secret populated by createCredentialSecret +// and consumed by mongo.NewFromDocumentDB / the DocumentDB CR. +const credentialSecretName = "documentdb-credentials" + +// baseVars returns the envsubst variable map for the base DocumentDB +// template. It mirrors the backup-area helper so upgrade specs share +// the same manifests/base/documentdb.yaml.template layout. The +// DOCUMENTDB_IMAGE / GATEWAY_IMAGE fields default to empty (operator +// picks layered defaults), and can be overridden via env vars — +// image-upgrade specs further override them per-call via extraVars. +func baseVars(name, ns, size string) map[string]string { + // Empty defaults → operator composes CNPG pg18 + extension + gateway. + // Do NOT fall back GATEWAY_IMAGE to DOCUMENTDB_IMAGE: the gateway is + // an independent sidecar image, not a monolithic build. 
+ ddImage := os.Getenv("DOCUMENTDB_IMAGE") + gwImage := os.Getenv("GATEWAY_IMAGE") + sc := "standard" + if v := os.Getenv("E2E_STORAGE_CLASS"); v != "" { + sc = v + } + if size == "" { + size = "2Gi" + } + return map[string]string{ + "NAME": name, + "NAMESPACE": ns, + "INSTANCES": "1", + "STORAGE_SIZE": size, + "STORAGE_CLASS": sc, + "DOCUMENTDB_IMAGE": ddImage, + "GATEWAY_IMAGE": gwImage, + "CREDENTIAL_SECRET": credentialSecretName, + "EXPOSURE_TYPE": "ClusterIP", + "LOG_LEVEL": "info", + } +} + +// manifestsRoot returns the absolute path to test/e2e/manifests, used +// as ManifestsRoot for documentdb.Create so rendering is robust to +// the current working directory. +func manifestsRoot() string { + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + Fail("runtime.Caller failed — cannot locate test/e2e/manifests") + } + // this file: test/e2e/tests/upgrade/helpers_test.go + // manifests: test/e2e/manifests/ + return filepath.Join(filepath.Dir(thisFile), "..", "..", "manifests") +} + +// createNamespace creates ns (if missing) and registers DeferCleanup +// to delete it at spec teardown. +func createNamespace(ctx context.Context, c client.Client, ns string) { + obj := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns}} + err := c.Create(ctx, obj) + if err != nil && !apierrors.IsAlreadyExists(err) { + Fail("create namespace " + ns + ": " + err.Error()) + } + DeferCleanup(func(ctx SpecContext) { + _ = c.Delete(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns}}) + }) +} + +// createCredentialSecret seeds the DocumentDB credential secret in ns. 
+func createCredentialSecret(ctx context.Context, c client.Client, ns string) { + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: credentialSecretName, Namespace: ns}, + Type: corev1.SecretTypeOpaque, + StringData: map[string]string{ + "username": "e2e_admin", + "password": "E2eAdmin100", + }, + } + err := c.Create(ctx, sec) + if err != nil && !apierrors.IsAlreadyExists(err) { + Fail("create credential secret " + ns + "/" + credentialSecretName + ": " + err.Error()) + } +} diff --git a/test/e2e/tests/upgrade/rollback_test.go b/test/e2e/tests/upgrade/rollback_test.go new file mode 100644 index 00000000..a1bac61b --- /dev/null +++ b/test/e2e/tests/upgrade/rollback_test.go @@ -0,0 +1,42 @@ +package upgrade + +import ( + . "github.com/onsi/ginkgo/v2" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +// DocumentDB upgrade — rollback: skeleton for the operator-rollback +// scenario. The upgrade flow is one-directional today — there is no +// formally supported `helm rollback` story for the DocumentDB operator +// or its CRDs (CRD removal/downgrade is the hard part). The spec +// below is Pending and always skipped with a clear reason so the +// area's intent is documented but the test does not flap against an +// unimplemented feature. +// +// When rollback support lands: +// 1. Drop the Skip() below. +// 2. Replace the placeholders with: install current, seed, helm +// rollback to previous, verify CR still reads/writes. +// 3. Confirm the previous chart's CRD schema is backward-compatible +// with the data written by the current operator, or document the +// rollback boundary. +var _ = Describe("DocumentDB upgrade — rollback", + Label(e2e.UpgradeLabel, e2e.DisruptiveLabel, e2e.SlowLabel), + e2e.HighLevelLabel, + Serial, Ordered, Pending, func() { + BeforeEach(func() { + // Defense in depth: even if Pending is removed by mistake, + // keep the spec dormant until rollback is supported. 
+ Skip("rollback support pending") + }) + + It("rolls the operator back to the previous chart without losing data", func() { + // Placeholder intent: + // 1. Install current PR chart. + // 2. Create DocumentDB + seed data. + // 3. `helm rollback` to previously-released chart version. + // 4. Assert operator becomes Ready on the old version. + // 5. Assert DocumentDB CR is still accepted and data is intact. + }) + }) diff --git a/test/e2e/tests/upgrade/upgrade_control_plane_test.go b/test/e2e/tests/upgrade/upgrade_control_plane_test.go new file mode 100644 index 00000000..0b61e151 --- /dev/null +++ b/test/e2e/tests/upgrade/upgrade_control_plane_test.go @@ -0,0 +1,137 @@ +package upgrade + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "go.mongodb.org/mongo-driver/v2/bson" + "k8s.io/apimachinery/pkg/types" + + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/helmop" + e2emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/seed" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// DocumentDB upgrade — control plane: uninstalls the operator, installs +// a previously-released chart, deploys a DocumentDB, seeds data, then +// upgrades the chart to the PR's built chart and verifies the operator +// is healthy and the seeded data survived the bounce. +// +// Residual risk: the "previous-released chart" is NOT pinned in code. +// It must be supplied by the caller via E2E_UPGRADE_PREVIOUS_CHART and +// E2E_UPGRADE_PREVIOUS_VERSION (e.g. the chart published on +// GitHub Releases). 
Hard-coding "latest" here would break every time a +// new release is cut, so the spec fail-closed skips when unset. +var _ = Describe("DocumentDB upgrade — control plane", + Label(e2e.UpgradeLabel, e2e.DisruptiveLabel, e2e.SlowLabel), + e2e.HighLevelLabel, + Serial, Ordered, func() { + const ( + ddName = "upgrade-cp" + dbName = "upgrade_cp" + collName = "seed" + ) + var ( + releaseName string + operatorNs string + previousChart string + previousVer string + currentChart string + currentVer string + operatorCtx context.Context + operatorCancel context.CancelFunc + ) + + BeforeAll(func() { + skipUnlessUpgradeEnabled() + releaseName = envOr(envReleaseName, defaultReleaseName) + operatorNs = envOr(envOperatorNamespace, defaultOperatorNamespace) + previousChart = requireEnv(envPreviousChart, + "chart ref to the previous released operator chart (e.g. documentdb/documentdb-operator or a local tgz)") + previousVer = requireEnv(envPreviousVersion, + "semver of the previous released chart; see GitHub Releases or the published Helm index") + currentChart = requireEnv(envCurrentChart, + "chart ref to the PR's built chart (path to the unpacked chart dir or packaged tgz)") + currentVer = envOr(envCurrentVersion, "") + }) + + BeforeEach(func() { + e2e.SkipUnlessLevel(e2e.High) + operatorCtx, operatorCancel = context.WithTimeout(context.Background(), controlPlaneUpgradeTimeout) + DeferCleanup(func() { operatorCancel() }) + }) + + It("upgrades operator from previous released chart to current and retains data", func() { + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "SuiteEnv must be initialized by SetupSuite") + c := env.Client + + By("uninstalling any pre-existing operator release (idempotent)") + Expect(helmop.Uninstall(operatorCtx, releaseName, operatorNs)).To(Succeed()) + + By("installing the previous released operator chart") + Expect(helmop.Install(operatorCtx, releaseName, operatorNs, previousChart, previousVer, nil)). 
+ To(Succeed(), "install previous chart %s@%s", previousChart, previousVer) + Expect(helmop.WaitOperatorReady(operatorCtx, env, operatorNs, 3*time.Minute)).To(Succeed()) + + By("creating a DocumentDB on the previous operator") + ns := namespaces.NamespaceForSpec(e2e.UpgradeLabel) + createNamespace(operatorCtx, c, ns) + createCredentialSecret(operatorCtx, c, ns) + + dd, err := documentdb.Create(operatorCtx, c, ns, ddName, documentdb.CreateOptions{ + Base: "documentdb", + Vars: baseVars(ddName, ns, "2Gi"), + ManifestsRoot: manifestsRoot(), + }) + Expect(err).NotTo(HaveOccurred(), "create DocumentDB %s/%s", ns, ddName) + DeferCleanup(func(ctx SpecContext) { + _ = documentdb.Delete(ctx, c, dd, 3*time.Minute) + }) + + key := types.NamespacedName{Namespace: ns, Name: ddName} + Eventually(assertions.AssertDocumentDBReady(operatorCtx, c, key), + timeouts.For(timeouts.DocumentDBReady), + timeouts.PollInterval(timeouts.DocumentDBReady), + ).Should(Succeed(), "DocumentDB did not reach Ready under previous operator") + + By("seeding data on the previous operator") + docs := seed.SmallDataset() + handle, err := e2emongo.NewFromDocumentDB(operatorCtx, env, ns, ddName) + Expect(err).NotTo(HaveOccurred(), "connect to DocumentDB gateway") + inserted, err := e2emongo.Seed(operatorCtx, handle.Client(), dbName, collName, docs) + Expect(err).NotTo(HaveOccurred(), "seed %s.%s", dbName, collName) + Expect(inserted).To(Equal(seed.SmallDatasetSize)) + // Explicit close before the helm upgrade: the port-forward + // goroutine must not outlive the operator bounce. + Expect(handle.Close(operatorCtx)).To(Succeed()) + + By("upgrading the chart to the PR's built version") + Expect(helmop.Upgrade(operatorCtx, releaseName, operatorNs, currentChart, currentVer, nil)). 
+ To(Succeed(), "upgrade to current chart %s@%s", currentChart, currentVer) + Expect(helmop.WaitOperatorReady(operatorCtx, env, operatorNs, 5*time.Minute)).To(Succeed()) + + By("verifying the DocumentDB CR is still reconciled by the new operator") + Eventually(assertions.AssertDocumentDBReady(operatorCtx, c, key), + timeouts.For(timeouts.DocumentDBUpgrade), + timeouts.PollInterval(timeouts.DocumentDBUpgrade), + ).Should(Succeed(), "DocumentDB did not reach Ready after operator upgrade") + + By("verifying seeded data survived the operator bounce") + handle2, err := e2emongo.NewFromDocumentDB(operatorCtx, env, ns, ddName) + Expect(err).NotTo(HaveOccurred(), "reconnect to DocumentDB gateway") + DeferCleanup(func(ctx SpecContext) { _ = handle2.Close(ctx) }) + n, err := e2emongo.Count(operatorCtx, handle2.Client(), dbName, collName, bson.M{}) + Expect(err).NotTo(HaveOccurred(), "count %s.%s", dbName, collName) + Expect(n).To(Equal(int64(seed.SmallDatasetSize)), + "seeded document count changed across operator upgrade") + }) + }) diff --git a/test/e2e/tests/upgrade/upgrade_images_test.go b/test/e2e/tests/upgrade/upgrade_images_test.go new file mode 100644 index 00000000..c71a58b8 --- /dev/null +++ b/test/e2e/tests/upgrade/upgrade_images_test.go @@ -0,0 +1,214 @@ +package upgrade + +import ( + "context" + "fmt" + "os" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "go.mongodb.org/mongo-driver/v2/bson" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + previewv1 "github.com/documentdb/documentdb-operator/api/preview" + "github.com/documentdb/documentdb-operator/test/e2e" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + e2emongo "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/seed" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" +) + +// DocumentDB upgrade — images: with the operator already running at +// the current version, patches the DocumentDB spec.documentDBImage +// (and spec.gatewayImage) from an old image tag to a new one and +// verifies the rollout completes + the seeded dataset is retained. +// Unlike upgrade_control_plane_test.go this does not touch the Helm +// release; it only exercises the CR-driven data-plane image upgrade +// path. +// +// Residual risk: the spec needs two image references (old/new). They +// come from E2E_UPGRADE_OLD_DOCUMENTDB_IMAGE / +// E2E_UPGRADE_NEW_DOCUMENTDB_IMAGE — there is no pinned default +// because the set of valid old→new pairs depends on the release being +// validated. 
+var _ = Describe("DocumentDB upgrade — images", + Label(e2e.UpgradeLabel, e2e.DisruptiveLabel, e2e.SlowLabel), + e2e.HighLevelLabel, + Serial, Ordered, func() { + const ( + ddName = "upgrade-img" + dbName = "upgrade_img" + collName = "seed" + ) + var ( + oldImage string + newImage string + oldGwImage string + newGwImage string + ctx context.Context + cancel context.CancelFunc + ) + + BeforeAll(func() { + skipUnlessUpgradeEnabled() + oldImage = requireEnv(envOldDocumentDBImage, + "DocumentDB image tag to start from (e.g. ghcr.io/microsoft/documentdb/documentdb:0.108.0)") + newImage = requireEnv(envNewDocumentDBImage, + "DocumentDB image tag to upgrade to (must be different from the old tag)") + if oldImage == newImage { + Skip("E2E_UPGRADE_OLD_DOCUMENTDB_IMAGE and E2E_UPGRADE_NEW_DOCUMENTDB_IMAGE are identical; nothing to upgrade") + } + // The gateway is an independent sidecar image; specs may + // exercise a gateway upgrade alongside the extension + // upgrade, or leave the gateway untouched. Both env vars + // must either be set together or both left empty. 
+ oldGwImage = os.Getenv(envOldGatewayImage) + newGwImage = os.Getenv(envNewGatewayImage) + if (oldGwImage == "") != (newGwImage == "") { + Fail(fmt.Sprintf("%s and %s must be set together (or both unset)", + envOldGatewayImage, envNewGatewayImage)) + } + if oldGwImage != "" && oldGwImage == newGwImage { + Skip(envOldGatewayImage + " and " + envNewGatewayImage + " are identical; nothing to upgrade") + } + }) + + BeforeEach(func() { + e2e.SkipUnlessLevel(e2e.High) + ctx, cancel = context.WithTimeout(context.Background(), imageRolloutTimeout) + DeferCleanup(func() { cancel() }) + }) + + It("rolls DocumentDB pods to a new image and retains data", func() { + env := e2e.SuiteEnv() + Expect(env).NotTo(BeNil(), "SuiteEnv must be initialized by SetupSuite") + Expect(ctx).NotTo(BeNil(), "BeforeEach must have populated the spec context") + c := env.Client + + By("creating a DocumentDB pinned to the old image") + ns := namespaces.NamespaceForSpec(e2e.UpgradeLabel) + createNamespace(ctx, c, ns) + createCredentialSecret(ctx, c, ns) + + vars := baseVars(ddName, ns, "2Gi") + vars["DOCUMENTDB_IMAGE"] = oldImage + if oldGwImage != "" { + vars["GATEWAY_IMAGE"] = oldGwImage + } + + dd, err := documentdb.Create(ctx, c, ns, ddName, documentdb.CreateOptions{ + Base: "documentdb", + Vars: vars, + ManifestsRoot: manifestsRoot(), + }) + Expect(err).NotTo(HaveOccurred(), "create DocumentDB %s/%s", ns, ddName) + DeferCleanup(func(ctx SpecContext) { + _ = documentdb.Delete(ctx, c, dd, 3*time.Minute) + }) + + key := types.NamespacedName{Namespace: ns, Name: ddName} + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.DocumentDBReady), + timeouts.PollInterval(timeouts.DocumentDBReady), + ).Should(Succeed(), "DocumentDB did not reach Ready on oldImage=%s", oldImage) + + By("seeding data on the old image") + docs := seed.SmallDataset() + handle, err := e2emongo.NewFromDocumentDB(ctx, env, ns, ddName) + Expect(err).NotTo(HaveOccurred(), "connect to DocumentDB gateway 
on oldImage") + inserted, err := e2emongo.Seed(ctx, handle.Client(), dbName, collName, docs) + Expect(err).NotTo(HaveOccurred(), "seed %s.%s", dbName, collName) + Expect(inserted).To(Equal(seed.SmallDatasetSize)) + Expect(handle.Close(ctx)).To(Succeed()) + + By("patching spec.documentDBImage (and optionally gatewayImage) to the new image") + fresh, err := documentdb.Get(ctx, c, key) + Expect(err).NotTo(HaveOccurred(), "re-fetch DocumentDB before patch") + Expect(documentdb.PatchSpec(ctx, c, fresh, func(s *previewv1.DocumentDBSpec) { + s.DocumentDBImage = newImage + if newGwImage != "" { + s.GatewayImage = newGwImage + } + })).To(Succeed(), "patch DocumentDB image from %s to %s", oldImage, newImage) + + By("waiting for the CNPG-backed rollout to settle on the new image") + // Poll the CR's backing pods until every container image + // matches newImage. A transient all-pods-gone window is + // acceptable during rollout, so we require at least one + // pod AND zero pods still on oldImage. + Eventually(func() error { + return allPodsOnImage(ctx, c, ns, ddName, newImage) + }, timeouts.For(timeouts.DocumentDBUpgrade), + timeouts.PollInterval(timeouts.DocumentDBUpgrade), + ).Should(Succeed(), "pods did not roll to %s", newImage) + + Eventually(assertions.AssertDocumentDBReady(ctx, c, key), + timeouts.For(timeouts.DocumentDBUpgrade), + timeouts.PollInterval(timeouts.DocumentDBUpgrade), + ).Should(Succeed(), "DocumentDB did not reach Ready on newImage=%s", newImage) + + By("verifying data seeded before the upgrade is still reachable") + handle2, err := e2emongo.NewFromDocumentDB(ctx, env, ns, ddName) + Expect(err).NotTo(HaveOccurred(), "reconnect to DocumentDB gateway on newImage") + DeferCleanup(func(ctx SpecContext) { _ = handle2.Close(ctx) }) + n, err := e2emongo.Count(ctx, handle2.Client(), dbName, collName, bson.M{}) + Expect(err).NotTo(HaveOccurred(), "count %s.%s on newImage", dbName, collName) + Expect(n).To(Equal(int64(seed.SmallDatasetSize)), + "seeded document 
count changed across image upgrade") + }) + }) + +// allPodsOnImage returns nil when there is at least one Pod owned by +// the CNPG Cluster backing ddName and every container in every such +// Pod reports an image equal to want. The helper intentionally errs +// on the side of "not yet done" — missing pods, empty status, or any +// mismatch returns a non-nil error so Eventually keeps polling. +func allPodsOnImage(ctx context.Context, c client.Client, ns, ddName, want string) error { + var pods corev1.PodList + sel := labels.SelectorFromSet(labels.Set{"cnpg.io/cluster": ddName}) + if err := c.List(ctx, &pods, client.InNamespace(ns), client.MatchingLabelsSelector{Selector: sel}); err != nil { + return fmt.Errorf("list pods: %w", err) + } + if len(pods.Items) == 0 { + return fmt.Errorf("no pods yet for cluster %s/%s", ns, ddName) + } + for i := range pods.Items { + p := &pods.Items[i] + if len(p.Status.ContainerStatuses) == 0 { + return fmt.Errorf("pod %s has no container statuses yet", p.Name) + } + for j := range p.Status.ContainerStatuses { + got := p.Status.ContainerStatuses[j].Image + // Container image strings can be reported by the kubelet in + // resolved form (digest appended). Accept any image whose + // reported tag contains the requested ref; this matches the + // upgrade-verification semantics used in other areas. + if got != want && !containsImageRef(got, want) { + return fmt.Errorf("pod %s container %s image=%q, want %q", + p.Name, p.Status.ContainerStatuses[j].Name, got, want) + } + } + } + return nil +} + +// containsImageRef returns true when got references want either +// verbatim or as the repository:tag prefix of a digest-resolved form +// (e.g. "repo:tag@sha256:..."). Keeps the image-rollout assertion +// resilient to kubelets that report resolved digests. 
+func containsImageRef(got, want string) bool { + if got == want { + return true + } + if len(got) < len(want) { + return false + } + return got[:len(want)] == want && (len(got) == len(want) || got[len(want)] == '@') +} diff --git a/test/e2e/tests/upgrade/upgrade_suite_test.go b/test/e2e/tests/upgrade/upgrade_suite_test.go new file mode 100644 index 00000000..0fa2f150 --- /dev/null +++ b/test/e2e/tests/upgrade/upgrade_suite_test.go @@ -0,0 +1,58 @@ +// Package upgrade hosts the DocumentDB E2E upgrade area. See +// docs/designs/e2e-test-suite.md for the spec catalog. This file is +// the Ginkgo root for the area binary and shares bootstrap with the +// other area binaries via the exported helpers in package e2e. +// +// This area is DISRUPTIVE — its specs install/upgrade the operator +// itself. They are gated behind the E2E_UPGRADE=1 environment variable +// to prevent accidental local runs. They require the `helm` v3 CLI on +// PATH and must run with `ginkgo -procs=1` because they mutate the +// cluster-wide operator Deployment. +// +// Unlike every other area, tests/upgrade/ does NOT install the +// [e2e.CheckOperatorUnchanged] BeforeEach hook — operator restarts are +// part of the scenario here, not a failure mode. This exemption is +// acknowledged in pkg e2e's suite.go header comment. +package upgrade + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/documentdb/documentdb-operator/test/e2e" +) + +const operatorReadyTimeout = 2 * time.Minute + +func TestUpgrade(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "DocumentDB E2E - Upgrade", Label(e2e.UpgradeLabel)) +} + +var _ = SynchronizedBeforeSuite( + func(ctx SpecContext) []byte { + if err := e2e.SetupSuite(ctx, operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("upgrade bootstrap: %v", err)) + } + return []byte{} + }, + func(_ SpecContext, _ []byte) { + if err := e2e.SetupSuite(context.Background(), operatorReadyTimeout); err != nil { + Fail(fmt.Sprintf("upgrade worker bootstrap: %v", err)) + } + }, +) + +var _ = SynchronizedAfterSuite( + func(ctx SpecContext) { + if err := e2e.TeardownSuite(ctx); err != nil { + fmt.Fprintf(GinkgoWriter, "upgrade teardown: %v\n", err) + } + }, + func(_ SpecContext) {}, +) From 4181cfd2f562c1cd8cd45602a1b79ba0d91c8eb1 Mon Sep 17 00:00:00 2001 From: George Eichberger Date: Mon, 20 Apr 2026 19:32:11 -0700 Subject: [PATCH 04/10] test(e2e): add mongo hello probes to lifecycle deploy and tls-certmanager renewal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rationale from PR #346 gate: 'Ready=true' + a bare reconcile success alone can mask a broken gateway sidecar or a cert chain that only validates at issue-time. Add two minimal data-plane probes: 1. lifecycle/deploy: after the 1-instance cluster reaches Ready and owner-refs are verified, open a mongo-driver v2 client via the existing NewFromDocumentDB helper (port-forward + credentials) and Ping. Covers the 'the gateway actually answers' invariant during the most common bring-up path. 2. tls/cert-manager: after the initial CA-verified ping succeeds, trigger a reissue by deleting the cert-manager-generated Secret, wait for a new tls.crt to land, and reconnect with the freshly extracted CA. 
With the self-signed Issuer used here this exercises a full leaf+CA rotation, so a passing re-ping proves the gateway remounted the renewed material rather than serving a pinned copy. Both use mongohelper.Ping — the canonical 'hello' probe — so the existing connect retry + TLS verification paths are reused without introducing bespoke command builders. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test/e2e/tests/lifecycle/deploy_test.go | 14 +++++ test/e2e/tests/tls/tls_certmanager_test.go | 64 ++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/test/e2e/tests/lifecycle/deploy_test.go b/test/e2e/tests/lifecycle/deploy_test.go index 734021e6..cb8a668d 100644 --- a/test/e2e/tests/lifecycle/deploy_test.go +++ b/test/e2e/tests/lifecycle/deploy_test.go @@ -14,6 +14,7 @@ import ( "github.com/documentdb/documentdb-operator/test/e2e" "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb" + mongohelper "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces" "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" ) @@ -75,5 +76,18 @@ var _ = Describe("DocumentDB lifecycle — deploy", } Expect(found).To(BeTrue(), "expected owner reference with UID=%s on CNPG Cluster %s", current.UID, key) + + // Data-plane smoke: opening a mongo-driver connection + // against the freshly-deployed CR proves the gateway + // actually answers on the wire. Without this step, + // "Ready=true" alone can mask a broken gateway sidecar + // (e.g. wrong image, misconfigured credentials secret). + // NewFromDocumentDB handles port-forward + credentials; + // a successful Ping is the canonical "hello" probe. 
+ h, err := mongohelper.NewFromDocumentDB(ctx, e2e.SuiteEnv(), ns, name) + Expect(err).ToNot(HaveOccurred(), "connect mongo to freshly-deployed DocumentDB") + DeferCleanup(func(ctx SpecContext) { _ = h.Close(ctx) }) + Expect(mongohelper.Ping(ctx, h.Client())).To(Succeed(), + "ping freshly-deployed DocumentDB gateway") }) }) diff --git a/test/e2e/tests/tls/tls_certmanager_test.go b/test/e2e/tests/tls/tls_certmanager_test.go index ce7a34b6..b60af68e 100644 --- a/test/e2e/tests/tls/tls_certmanager_test.go +++ b/test/e2e/tests/tls/tls_certmanager_test.go @@ -132,6 +132,70 @@ var _ = Describe("DocumentDB TLS — cert-manager", return mongohelper.Ping(connectCtx, client) }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). Should(Succeed(), "ping via cert-manager-issued cert should succeed with CA verification") + + // --- Renewal check --- + // Force cert-manager to re-issue the Certificate by + // deleting the generated Secret; cert-manager recreates + // it with a fresh tls.crt. With the self-signed Issuer + // used here, a new leaf + new CA are produced on every + // issuance, so the old CA pool will NOT validate the + // new leaf — proving the gateway actually picked up the + // reissued material. If the gateway pinned the initial + // cert in memory, this ping would fail with a bad + // certificate error. + By("forcing cert-manager to reissue the gateway Secret") + origCrt := make([]byte, 0) + { + sec := &corev1.Secret{} + Expect(env.Client.Get(ctx, types.NamespacedName{ + Namespace: cluster.NamespaceName, Name: tlsSecretName, + }, sec)).To(Succeed(), "read TLS secret before deletion") + origCrt = append(origCrt, sec.Data[corev1.TLSCertKey]...) + Expect(env.Client.Delete(ctx, sec)).To(Succeed(), + "delete TLS secret to trigger cert-manager renewal") + } + + // Wait for cert-manager to recreate the secret with a + // different tls.crt. 
Using the existing MongoConnect + // timeout budget is enough here; if cert-manager takes + // longer the test will fail loudly rather than flake. + Eventually(func(g Gomega) { + sec := &corev1.Secret{} + g.Expect(env.Client.Get(ctx, types.NamespacedName{ + Namespace: cluster.NamespaceName, Name: tlsSecretName, + }, sec)).To(Succeed()) + g.Expect(sec.Data[corev1.TLSCertKey]).NotTo(BeEmpty(), + "reissued secret must carry tls.crt") + g.Expect(sec.Data[corev1.TLSCertKey]).NotTo(Equal(origCrt), + "tls.crt must differ after reissue") + }, timeouts.For(timeouts.DocumentDBReady), timeouts.PollInterval(timeouts.DocumentDBReady)). + Should(Succeed(), "cert-manager did not reissue TLS secret") + + // Reconnect with the NEW CA and ping; Eventually gives + // the gateway a window to notice the remounted cert. + newCA := readCAFromSecret(ctx, cluster.NamespaceName, tlsSecretName) + newPool := x509.NewCertPool() + Expect(newPool.AppendCertsFromPEM(newCA)). + To(BeTrue(), "parse renewed CA PEM") + + renewCtx, cancelRenew := context.WithTimeout(ctx, timeouts.For(timeouts.MongoConnect)) + defer cancelRenew() + Eventually(func(g Gomega) { + client2, err := mongohelper.NewClient(renewCtx, mongohelper.ClientOptions{ + Host: host, + Port: port, + User: tlsCredentialUser, + Password: tlsCredentialPassword, + TLS: true, + RootCAs: newPool, + ServerName: sni, + }) + g.Expect(err).NotTo(HaveOccurred(), "reconnect with renewed CA") + defer func() { _ = client2.Disconnect(renewCtx) }() + g.Expect(mongohelper.Ping(renewCtx, client2)).To(Succeed(), + "ping via renewed cert should succeed") + }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). 
+ Should(Succeed(), "gateway did not start serving the renewed cert") }) }, ) From b3a700148d3712607dcdc89b76fb7d1b4ff2acac Mon Sep 17 00:00:00 2001 From: George Eichberger Date: Mon, 20 Apr 2026 19:38:48 -0700 Subject: [PATCH 05/10] fix(e2e/tls): per-iteration attempt context in renewal Eventually + polish Addresses code-review-agent findings on 07fec09: - critical: shared renewCtx in the Eventually closure could exhaust the MongoConnect budget on the first iteration, starving later retries. Use per-iteration attemptCtx and widen the outer poll window to DocumentDBReady so cert-manager + gateway remount have real time to converge. - bytes.Clone replaces the ad-hoc make+append pattern for origCrt. - extra By(...) markers around reissue/reconnect make the failure narrative obvious. - clarifying comment on the explicit Ping in lifecycle/deploy documenting why we keep it despite NewFromDocumentDB pinging internally. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test/e2e/tests/lifecycle/deploy_test.go | 6 ++- test/e2e/tests/tls/tls_certmanager_test.go | 45 ++++++++++++---------- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/test/e2e/tests/lifecycle/deploy_test.go b/test/e2e/tests/lifecycle/deploy_test.go index cb8a668d..bb0df506 100644 --- a/test/e2e/tests/lifecycle/deploy_test.go +++ b/test/e2e/tests/lifecycle/deploy_test.go @@ -82,8 +82,10 @@ var _ = Describe("DocumentDB lifecycle — deploy", // actually answers on the wire. Without this step, // "Ready=true" alone can mask a broken gateway sidecar // (e.g. wrong image, misconfigured credentials secret). - // NewFromDocumentDB handles port-forward + credentials; - // a successful Ping is the canonical "hello" probe. + // NewFromDocumentDB pings internally before returning, + // so the explicit Ping below is belt-and-braces at the + // test boundary — keeping it here makes the failure + // narrative clear without readers chasing helper code. 
h, err := mongohelper.NewFromDocumentDB(ctx, e2e.SuiteEnv(), ns, name) Expect(err).ToNot(HaveOccurred(), "connect mongo to freshly-deployed DocumentDB") DeferCleanup(func(ctx SpecContext) { _ = h.Close(ctx) }) diff --git a/test/e2e/tests/tls/tls_certmanager_test.go b/test/e2e/tests/tls/tls_certmanager_test.go index b60af68e..6106316a 100644 --- a/test/e2e/tests/tls/tls_certmanager_test.go +++ b/test/e2e/tests/tls/tls_certmanager_test.go @@ -1,6 +1,7 @@ package tls import ( + "bytes" "context" "crypto/x509" "time" @@ -144,21 +145,19 @@ var _ = Describe("DocumentDB TLS — cert-manager", // cert in memory, this ping would fail with a bad // certificate error. By("forcing cert-manager to reissue the gateway Secret") - origCrt := make([]byte, 0) - { - sec := &corev1.Secret{} - Expect(env.Client.Get(ctx, types.NamespacedName{ - Namespace: cluster.NamespaceName, Name: tlsSecretName, - }, sec)).To(Succeed(), "read TLS secret before deletion") - origCrt = append(origCrt, sec.Data[corev1.TLSCertKey]...) - Expect(env.Client.Delete(ctx, sec)).To(Succeed(), - "delete TLS secret to trigger cert-manager renewal") - } + origSec := &corev1.Secret{} + Expect(env.Client.Get(ctx, types.NamespacedName{ + Namespace: cluster.NamespaceName, Name: tlsSecretName, + }, origSec)).To(Succeed(), "read TLS secret before deletion") + origCrt := bytes.Clone(origSec.Data[corev1.TLSCertKey]) + Expect(env.Client.Delete(ctx, origSec)).To(Succeed(), + "delete TLS secret to trigger cert-manager renewal") // Wait for cert-manager to recreate the secret with a - // different tls.crt. Using the existing MongoConnect - // timeout budget is enough here; if cert-manager takes - // longer the test will fail loudly rather than flake. + // different tls.crt. Using the DocumentDBReady budget + // here because cert-manager reissue latency is dominated + // by issuer controller scheduling, not mongo connect. 
+ By("waiting for cert-manager to reissue the TLS Secret with a new tls.crt") Eventually(func(g Gomega) { sec := &corev1.Secret{} g.Expect(env.Client.Get(ctx, types.NamespacedName{ @@ -173,15 +172,21 @@ var _ = Describe("DocumentDB TLS — cert-manager", // Reconnect with the NEW CA and ping; Eventually gives // the gateway a window to notice the remounted cert. + // Each Eventually attempt gets its own bounded context so + // the per-attempt budget does not collapse across retries + // — otherwise the first iteration's NewClient could burn + // the whole MongoConnect window, leaving no time for the + // gateway to actually pick up the reissued material. + By("reconnecting via the renewed CA and pinging through the gateway") newCA := readCAFromSecret(ctx, cluster.NamespaceName, tlsSecretName) newPool := x509.NewCertPool() Expect(newPool.AppendCertsFromPEM(newCA)). To(BeTrue(), "parse renewed CA PEM") - renewCtx, cancelRenew := context.WithTimeout(ctx, timeouts.For(timeouts.MongoConnect)) - defer cancelRenew() Eventually(func(g Gomega) { - client2, err := mongohelper.NewClient(renewCtx, mongohelper.ClientOptions{ + attemptCtx, cancelAttempt := context.WithTimeout(ctx, timeouts.For(timeouts.MongoConnect)) + defer cancelAttempt() + client2, err := mongohelper.NewClient(attemptCtx, mongohelper.ClientOptions{ Host: host, Port: port, User: tlsCredentialUser, @@ -191,11 +196,11 @@ var _ = Describe("DocumentDB TLS — cert-manager", ServerName: sni, }) g.Expect(err).NotTo(HaveOccurred(), "reconnect with renewed CA") - defer func() { _ = client2.Disconnect(renewCtx) }() - g.Expect(mongohelper.Ping(renewCtx, client2)).To(Succeed(), + defer func() { _ = client2.Disconnect(attemptCtx) }() + g.Expect(mongohelper.Ping(attemptCtx, client2)).To(Succeed(), "ping via renewed cert should succeed") - }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). 
- Should(Succeed(), "gateway did not start serving the renewed cert") + }, timeouts.For(timeouts.DocumentDBReady), timeouts.PollInterval(timeouts.MongoConnect)). + Should(Succeed(), "gateway did not start serving the renewed cert (or reconnect kept failing)") }) }, ) From f23a316b80f417bd06ad0b784a538e44a4207a66 Mon Sep 17 00:00:00 2001 From: George Eichberger Date: Tue, 21 Apr 2026 10:15:09 -0700 Subject: [PATCH 06/10] test(e2e/feature-gates): drop ChangeStreams coverage, leave TODO ChangeStreams is an experimental DocumentDB feature that requires the -changestream image variant, which is not part of the default e2e image set. Running the wal_level-translation assertion on the default image implied support the shipped pipeline cannot honour, so this change: - removes tests/feature_gates/changestreams_test.go - removes manifests/mixins/feature_changestreams.yaml.template - removes the corresponding render check in pkg/e2eutils/fixtures/fixtures_test.go - leaves a TODO in feature_gates_suite_test.go and at the fixtures deletion site explaining status and the steps to re-enable (capability label + preflight + best-effort mongo Watch on top). Operator API (previewv1.FeatureGateChangeStreams) and the operator's wal_level=logical translation are untouched. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../feature_changestreams.yaml.template | 8 -- .../pkg/e2eutils/fixtures/fixtures_test.go | 18 ++-- .../tests/feature_gates/changestreams_test.go | 97 ------------------- .../feature_gates/feature_gates_suite_test.go | 27 ++++++ 4 files changed, 35 insertions(+), 115 deletions(-) delete mode 100644 test/e2e/manifests/mixins/feature_changestreams.yaml.template delete mode 100644 test/e2e/tests/feature_gates/changestreams_test.go diff --git a/test/e2e/manifests/mixins/feature_changestreams.yaml.template b/test/e2e/manifests/mixins/feature_changestreams.yaml.template deleted file mode 100644 index d23de6d1..00000000 --- a/test/e2e/manifests/mixins/feature_changestreams.yaml.template +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: documentdb.io/preview -kind: DocumentDB -metadata: - name: ${NAME} - namespace: ${NAMESPACE} -spec: - featureGates: - ChangeStreams: true diff --git a/test/e2e/pkg/e2eutils/fixtures/fixtures_test.go b/test/e2e/pkg/e2eutils/fixtures/fixtures_test.go index 33ca0fea..e0d1f3b8 100644 --- a/test/e2e/pkg/e2eutils/fixtures/fixtures_test.go +++ b/test/e2e/pkg/e2eutils/fixtures/fixtures_test.go @@ -76,16 +76,14 @@ func TestRenderTLSMixins(t *testing.T) { } } -func TestRenderFeatureChangeStreams(t *testing.T) { - vars := map[string]string{"NAMESPACE": "ns", "NAME": "c"} - dd, err := renderDocumentDB("mixins/feature_changestreams.yaml.template", vars) - if err != nil { - t.Fatalf("render: %v", err) - } - if !dd.Spec.FeatureGates[previewv1.FeatureGateChangeStreams] { - t.Fatalf("expected ChangeStreams feature gate enabled, got %+v", dd.Spec.FeatureGates) - } -} +// TODO(e2e/feature-gates): re-introduce a ChangeStreams mixin-render +// test once the suite ships with a change-stream-capable DocumentDB +// image. 
The feature is experimental and requires a custom image +// variant (the `-changestream` tag line) that is not part of the +// default e2e image set, so we removed the render+behaviour tests to +// keep the default pipeline green. The API symbol +// previewv1.FeatureGateChangeStreams and the operator's wal_level +// translation remain in place — this is purely about test coverage. // The following tests exercise the label-selector teardown contract and // the AlreadyExists run-id mismatch error path. They use the diff --git a/test/e2e/tests/feature_gates/changestreams_test.go b/test/e2e/tests/feature_gates/changestreams_test.go deleted file mode 100644 index 516ed503..00000000 --- a/test/e2e/tests/feature_gates/changestreams_test.go +++ /dev/null @@ -1,97 +0,0 @@ -package feature_gates - -import ( - "context" - "fmt" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - - cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - - previewv1 "github.com/documentdb/documentdb-operator/api/preview" - "github.com/documentdb/documentdb-operator/test/e2e" -) - -// walLevelFor reads the CNPG Cluster that backs the given DocumentDB and -// returns the value of its postgresql.parameters["wal_level"]. Empty -// string means the operator did not set the key (CNPG default applies, -// which is the "replica" level that disables logical decoding — i.e. -// change streams). Any error from the client is surfaced verbatim. 
-func walLevelFor(ctx context.Context, c client.Client, dd *previewv1.DocumentDB) (string, error) { - cluster := &cnpgv1.Cluster{} - if err := c.Get(ctx, client.ObjectKey{Namespace: dd.Namespace, Name: dd.Name}, cluster); err != nil { - return "", fmt.Errorf("get CNPG Cluster %s/%s: %w", dd.Namespace, dd.Name, err) - } - if cluster.Spec.PostgresConfiguration.Parameters == nil { - return "", nil - } - return cluster.Spec.PostgresConfiguration.Parameters["wal_level"], nil -} - -// DocumentDB feature-gates / change streams. -// -// The operator translates `spec.featureGates.ChangeStreams=true` into -// `wal_level=logical` on the underlying CNPG Cluster (see -// operator/src/internal/cnpg/cnpg_cluster.go). When the gate is off (or -// unset), the operator does not force a wal_level override, so CNPG's -// default ("replica") applies and change streams over the Mongo wire -// protocol are not supported by the DocumentDB extension. -// -// We assert the observable operator contract — the CNPG Cluster's -// postgresql.parameters — because: -// 1. It is image-independent: the protocol-level change-stream -// behaviour is only available in the "-changestream" DocumentDB -// image variants, which are not guaranteed to be loaded in every -// e2e environment; -// 2. It is what the operator code actually controls. -// -// A future expansion can layer a best-effort mongo `Watch` call on top -// once the suite standardises on change-stream-capable images. 
-var _ = Describe("DocumentDB feature-gates — change streams", - Label(e2e.FeatureLabel), e2e.MediumLevelLabel, - func() { - BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) - - DescribeTable("wal_level reflects ChangeStreams gate", - func(enabled, expectLogical bool) { - env := e2e.SuiteEnv() - Expect(env).ToNot(BeNil(), "SuiteEnv must be initialized") - c := env.Client - - ctx, cancel := context.WithTimeout(context.Background(), 12*time.Minute) - DeferCleanup(cancel) - - name := "ft-cs-on" - mixin := "feature_changestreams" - if !enabled { - name = "ft-cs-off" - // Omit the mixin; the base template has no - // featureGates block, so the gate is implicitly - // disabled. - mixin = "" - } - mixins := []string{} - if mixin != "" { - mixins = append(mixins, mixin) - } - dd, cleanup := setupFreshCluster(ctx, c, name, mixins, nil) - DeferCleanup(cleanup) - - walLevel, err := walLevelFor(ctx, c, dd) - Expect(err).ToNot(HaveOccurred()) - - if expectLogical { - Expect(walLevel).To(Equal("logical"), - "enabled gate must drive wal_level=logical") - } else { - Expect(walLevel).ToNot(Equal("logical"), - "disabled gate must leave wal_level off of logical; got %q", walLevel) - } - }, - Entry("enabled → wal_level=logical", true, true), - Entry("disabled → wal_level not forced to logical", false, false), - ) - }) diff --git a/test/e2e/tests/feature_gates/feature_gates_suite_test.go b/test/e2e/tests/feature_gates/feature_gates_suite_test.go index 9aaf005c..eed835a6 100644 --- a/test/e2e/tests/feature_gates/feature_gates_suite_test.go +++ b/test/e2e/tests/feature_gates/feature_gates_suite_test.go @@ -54,3 +54,30 @@ var _ = BeforeEach(func() { Expect(e2e.CheckOperatorUnchanged()).To(Succeed(), "operator health check failed — a previous spec or reconciler likely restarted the operator") }) + +// TODO(e2e/feature-gates): add a ChangeStreams spec here once the +// suite standardises on a change-stream-capable DocumentDB image. +// +// Status: experimental feature. 
The operator already translates +// `spec.featureGates.ChangeStreams=true` into `wal_level=logical` on +// the underlying CNPG Cluster (see operator/src/internal/cnpg/ +// cnpg_cluster.go), but end-to-end validation of the Mongo-wire +// `watch()` call requires the `-changestream` DocumentDB image +// variant, which is not part of the default e2e image set. +// +// Previously this area carried a tests/feature_gates/changestreams_ +// test.go that asserted the wal_level translation via the CNPG spec. +// It was removed together with manifests/mixins/feature_changestreams. +// yaml.template and the fixtures_test render check so the default +// pipeline does not imply the feature is supported in the shipped +// image. +// +// When re-enabling: +// 1. Restore manifests/mixins/feature_changestreams.yaml.template +// (single key: spec.featureGates.ChangeStreams: true). +// 2. Gate the spec behind a `needs-changestream-image` capability +// label (mirrors `needs-cert-manager`) and a preflight check that +// skips when the current documentDBImage cannot handle it. +// 3. Layer a best-effort mongo-driver `Watch` smoke on top of the +// existing wal_level assertion so both the operator and extension +// contracts are covered. From ca58471dc4c65c53462d75853aef27e5b330db97 Mon Sep 17 00:00:00 2001 From: German Date: Wed, 22 Apr 2026 13:49:38 -0700 Subject: [PATCH 07/10] test(e2e/status): verify published connection string connects via live Ping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strengthen connection_string_test with a four-layer assertion: 1. Shape (existing AssertConnectionStringMatches helper). 2. Template: assert the secret name in the $(kubectl get secret …) userinfo matches the CR's credentials secret, catching drift between the status template and the actual secret. 3. 
Semantic: regex-split userinfo from the URI (url.Parse can't handle the shell subshell in userinfo) and assert host, port, and query params (tls, replicaSet, optional tlsAllowInvalidCertificates) against the published Service and status.TLS state. 4. Live: open a port-forward to the gateway Service and Ping via mongo.NewFromDocumentDB, proving the advertised string is not just well-formed but actually usable. Covers the failure mode where status.connectionString parses but points at the wrong host/port/params, which the previous shape-only check silently allowed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: German --- .../tests/status/connection_string_test.go | 168 ++++++++++++++++-- 1 file changed, 153 insertions(+), 15 deletions(-) diff --git a/test/e2e/tests/status/connection_string_test.go b/test/e2e/tests/status/connection_string_test.go index 2358aad3..749ac7f1 100644 --- a/test/e2e/tests/status/connection_string_test.go +++ b/test/e2e/tests/status/connection_string_test.go @@ -2,7 +2,10 @@ package status import ( "context" + "fmt" "net/url" + "regexp" + "strconv" "time" . "github.com/onsi/ginkgo/v2" @@ -13,16 +16,47 @@ import ( "github.com/documentdb/documentdb-operator/test/e2e" "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/fixtures" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/mongo" + "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/portforward" "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts" ) // DocumentDB status — ConnectionString. // -// The operator publishes a mongo:// URI in status.connectionString once -// the gateway Service and credential secret are ready. We: -// 1. assert the string matches the expected "^mongodb://" shape -// (scheme + auth + host segment); -// 2. parse it with net/url and sanity-check the scheme and host. 
+// The operator publishes a `mongodb://` URI in status.connectionString +// once the gateway Service and credential secret are ready. This spec +// has three layers, ordered cheapest-first so the failure surface is +// well-separated: +// +// 1. Shape — the string matches `^mongodb://` and carries a non-empty +// host component. Catches "field unset" and "scheme drift". +// +// 2. Semantic — the string names the expected credential secret, +// targets the default gateway port, and carries every Mongo URI +// query param the Go/JS/Python driver needs (directConnection, +// authMechanism=SCRAM-SHA-256, tls, replicaSet, and +// tlsAllowInvalidCertificates correlated with status.TLS.Ready). +// Catches operator regressions that rewrite GenerateConnectionString +// in util.go. +// +// 3. Live — open a real port-forward to the gateway Service, read the +// credential secret, and Ping via mongo-driver/v2. Proves the +// (port + params) the operator published actually reach a working +// endpoint, independent of the string's literal host (which is the +// cluster-internal Service ClusterIP and so only dialable from +// outside the cluster via port-forward). +// +// Why we do not shell-eval the string +// +// status.connectionString contains `$(kubectl get secret …)` subshells +// in userinfo so that a human can paste it into a terminal and have +// credentials auto-resolve. Running `bash -c "echo "` in-test to +// exercise that roundtrip would require `kubectl` + a valid kubeconfig +// in the Ginkgo process, conflate shell and driver failure modes, and +// not work on runners without bash — we have none today, but locking +// ourselves to bash for a status assertion is a poor tradeoff. The +// string-level assertion on the secret-name reference (below) is the +// high-signal subset of that approach at a fraction of the cost. // // This spec runs against the session-scoped shared RO fixture so it // adds negligible time to the suite. 
@@ -31,7 +65,7 @@ var _ = Describe("DocumentDB status — connectionString", func() { BeforeEach(func() { e2e.SkipUnlessLevel(e2e.Medium) }) - It("publishes a valid mongodb:// URI", func() { + It("publishes a valid, dialable mongodb:// URI", func() { env := e2e.SuiteEnv() Expect(env).ToNot(BeNil()) c := env.Client @@ -44,25 +78,129 @@ var _ = Describe("DocumentDB status — connectionString", key := client.ObjectKey{Namespace: handle.Namespace(), Name: handle.Name()} - // 1. Regex assertion via the shared helper. + // Layer 1: shape assertion via the shared helper, eventually-polled + // because the operator may publish the string a reconcile or two + // after the CR flips Ready. + By("asserting status.connectionString matches ^mongodb://") Eventually( assertions.AssertConnectionStringMatches(ctx, c, key, `^mongodb://`), timeouts.For(timeouts.DocumentDBReady), timeouts.PollInterval(timeouts.DocumentDBReady), ).Should(Succeed()) - // 2. Parse + structural sanity. dd, err := handle.GetCR(ctx, c) Expect(err).ToNot(HaveOccurred()) - Expect(dd.Status.ConnectionString).ToNot(BeEmpty(), + connStr := dd.Status.ConnectionString + Expect(connStr).ToNot(BeEmpty(), "status.connectionString must be populated on a Ready DocumentDB") - u, err := url.Parse(dd.Status.ConnectionString) + // Layer 2a: credential-secret reference. Catches operator typos + // and regressions that ignore spec.documentDbCredentialSecret. + // expectedSecret mirrors utils.GenerateConnectionString's + // fallback: spec override wins, else the default secret name. + expectedSecret := dd.Spec.DocumentDbCredentialSecret + if expectedSecret == "" { + expectedSecret = mongo.DefaultCredentialSecretName + } + By(fmt.Sprintf("asserting connection string references secret %q", expectedSecret)) + // The secret name appears twice inside `kubectl get secret -n ` + // subshells — one substring match is sufficient. 
+ Expect(connStr).To(ContainSubstring("secret "+expectedSecret+" "), + "connection string must reference credential secret %q; got: %s", + expectedSecret, connStr) + + // Layer 2b: extract host:port and query params. We cannot use + // url.Parse on the full string because userinfo contains + // `$(kubectl ... | base64 -d)` which is not a valid URL + // userinfo. Strip userinfo with a regex that matches up to + // the LAST '@' before the first '/' — Mongo's default URI + // grammar guarantees userinfo does not contain '/'. + By("parsing host:port and query params from the published URI") + re := regexp.MustCompile(`^mongodb://.*@(?P[^/]+)/\?(?P.+)$`) + m := re.FindStringSubmatch(connStr) + Expect(m).ToNot(BeNil(), + "connection string must be of form mongodb://@/?; got: %s", + connStr) + hostport := m[1] + rawQuery := m[2] + + host, port, err := splitHostPort(hostport) + Expect(err).ToNot(HaveOccurred(), + "host:port segment must split cleanly; got %q", hostport) + Expect(host).ToNot(BeEmpty(), "host component must not be empty") + Expect(port).To(Equal(portforward.GatewayPort), + "connection string port must equal the default gateway port (%d); got %d", + portforward.GatewayPort, port) + + // Layer 2c: required query parameters. Each catches a distinct + // regression in GenerateConnectionString: missing + // directConnection breaks replica-set discovery through the + // gateway; missing authMechanism breaks SCRAM; missing tls or + // replicaSet breaks drivers that refuse to infer defaults. 
+ By("asserting required Mongo URI query parameters are present") + qv, err := url.ParseQuery(rawQuery) + Expect(err).ToNot(HaveOccurred(), "query must parse: %q", rawQuery) + Expect(qv.Get("directConnection")).To(Equal("true"), + "connection string must set directConnection=true") + Expect(qv.Get("authMechanism")).To(Equal("SCRAM-SHA-256"), + "connection string must set authMechanism=SCRAM-SHA-256") + Expect(qv.Get("tls")).To(Equal("true"), + "connection string must set tls=true (gateway is TLS-only)") + Expect(qv.Get("replicaSet")).To(Equal("rs0"), + "connection string must set replicaSet=rs0") + + // Layer 2d: TLS trust flag correlates with status.TLS.Ready. + // GenerateConnectionString appends tlsAllowInvalidCertificates=true + // exactly when the CR is NOT in a "trust-ready" state + // (status.TLS nil or not Ready). Inverting this flag would + // either leak self-signed exposure into production or break + // connections to trusted CAs; both are silent footguns without + // this assertion. + trustReady := dd.Status.TLS != nil && dd.Status.TLS.Ready + if trustReady { + Expect(qv.Has("tlsAllowInvalidCertificates")).To(BeFalse(), + "with status.TLS.Ready=true the connection string must NOT set tlsAllowInvalidCertificates") + } else { + Expect(qv.Get("tlsAllowInvalidCertificates")).To(Equal("true"), + "with status.TLS.Ready=false the connection string must set tlsAllowInvalidCertificates=true") + } + + // Layer 3: live Ping through the same (port + params + secret) + // contract. NewFromDocumentDB opens a port-forward to the + // gateway Service, reads the credential secret, dials with + // TLS+InsecureSkipVerify (matching tlsAllowInvalidCertificates + // behaviour for this spec's shared self-signed fixture), and + // Pings. Any mismatch between the published string's port / + // secret-name and what actually serves traffic surfaces here + // as a connect or auth failure. 
+ By("dialing the gateway via port-forward and running Ping") + dialCtx, dialCancel := context.WithTimeout(ctx, timeouts.For(timeouts.MongoConnect)) + DeferCleanup(dialCancel) + mh, err := mongo.NewFromDocumentDB(dialCtx, env, dd.Namespace, dd.Name, + mongo.WithTLSInsecure()) Expect(err).ToNot(HaveOccurred(), - "status.connectionString must parse as a URL: %q", dd.Status.ConnectionString) - Expect(u.Scheme).To(Equal("mongodb"), - "connection string scheme must be mongodb; got %q", u.Scheme) - Expect(u.Host).ToNot(BeEmpty(), - "connection string must carry a host component") + "must be able to dial + Ping using the contract described by status.connectionString") + DeferCleanup(func() { + closeCtx, closeCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer closeCancel() + _ = mh.Close(closeCtx) + }) }) }) + +// splitHostPort splits a "host:port" segment where the port is +// numeric. We avoid net.SplitHostPort only because the host in this +// spec is a ClusterIP and so unambiguously not an IPv6 literal — a +// focused parser makes the "port drift" failure message more direct. +func splitHostPort(hostport string) (host string, port int, err error) { + for i := len(hostport) - 1; i >= 0; i-- { + if hostport[i] == ':' { + p, perr := strconv.Atoi(hostport[i+1:]) + if perr != nil { + return "", 0, fmt.Errorf("port segment not numeric: %q", hostport[i+1:]) + } + return hostport[:i], p, nil + } + } + return "", 0, fmt.Errorf("host:port missing ':' separator: %q", hostport) +} From fa1578987cefa1a9a478c23cf9f2e4e7c8a3e4f5 Mon Sep 17 00:00:00 2001 From: German Date: Wed, 22 Apr 2026 13:49:46 -0700 Subject: [PATCH 08/10] docs(copilot): document Priority as a GitHub Project field, not a label The documentdb org tracks issue priority via the single-select 'Priority' field on project boards #6 and #4 (options P0/P1/P2), not via repo labels. 
Record the project/field/option IDs and the gh CLI workflow so future assistants don't invent P0 labels that drift from the actual triage signal. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: German --- .github/copilot-instructions.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 006c6f96..ceff3cf0 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -66,6 +66,33 @@ The code review agent will: - Emit events for significant state changes - Use finalizers for cleanup operations +## Issue Triage & Priority + +**Priority is tracked via GitHub Projects, not labels.** Do not create `P0`/`P1`/`P2` labels; the repo intentionally doesn't use them. + +- Planning board: [DocumentDB k8s operator planning board](https://github.com/orgs/documentdb/projects/6) (project number `6`, owner `documentdb`) +- Issue tracking board: [DocumentDB issue tracking](https://github.com/orgs/documentdb/projects/4) (project number `4`) +- Both boards have a single-select `Priority` field with values `P0`, `P1`, `P2`. + +### Setting priority on a new issue + +1. Add the issue to the relevant project: + ```bash + gh project item-add 6 --owner documentdb --url + ``` +2. Set the Priority field using `gh project item-edit` with the project + field + option IDs (obtainable via `gh project field-list 6 --owner documentdb --format json` and the GraphQL `options` query). Example: + ```bash + gh api graphql -f query=' + mutation($project:ID!,$item:ID!,$field:ID!,$opt:String!){ + updateProjectV2ItemFieldValue(input:{projectId:$project,itemId:$item,fieldId:$field,value:{singleSelectOptionId:$opt}}){projectV2Item{id}} + }' -F project=PVT_kwDODDbYls4BIeDc -F item= -F field=PVTSSF_lADODDbYls4BIeDczg4658Q -F opt= + ``` + +### Assignment + +- Reviewers / maintainers are listed in `CODEOWNERS` and `MAINTAINERS.md`. 
Rayhan Hossain's GitHub handle is `hossain-rayhan`. +- Use `gh issue edit <issue-number> --repo documentdb/documentdb-kubernetes-operator --add-assignee <github-handle>` rather than editing through the UI so the change is auditable. + ## Commit Messages Follow conventional commits format: From de201de7b9a4023001e8a93841cc94c61dfdde00 Mon Sep 17 00:00:00 2001 From: German Date: Wed, 22 Apr 2026 13:52:52 -0700 Subject: [PATCH 09/10] docs(copilot): require DCO sign-off on every commit Record the DCO requirement and the rebase-exec recipe for retrofitting Signed-off-by trailers, so future assistants don't ship commits that the DCO bot will block on the PR. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: German --- .github/copilot-instructions.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index ceff3cf0..7bb069de 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -102,3 +102,21 @@ Follow conventional commits format: - `test:` for test additions/changes - `refactor:` for code refactoring - `chore:` for maintenance tasks + +### DCO Sign-off (Required) + +Every commit **must** carry a `Signed-off-by:` trailer — the repo enforces the +[Developer Certificate of Origin](../contribute/developer-certificate-of-origin) +via a DCO check on PRs, and unsigned commits block the merge. + +- Use `git commit -s` (or `git commit --signoff`) for new commits. +- To retrofit sign-offs onto commits you already made on the current branch: + ```bash + GIT_SEQUENCE_EDITOR=: git rebase -i <base-ref> \ + --exec 'git commit --amend --no-edit --signoff' + ``` + (Plain `git rebase --signoff` is a no-op when commits don't need to be replayed.) + - Verify before pushing: `git log -n <count> --format='%(trailers:key=Signed-off-by)'` + must print a trailer for every commit. +- The sign-off is in addition to the `Co-authored-by: Copilot …` trailer, not a + replacement for it.
From b7803d5073ca315bba9de2ab14e6f9abaf8827f9 Mon Sep 17 00:00:00 2001 From: German Date: Wed, 22 Apr 2026 14:09:32 -0700 Subject: [PATCH 10/10] fix(e2e): address Copilot review comments on PR #346 - tests/exposure/clusterip: remove no-op client.ObjectKeyFromObject(dd) and drop the now-unused controller-runtime client import. - tests/data/sort_limit_skip: rename the local slice that shadowed the predeclared 'any' identifier to docsAny. - pkg/e2eutils/timeouts: refresh the 'when adding an op' comment to point at documentDBDefaults/cnpgAlias (there is no switch). - test/e2e/go.mod: bump go directive to 1.25.9 to match operator/src after rebase onto upstream/main. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: German --- test/e2e/go.mod | 2 +- test/e2e/pkg/e2eutils/timeouts/timeouts.go | 3 ++- test/e2e/tests/data/sort_limit_skip_test.go | 6 +++--- test/e2e/tests/exposure/clusterip_test.go | 3 --- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/test/e2e/go.mod b/test/e2e/go.mod index 69611d4d..777fbf1d 100644 --- a/test/e2e/go.mod +++ b/test/e2e/go.mod @@ -1,6 +1,6 @@ module github.com/documentdb/documentdb-operator/test/e2e -go 1.25.8 +go 1.25.9 require ( github.com/cloudnative-pg/cloudnative-pg v1.28.1 diff --git a/test/e2e/pkg/e2eutils/timeouts/timeouts.go b/test/e2e/pkg/e2eutils/timeouts/timeouts.go index 420e62ec..8a18b73c 100644 --- a/test/e2e/pkg/e2eutils/timeouts/timeouts.go +++ b/test/e2e/pkg/e2eutils/timeouts/timeouts.go @@ -18,7 +18,8 @@ import ( type Op string // DocumentDB-specific operations. When adding an entry here, also -// extend the switch in For and PollInterval. +// extend documentDBDefaults/cnpgAlias (and PollInterval if the new op +// needs a non-default poll cadence). const ( // DocumentDBReady waits for a fresh DocumentDB cluster to reach the // running state after creation. 
diff --git a/test/e2e/tests/data/sort_limit_skip_test.go b/test/e2e/tests/data/sort_limit_skip_test.go index 06776c55..324382af 100644 --- a/test/e2e/tests/data/sort_limit_skip_test.go +++ b/test/e2e/tests/data/sort_limit_skip_test.go @@ -31,11 +31,11 @@ var _ = Describe("DocumentDB data — sort/limit/skip", handle, dbName = connectSharedRO(ctx) coll = handle.Database(dbName).Collection("sort_cursor") docs := seed.SortDataset() - any := make([]any, len(docs)) + docsAny := make([]any, len(docs)) for i := range docs { - any[i] = docs[i] + docsAny[i] = docs[i] } - _, err := coll.InsertMany(ctx, any) + _, err := coll.InsertMany(ctx, docsAny) Expect(err).NotTo(HaveOccurred()) }) AfterAll(func() { diff --git a/test/e2e/tests/exposure/clusterip_test.go b/test/e2e/tests/exposure/clusterip_test.go index d1b18fe2..7d614a45 100644 --- a/test/e2e/tests/exposure/clusterip_test.go +++ b/test/e2e/tests/exposure/clusterip_test.go @@ -11,7 +11,6 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" - "sigs.k8s.io/controller-runtime/pkg/client" "github.com/documentdb/documentdb-operator/test/e2e" "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions" @@ -95,7 +94,5 @@ var _ = Describe("DocumentDB exposure — ClusterIP", return pingErr }, timeouts.For(timeouts.MongoConnect), timeouts.PollInterval(timeouts.MongoConnect)). Should(Succeed(), "mongo ping through ClusterIP port-forward: %v", pingErr) - - _ = client.ObjectKeyFromObject(dd) }) })