Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,39 @@ verify-with-container: ## Run verification in a container.
govulncheck: $(GOVULNCHECK) $(OUTPUT_DIR) ## Run govulncheck vulnerability scan.
@./hack/govulncheck.sh $(GOVULNCHECK) $(OUTPUT_DIR)

# ============================================================================
# E2E Coverage
# ============================================================================

##@ E2E Coverage
##
## Targets for building a coverage-instrumented operator image, collecting
## coverage data written during E2E tests, and uploading the report to Codecov.
## Uses emptyDir (no PVC): the collect step sends SIGTERM to the operator
## process, waits for container restart, then copies data from the running pod.
##
## Typical flow (local):
## make image-build-coverage image-push-coverage # build & push coverage image
## COVERAGE_IMAGE=<pullspec> hack/e2e-coverage.sh setup # patch CSV
## make test-e2e # run E2E suite
## make e2e-coverage-collect # collect + upload
##
## In CI, hack/e2e-coverage.sh handles setup and collection automatically.

COVERAGE_IMG ?= $(IMG)-e2e-coverage

.PHONY: image-build-coverage
image-build-coverage: ## Build coverage-instrumented container image.
$(CONTAINER_ENGINE) build -f images/ci/Dockerfile.coverage -t $(COVERAGE_IMG) .

.PHONY: image-push-coverage
image-push-coverage: ## Push coverage-instrumented container image.
$(CONTAINER_ENGINE) push $(COVERAGE_IMG) $(CONTAINER_PUSH_ARGS)

.PHONY: e2e-coverage-collect
e2e-coverage-collect: ## Collect e2e coverage data and optionally upload to Codecov.
ARTIFACT_DIR=$${ARTIFACT_DIR:-.} hack/e2e-coverage.sh collect

# ============================================================================
# Maintenance
# ============================================================================
Expand Down
179 changes: 179 additions & 0 deletions hack/e2e-coverage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#!/usr/bin/env bash
#
# E2E coverage lifecycle script for CI and local use.
#
# Usage:
# hack/e2e-coverage.sh setup Prepare the operator for coverage collection
# hack/e2e-coverage.sh collect Collect, convert, and optionally upload coverage data
#
# Environment variables:
# COVERAGE_IMAGE (setup) Full pullspec of the coverage-instrumented image
# CODECOV_TOKEN (collect) Codecov upload token; skip upload if unset
# ARTIFACT_DIR (collect) Directory for CI artifacts; defaults to "."
set -euo pipefail

NAMESPACE="cert-manager-operator"
DEPLOYMENT="cert-manager-operator-controller-manager"
GOCOVERDIR_PATH="/tmp/e2e-cover"
CODECOV_SECRET_PATH="/var/run/secrets/codecov/CODECOV_TOKEN"
POD_LABEL="name=cert-manager-operator"

setup() {
echo "--- E2E Coverage Setup ---"

if [[ -z "${COVERAGE_IMAGE:-}" ]]; then
echo "Error: COVERAGE_IMAGE env var must be set"
exit 1
fi
echo "Coverage image: ${COVERAGE_IMAGE}"

local csv
csv=$(oc get deployment "${DEPLOYMENT}" -n "${NAMESPACE}" \
-o jsonpath='{.metadata.ownerReferences[?(@.kind=="ClusterServiceVersion")].name}' 2>/dev/null)

if [[ -n "${csv}" ]]; then
echo "Found CSV: ${csv} -- patching via CSV"
oc patch csv "${csv}" -n "${NAMESPACE}" --type=json -p "[
{\"op\": \"replace\", \"path\": \"/spec/install/spec/deployments/0/spec/template/spec/containers/0/image\", \"value\": \"${COVERAGE_IMAGE}\"},
{\"op\": \"add\", \"path\": \"/spec/install/spec/deployments/0/spec/template/spec/containers/0/env/-\", \"value\": {\"name\": \"GOCOVERDIR\", \"value\": \"${GOCOVERDIR_PATH}\"}},
{\"op\": \"add\", \"path\": \"/spec/install/spec/deployments/0/spec/template/spec/containers/0/volumeMounts/-\", \"value\": {\"name\": \"coverage-data\", \"mountPath\": \"${GOCOVERDIR_PATH}\"}},
{\"op\": \"add\", \"path\": \"/spec/install/spec/deployments/0/spec/template/spec/volumes/-\", \"value\": {\"name\": \"coverage-data\", \"emptyDir\": {}}}
]"
else
echo "No CSV found -- patching deployment directly"
oc set image "deployment/${DEPLOYMENT}" -n "${NAMESPACE}" \
cert-manager-operator="${COVERAGE_IMAGE}"
oc set env "deployment/${DEPLOYMENT}" -n "${NAMESPACE}" \
-c cert-manager-operator GOCOVERDIR="${GOCOVERDIR_PATH}"

local has_vol
has_vol=$(oc get "deployment/${DEPLOYMENT}" -n "${NAMESPACE}" \
-o jsonpath='{.spec.template.spec.volumes[?(@.name=="coverage-data")].name}' 2>/dev/null)
if [[ -z "${has_vol}" ]]; then
oc patch "deployment/${DEPLOYMENT}" -n "${NAMESPACE}" --type=json -p "[
{\"op\": \"add\", \"path\": \"/spec/template/spec/containers/0/volumeMounts/-\", \"value\": {\"name\": \"coverage-data\", \"mountPath\": \"${GOCOVERDIR_PATH}\"}},
{\"op\": \"add\", \"path\": \"/spec/template/spec/volumes/-\", \"value\": {\"name\": \"coverage-data\", \"emptyDir\": {}}}
]"
else
echo "Volume 'coverage-data' already exists -- skipping volume patch"
fi
fi

echo "Waiting for operator rollout with coverage image..."
sleep 5
oc rollout status "deployment/${DEPLOYMENT}" -n "${NAMESPACE}" --timeout=180s

echo "Verifying GOCOVERDIR is set in the running pod..."
oc exec -n "${NAMESPACE}" "deploy/${DEPLOYMENT}" -- env | grep GOCOVERDIR || \
echo "Warning: GOCOVERDIR not found in pod env (non-fatal)"

echo "--- Coverage setup complete ---"
}

collect() {
echo "--- E2E Coverage Collection ---"

local artifact_dir="${ARTIFACT_DIR:-.}"
local coverage_dir="${artifact_dir}/e2e-cover-data"
local coverage_profile="${artifact_dir}/coverage-e2e.out"

if [[ -z "${CODECOV_TOKEN:-}" ]] && [[ -f "${CODECOV_SECRET_PATH}" ]]; then
CODECOV_TOKEN=$(cat "${CODECOV_SECRET_PATH}")
export CODECOV_TOKEN
fi

local pod
pod=$(oc get pod -n "${NAMESPACE}" -l "${POD_LABEL}" \
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
if [[ -z "${pod}" ]]; then
echo "Error: no operator pod found in namespace ${NAMESPACE}"
exit 1
fi
echo "Operator pod: ${pod}"

echo "Sending SIGTERM to operator process to flush coverage data..."
oc exec -n "${NAMESPACE}" "${pod}" -c cert-manager-operator -- /bin/sh -c 'kill -TERM 1' || true

echo "Waiting for container to restart..."
sleep 10
oc wait pod/"${pod}" --for=condition=Ready -n "${NAMESPACE}" --timeout=120s
Comment on lines +94 to +99
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Hardcoded sleep may be insufficient for container restart.

The script waits exactly 10 seconds after sending SIGTERM before checking pod readiness. Container restart time can vary based on image pull policies, resource constraints, and cluster load. This fixed delay creates a race condition that may cause premature readiness checks.

⏱️ Proposed fix to add adaptive wait logic
     echo "Sending SIGTERM to operator process to flush coverage data..."
     oc exec -n "${NAMESPACE}" "${pod}" -c cert-manager-operator -- /bin/sh -c 'kill -TERM 1' || true
 
-    echo "Waiting for container to restart..."
-    sleep 10
-    oc wait pod/"${pod}" --for=condition=Ready -n "${NAMESPACE}" --timeout=120s
+    echo "Waiting for pod to become NotReady (container restarting)..."
+    for i in {1..30}; do
+        if ! oc get pod/"${pod}" -n "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' | grep -q "True"; then
+            echo "Pod is restarting..."
+            break
+        fi
+        sleep 1
+    done
+    
+    echo "Waiting for pod to become Ready again..."
+    oc wait pod/"${pod}" --for=condition=Ready -n "${NAMESPACE}" --timeout=120s
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
echo "Sending SIGTERM to operator process to flush coverage data..."
oc exec -n "${NAMESPACE}" "${pod}" -c cert-manager-operator -- /bin/sh -c 'kill -TERM 1' || true
echo "Waiting for container to restart..."
sleep 10
oc wait pod/"${pod}" --for=condition=Ready -n "${NAMESPACE}" --timeout=120s
echo "Sending SIGTERM to operator process to flush coverage data..."
oc exec -n "${NAMESPACE}" "${pod}" -c cert-manager-operator -- /bin/sh -c 'kill -TERM 1' || true
echo "Waiting for pod to become NotReady (container restarting)..."
for i in {1..30}; do
if ! oc get pod/"${pod}" -n "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' | grep -q "True"; then
echo "Pod is restarting..."
break
fi
sleep 1
done
echo "Waiting for pod to become Ready again..."
oc wait pod/"${pod}" --for=condition=Ready -n "${NAMESPACE}" --timeout=120s
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@hack/e2e-coverage.sh` around lines 94 - 99, The fixed "sleep 10" after
sending SIGTERM is flaky; replace the static delay with an adaptive wait that
polls pod readiness. Remove the sleep and instead loop (or call oc wait
directly) to check the pod referenced by "${pod}" in "${NAMESPACE}" for
phase=Running and container ready (or use oc wait pod/"${pod}"
--for=condition=Ready --timeout=120s without the prior sleep), retrying until
success or timeout; ensure the logic surrounds the oc exec kill -TERM 1 call and
uses the same pod variable so it handles variable restart times instead of a
fixed 10s pause.


mkdir -p "${coverage_dir}"
echo "Copying coverage data from operator pod..."
oc cp "${NAMESPACE}/${pod}:${GOCOVERDIR_PATH}/." "${coverage_dir}" -c cert-manager-operator

echo "Coverage files:"
ls -la "${coverage_dir}/" 2>/dev/null || true

if ls "${coverage_dir}"/covmeta.* >/dev/null 2>&1; then
echo "Converting coverage data to Go profile format..."
go tool covdata textfmt -i="${coverage_dir}" -o="${coverage_profile}"

echo ""
echo "=== E2E Coverage Summary ==="
go tool covdata percent -i="${coverage_dir}"
echo "============================="
echo ""
echo "Coverage profile: ${coverage_profile} ($(wc -l < "${coverage_profile}") lines)"

if [[ -n "${CODECOV_TOKEN:-}" ]]; then
echo "Uploading to Codecov..."
local codecov_bin="${artifact_dir}/codecov"
curl -sS -o "${codecov_bin}" \
https://uploader.codecov.io/latest/linux/codecov
curl -sS -o "${codecov_bin}.SHA256SUM" \
https://uploader.codecov.io/latest/linux/codecov.SHA256SUM

cd "$(dirname "${codecov_bin}")" && sha256sum -c "$(basename "${codecov_bin}").SHA256SUM" && cd - >/dev/null
chmod +x "${codecov_bin}"
Comment on lines +122 to +128
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Improve error handling in checksum verification.

The directory change for sha256sum verification (line 127) does not properly handle failures. If cd fails or the checksum verification fails, the script continues to cd - which may fail or return to the wrong directory, and the error is suppressed by redirecting to /dev/null.

🔒 Proposed fix for safer checksum verification
-            cd "$(dirname "${codecov_bin}")" && sha256sum -c "$(basename "${codecov_bin}").SHA256SUM" && cd - >/dev/null
+            if ! (cd "$(dirname "${codecov_bin}")" && sha256sum -c "$(basename "${codecov_bin}").SHA256SUM"); then
+                echo "Error: Codecov binary checksum verification failed"
+                exit 1
+            fi
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
curl -sS -o "${codecov_bin}" \
https://uploader.codecov.io/latest/linux/codecov
curl -sS -o "${codecov_bin}.SHA256SUM" \
https://uploader.codecov.io/latest/linux/codecov.SHA256SUM
cd "$(dirname "${codecov_bin}")" && sha256sum -c "$(basename "${codecov_bin}").SHA256SUM" && cd - >/dev/null
chmod +x "${codecov_bin}"
curl -sS -o "${codecov_bin}" \
https://uploader.codecov.io/latest/linux/codecov
curl -sS -o "${codecov_bin}.SHA256SUM" \
https://uploader.codecov.io/latest/linux/codecov.SHA256SUM
if ! (cd "$(dirname "${codecov_bin}")" && sha256sum -c "$(basename "${codecov_bin}").SHA256SUM"); then
echo "Error: Codecov binary checksum verification failed"
exit 1
fi
chmod +x "${codecov_bin}"
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@hack/e2e-coverage.sh` around lines 122 - 128, The checksum verification
currently does a plain cd and then runs sha256sum, which hides failures and
still attempts cd -; change this to run verification in a safe context and abort
on failure: use a subshell or pushd/popd so directory changes cannot leak (e.g.,
(cd "$(dirname "${codecov_bin}")" && sha256sum -c "$(basename
"${codecov_bin}").SHA256SUM")) and check the exit status of sha256sum, exiting
the script on non-zero; only run chmod +x "${codecov_bin}" after the
verification succeeds. Reference: codecov_bin, sha256sum -c, and chmod +x
"${codecov_bin}".


local -a codecov_args=(
--file="${coverage_profile}"
--flags=e2e
--name="E2E Coverage"
--verbose
)

local job_type="${JOB_TYPE:-local}"
if [[ "${job_type}" == "presubmit" ]]; then
echo "Detected presubmit (PR #${PULL_NUMBER:-unknown})"
[[ -n "${PULL_NUMBER:-}" ]] && codecov_args+=(--pr "${PULL_NUMBER}")
[[ -n "${PULL_PULL_SHA:-}" ]] && codecov_args+=(--sha "${PULL_PULL_SHA}")
[[ -n "${PULL_BASE_REF:-}" ]] && codecov_args+=(--branch "${PULL_BASE_REF}")
[[ -n "${REPO_OWNER:-}" && -n "${REPO_NAME:-}" ]] && codecov_args+=(--slug "${REPO_OWNER}/${REPO_NAME}")
elif [[ "${job_type}" == "postsubmit" ]]; then
echo "Detected postsubmit (branch ${PULL_BASE_REF:-unknown})"
[[ -n "${PULL_BASE_SHA:-}" ]] && codecov_args+=(--sha "${PULL_BASE_SHA}")
[[ -n "${PULL_BASE_REF:-}" ]] && codecov_args+=(--branch "${PULL_BASE_REF}")
[[ -n "${REPO_OWNER:-}" && -n "${REPO_NAME:-}" ]] && codecov_args+=(--slug "${REPO_OWNER}/${REPO_NAME}")
else
echo "Local run -- no Prow context, Codecov will auto-detect from git"
fi

"${codecov_bin}" "${codecov_args[@]}" || echo "Warning: Codecov upload failed (non-fatal)"
rm -f "${codecov_bin}" "${codecov_bin}.SHA256SUM"
else
echo "CODECOV_TOKEN not set -- skipping Codecov upload."
echo "Coverage profile saved as artifact: ${coverage_profile}"
fi
else
echo "Warning: No coverage data found in ${coverage_dir}"
echo "The operator may not have been built with coverage instrumentation,"
echo "or it may not have exited cleanly (SIGKILL instead of SIGTERM)."
fi

echo "--- Coverage collection complete ---"
}

case "${1:-}" in
setup)
setup
;;
collect)
collect
;;
*)
echo "Usage: $0 {setup|collect}" >&2
exit 1
;;
esac
27 changes: 27 additions & 0 deletions images/ci/Dockerfile.coverage
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Build the cert-manager-operator binary with coverage instrumentation.
# This mirrors images/ci/Dockerfile but adds Go coverage flags so the binary
# records which lines are executed during E2E tests.
FROM registry.ci.openshift.org/ocp/builder:rhel-9-golang-1.25-openshift-4.21 AS builder

ARG SRC_DIR=/go/src/github.com/openshift/cert-manager-operator
ENV GO_BUILD_TAGS=strictfipsruntime,openssl
ENV GOEXPERIMENT=strictfipsruntime
ENV CGO_ENABLED=1
ENV GOFLAGS=""

WORKDIR $SRC_DIR

COPY . .

RUN go build -tags $GO_BUILD_TAGS \
-cover -covermode=count -coverpkg=./... \
-o cert-manager-operator .

FROM registry.access.redhat.com/ubi9-minimal:9.4
RUN microdnf install -y tar && microdnf clean all
ARG SRC_DIR=/go/src/github.com/openshift/cert-manager-operator
COPY --from=builder $SRC_DIR/cert-manager-operator /usr/bin/cert-manager-operator
RUN mkdir -p /tmp/e2e-cover && chown 65534:65534 /tmp/e2e-cover && chmod 700 /tmp/e2e-cover
USER 65534:65534
ENV GOCOVERDIR=/tmp/e2e-cover
ENTRYPOINT ["/usr/bin/cert-manager-operator"]