diff --git a/internal/seitask/keygen/hd.go b/internal/keygen/hd.go similarity index 100% rename from internal/seitask/keygen/hd.go rename to internal/keygen/hd.go diff --git a/internal/seitask/keygen/hd_test.go b/internal/keygen/hd_test.go similarity index 100% rename from internal/seitask/keygen/hd_test.go rename to internal/keygen/hd_test.go diff --git a/internal/keygen/keygen.go b/internal/keygen/keygen.go new file mode 100644 index 00000000..a9c9e250 --- /dev/null +++ b/internal/keygen/keygen.go @@ -0,0 +1,82 @@ +// Package keygen derives a Sei chain account: a BIP-39 mnemonic + the cosmos +// secp256k1 address at the standard coin-type-118 path, bech32-encoded with the +// "sei" prefix. The full pipeline (entropy → mnemonic → seed → BIP-32 master → +// BIP-44 child → secp256k1 → ripemd160 → bech32) matches `seid keys add` +// byte-for-byte, so a mnemonic generated here imports verbatim into a seid +// keyring. +// +// This is the general, k8s-free derivation primitive. Callers that need to stamp +// the result into a Secret / workflow-vars layer sit on top of it — see +// internal/seitask/keygen for the seitask-runner's Secret writer. +package keygen + +import ( + "crypto/sha256" + "fmt" + + "github.com/btcsuite/btcd/btcec/v2" + "github.com/cosmos/btcutil/bech32" + bip39 "github.com/cosmos/go-bip39" + "golang.org/x/crypto/ripemd160" //nolint:staticcheck // Cosmos address derivation is bound to RIPEMD-160 by protocol. +) + +// cosmosHDPath is the cosmos BIP-44 path, coin type 118. Matches `seid keys add`. +const cosmosHDPath = "m/44'/118'/0'/0/0" + +const bech32AccountPrefix = "sei" + +// SecretMnemonicKey is the conventional Secret data key the mnemonic is stored +// under; downstream pods reference it via secretKeyRef. +const SecretMnemonicKey = "mnemonic" + +// Identity is a derived account: the mnemonic that produced it and its bech32 +// address. The mnemonic is the secret material; treat it accordingly. 
+type Identity struct { + Mnemonic string + Address string +} + +// Derive generates a 24-word BIP-39 mnemonic and derives the cosmos secp256k1 +// address at m/44'/118'/0'/0/0. +func Derive() (Identity, error) { + // 24 words → 256 bits entropy; matches seid default. + entropy, err := bip39.NewEntropy(256) + if err != nil { + return Identity{}, fmt.Errorf("entropy: %w", err) + } + mnemonic, err := bip39.NewMnemonic(entropy) + if err != nil { + return Identity{}, fmt.Errorf("mnemonic: %w", err) + } + + // BIP-39 PBKDF2 → 64-byte seed; empty passphrase matches seid default. + seed, err := bip39.NewSeedWithErrorChecking(mnemonic, "") + if err != nil { + return Identity{}, fmt.Errorf("seed: %w", err) + } + master, chainCode := computeMasterFromSeed(seed) + privKey, err := derivePrivateKeyForPath(master, chainCode, cosmosHDPath) + if err != nil { + return Identity{}, fmt.Errorf("derive %s: %w", cosmosHDPath, err) + } + _, pub := btcec.PrivKeyFromBytes(privKey) + pubCompressed := pub.SerializeCompressed() + + // Cosmos address = ripemd160(sha256(pubkey_compressed)), bech32-encoded. 
+ sha := sha256.Sum256(pubCompressed) + hasher := ripemd160.New() + if _, err := hasher.Write(sha[:]); err != nil { + return Identity{}, fmt.Errorf("ripemd160: %w", err) + } + addrBytes := hasher.Sum(nil) + + converted, err := bech32.ConvertBits(addrBytes, 8, 5, true) + if err != nil { + return Identity{}, fmt.Errorf("bech32 convert: %w", err) + } + address, err := bech32.Encode(bech32AccountPrefix, converted) + if err != nil { + return Identity{}, fmt.Errorf("bech32 encode: %w", err) + } + return Identity{Mnemonic: mnemonic, Address: address}, nil +} diff --git a/internal/seitask/keygen/keygen.go b/internal/seitask/keygen/keygen.go index 4e75105d..23ddeb91 100644 --- a/internal/seitask/keygen/keygen.go +++ b/internal/seitask/keygen/keygen.go @@ -1,37 +1,25 @@ -// Package keygen implements `seitask keygen`: generate a BIP-39 mnemonic + -// cosmos secp256k1 keypair, write the mnemonic to a per-run Secret named +// Package keygen implements `seitask keygen`: derive a Sei account via the +// general internal/keygen primitive, write the mnemonic to a per-run Secret named // "-", and publish ADMIN_ADDRESS / ADMIN_SECRET_NAME // to workflow-vars. All created resources carry an ownerRef to the parent -// Workflow CR for cascade GC. +// Workflow CR for cascade GC. The key derivation itself lives in +// internal/keygen (k8s-free, reused by the test harness); this package is the +// seitask-runner's Secret/workflow-vars writer on top of it. package keygen import ( "context" - "crypto/sha256" "fmt" - "github.com/btcsuite/btcd/btcec/v2" - "github.com/cosmos/btcutil/bech32" - bip39 "github.com/cosmos/go-bip39" - "golang.org/x/crypto/ripemd160" //nolint:staticcheck // Cosmos address derivation is bound to RIPEMD-160 by protocol. 
- corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" + keyderive "github.com/sei-protocol/sei-k8s-controller/internal/keygen" "github.com/sei-protocol/sei-k8s-controller/internal/taskruntime" ) -// cosmos BIP-44 path, coin type 118. Matches `seid keys add` so mnemonics -// generated here import verbatim into a seid keyring. -const cosmosHDPath = "m/44'/118'/0'/0/0" - -const bech32AccountPrefix = "sei" - -// SecretMnemonicKey is the data key downstream pods reference via secretKeyRef. -const SecretMnemonicKey = "mnemonic" - const fieldOwner client.FieldOwner = "seitask-keygen" // Params carries the typed inputs to Run. @@ -79,7 +67,7 @@ func Run(ctx context.Context, c client.Client, p Params) (Result, error) { return Result{}, taskruntime.Infra(fmt.Errorf("reading existing Secret %q: %w", secretName, err)) } - mnemonic, address, err := deriveIdentity() + id, err := keyderive.Derive() if err != nil { return Result{}, taskruntime.Infra(fmt.Errorf("deriving identity: %w", err)) } @@ -92,11 +80,11 @@ func Run(ctx context.Context, c client.Client, p Params) (Result, error) { }, Type: corev1.SecretTypeOpaque, Data: map[string][]byte{ - SecretMnemonicKey: []byte(mnemonic), + keyderive.SecretMnemonicKey: []byte(id.Mnemonic), // address is duplicated into the Secret so a re-run of keygen // can reuse the existing identity without re-deriving from the // mnemonic (the Secret is the source of truth for both). 
- "address": []byte(address), + "address": []byte(id.Address), }, } if err := c.Create(ctx, secret, fieldOwner); err != nil { @@ -108,10 +96,10 @@ func Run(ctx context.Context, c client.Client, p Params) (Result, error) { return Result{}, taskruntime.Infra(fmt.Errorf("creating Secret %q: %w", secretName, err)) } - if err := writeWorkflowVars(ctx, c, p.Workflow, address, secretName); err != nil { + if err := writeWorkflowVars(ctx, c, p.Workflow, id.Address, secretName); err != nil { return Result{}, err } - return Result{SecretName: secretName, Address: address}, nil + return Result{SecretName: secretName, Address: id.Address}, nil } func writeWorkflowVars(ctx context.Context, c client.Client, w taskruntime.WorkflowIdentity, address, secretName string) error { @@ -125,50 +113,3 @@ func writeWorkflowVars(ctx context.Context, c client.Client, w taskruntime.Workf taskruntime.KeyAdminSecretName: secretName, }) } - -// deriveIdentity generates a 24-word BIP-39 mnemonic and derives the cosmos -// secp256k1 address at m/44'/118'/0'/0/0. The full pipeline (entropy → -// mnemonic → seed → BIP-32 master → BIP-44 child → secp256k1 → ripemd160 -// → bech32) matches `seid keys add` byte-for-byte. -func deriveIdentity() (mnemonic, address string, err error) { - // 24 words → 256 bits entropy; matches seid default. - entropy, err := bip39.NewEntropy(256) - if err != nil { - return "", "", fmt.Errorf("entropy: %w", err) - } - mnemonic, err = bip39.NewMnemonic(entropy) - if err != nil { - return "", "", fmt.Errorf("mnemonic: %w", err) - } - - // BIP-39 PBKDF2 → 64-byte seed; empty passphrase matches seid default. 
- seed, err := bip39.NewSeedWithErrorChecking(mnemonic, "") - if err != nil { - return "", "", fmt.Errorf("seed: %w", err) - } - master, chainCode := computeMasterFromSeed(seed) - privKey, err := derivePrivateKeyForPath(master, chainCode, cosmosHDPath) - if err != nil { - return "", "", fmt.Errorf("derive %s: %w", cosmosHDPath, err) - } - _, pub := btcec.PrivKeyFromBytes(privKey) - pubCompressed := pub.SerializeCompressed() - - // Cosmos address = ripemd160(sha256(pubkey_compressed)), bech32-encoded. - sha := sha256.Sum256(pubCompressed) - hasher := ripemd160.New() - if _, err := hasher.Write(sha[:]); err != nil { - return "", "", fmt.Errorf("ripemd160: %w", err) - } - addrBytes := hasher.Sum(nil) - - converted, err := bech32.ConvertBits(addrBytes, 8, 5, true) - if err != nil { - return "", "", fmt.Errorf("bech32 convert: %w", err) - } - address, err = bech32.Encode(bech32AccountPrefix, converted) - if err != nil { - return "", "", fmt.Errorf("bech32 encode: %w", err) - } - return mnemonic, address, nil -} diff --git a/internal/seitask/keygen/keygen_test.go b/internal/seitask/keygen/keygen_test.go index 34f25cb3..4dbe69bc 100644 --- a/internal/seitask/keygen/keygen_test.go +++ b/internal/seitask/keygen/keygen_test.go @@ -11,6 +11,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + keyderive "github.com/sei-protocol/sei-k8s-controller/internal/keygen" "github.com/sei-protocol/sei-k8s-controller/internal/taskruntime" ) @@ -58,7 +59,7 @@ func TestRun_CreatesSecretAndWorkflowVars(t *testing.T) { if err := c.Get(context.Background(), types.NamespacedName{Namespace: testNamespace, Name: testSecretName}, secret); err != nil { t.Fatalf("Get Secret: %v", err) } - mnemonic, ok := secret.Data[SecretMnemonicKey] + mnemonic, ok := secret.Data[keyderive.SecretMnemonicKey] if !ok || len(mnemonic) == 0 { t.Fatalf("mnemonic missing from Secret") } diff --git a/sdk/sei/provider.go b/sdk/sei/provider.go index 458cdeac..7a6125d3 
100644 --- a/sdk/sei/provider.go +++ b/sdk/sei/provider.go @@ -43,6 +43,7 @@ type NodeHandle interface { Namespace() string EVMRPC() string TendermintRPC() string + REST() string WaitReady(ctx context.Context) error Delete(ctx context.Context) error Object() any // mode-specific raw resource (k8s: *v1alpha1.SeiNode) diff --git a/sdk/sei/provider/k8s/handle.go b/sdk/sei/provider/k8s/handle.go index 06b6fde8..4ec504ab 100644 --- a/sdk/sei/provider/k8s/handle.go +++ b/sdk/sei/provider/k8s/handle.go @@ -123,6 +123,13 @@ func (h *nodeHandle) TendermintRPC() string { return h.node.Status.Endpoint.TendermintRpc } +func (h *nodeHandle) REST() string { + if h.node == nil || h.node.Status.Endpoint == nil { + return "" + } + return h.node.Status.Endpoint.TendermintRest +} + // WaitReady blocks until the SeiNode reaches PhaseRunning and a light serve-probe // passes, failing fast on PhaseFailed. The caller's ctx is the budget. func (h *nodeHandle) WaitReady(ctx context.Context) error { diff --git a/sdk/sei/readiness.go b/sdk/sei/readiness.go index e7acd221..c594e2f7 100644 --- a/sdk/sei/readiness.go +++ b/sdk/sei/readiness.go @@ -146,6 +146,19 @@ func WaitEVMServing(ctx context.Context, hc *http.Client, evmRPC string) error { }) } +// WaitRESTServing blocks until restURL answers the Cosmos REST node-info endpoint +// with HTTP 200 — proof the LCD listener is bound and serving. A node's status +// advertises its REST URL as soon as the endpoint is composed, but the LCD API +// binds later in seid boot than the EVM listener, so a freshly-Running node can +// advertise REST before it serves; this gates on an actual answer. hc may be nil. 
+func WaitRESTServing(ctx context.Context, hc *http.Client, restURL string) error { + url := restURL + "/cosmos/base/tendermint/v1beta1/node_info" + return pollUntil(ctx, url, func(ctx context.Context) bool { + _, ok := getJSON(ctx, hc, http.MethodGet, url, "") + return ok + }) +} + // pollUntil ticks done() every probeInterval until it returns true or ctx fires, // running once immediately. A stdlib poll loop — no apimachinery in core. func pollUntil(ctx context.Context, what string, done func(context.Context) bool) error { diff --git a/sdk/sei/sei.go b/sdk/sei/sei.go index 87f84564..a7fa3b21 100644 --- a/sdk/sei/sei.go +++ b/sdk/sei/sei.go @@ -174,6 +174,10 @@ func (n *Node) EVMRPC() string { return n.handle.EVMRPC() } // TendermintRPC is the node's Tendermint RPC URL off .status. func (n *Node) TendermintRPC() string { return n.handle.TendermintRPC() } +// REST is the node's Cosmos REST (LCD) URL off .status; "" unless the node +// serves REST (fullNode RPCs do; bare validators do not unless configured). +func (n *Node) REST() string { return n.handle.REST() } + // WaitReady blocks until the node reaches the Running phase and a light serve- // probe passes, or the caller's ctx fires (IsTimeout on a deadline). 
func (n *Node) WaitReady(ctx context.Context) error { return n.handle.WaitReady(ctx) } diff --git a/test/integration/.xreview/release-suite.md b/test/integration/.xreview/release-suite.md new file mode 100644 index 00000000..03892cb0 --- /dev/null +++ b/test/integration/.xreview/release-suite.md @@ -0,0 +1,40 @@ +# xreview ledger — TestRelease + keygen refactor (WS-I) + +Class: component (integration suite + internal package refactor + additive SDK surface) +Tier: T2 + +Target: `test/integration/release_test.go`, `internal/keygen/*`, `internal/seitask/keygen/keygen.go`, `sdk/sei` Node.REST() +Artifact: branch `feat/test-release` + +## Round 1 + +State: RESOLVED +OpenFindings: 0 +Convergence: independent (4 blinded reviewers) +Blinded: yes +Dissenter: sei-network-specialist (DISSENT → resolved) + +Slate: sei-network-specialist (dissenter), systems-engineer, kubernetes-specialist, idiomatic-reviewer. + +### Findings + +| Finding | Status | Evidence | Raised by | Resolution | +|---|---|---|---|---| +| Dropped envFrom / RPC_EVM_RPC_LIST | **MISMATCH → FIXED** | Scenario injects env via `envFrom: workflow-vars CM` ∪ explicit list; the CM carries RPC_EVM_RPC_LIST (+ RPC_*/CHAIN_ID/ADMIN_ADDRESS) with no explicit equivalent. A harness sub-case reading it would skip silently → exit 0 false-pass. | dissenter (headline) | Job env now reproduces the scenario superset: the RPC_*/CHAIN_ID/ADMIN_ADDRESS CM names alongside the SEI_* explicit names. | +| Verdict is exit-0-only | **MISSING → FIXED** | No record of which sub-cases ran; scenario had upload-report (S3 audit). Strictly less observable than the artifact. | dissenter | Log the harness pod-log tail on completion (success too), so a skip-but-exit-0 is forensically visible. (Full S3/report = the deferred telemetry component.) 
| +| Job missing securityContext/resources/ttl | **MISMATCH → FIXED** | seiload_job.yaml.tmpl sets runAsNonRoot/seccomp/drop-ALL/readOnlyRootFS + resources + ttl; releaseJob set none → restricted-PSS admission could reject. | k8s | releaseJob now sets resources + ttlSecondsAfterFinished; securityContext is deliberately left unset to stay faithful to the scenario's run-release-test step (see the releaseJob doc comment), so the restricted-PSS admission risk remains for enforced namespaces. | +| REST handed unprobed (cold-start) | **MISSING → FIXED** | rest=="" is a status-string check, not a serve-probe; LCD binds later than the EVM listener → cold-REST window. | systems, k8s, dissenter | Added sei.WaitRESTServing (GET /cosmos/base/tendermint/v1beta1/node_info), symmetric with WaitEVMServing; runs after the bare empty-check, which is kept as a fast-fail. | +| waitJob drops podLogTail on ctx/signal | **MISMATCH → FIXED** | ctx.Done() branch failed with only ctx.Err() — no harness log on Ctrl-C/SIGTERM/timeout. | systems | ctx.Done() branch now tails the pod log (fresh ctx); messages genericized from "seiload job" to "job". | +| releaseBaseConfig handed un-cloned | **flag → FIXED** | provision maps.Clone's config; release passed the package-global directly. | systems | maps.Clone at the network create. | +| REST on by default for fullNode | COMPATIBLE | Smoke-confirmed (`...:1317` populated); ModeFull → REST.Enable=true, validators → false (explains the upgrade-suite gap). | dissenter (refutes own attack) | — | +| Secret material handling | COMPATIBLE | secretKeyRef (not plain env), never logged, Data not StringData. | systems | — | +| keygen refactor behavior | COMPATIBLE | Pure extraction verified line-by-line (entropy/path/pipeline/idempotency/ownerRef preserved). | k8s, idiom | — | +| Single RPC + EVM-legacy + funding + namespace | COMPATIBLE | One-node filter consistency correct; 1e12 usei matches scenario; co-located. | dissenter, k8s | — | +| Suite SA needs secrets RBAC | RESOLVED (Brandon-authorized) | createMnemonicSecret needs secrets create/delete; smoke confirmed Forbidden without it. 
| k8s, systems | Granted to the harness Role; the committed manifest lands in the cutover. | + +### Idiom addendum (RATIFY) +Clean. The Go-built Job (vs seiload's template) is principled (shape owned by the suite, not platform) — do NOT harmonize. Node.REST() additive-safe, mirrors EVMRPC/TendermintRPC. Nits fixed: keygen.go:9 typo. SecretMnemonicKey placement vet-and-rejected (one shared const). + +### Deferred +- Full upload-report / S3 audit trail = the deferred telemetry/report component (PromQL punted to last); the pod-log tail is the interim observability. +- Secret leak on SIGKILL until the label-GC sweep ships (cutover) — documented; mnemonic is for a throwaway chain (DeletionDelete cascade). diff --git a/test/integration/release_test.go b/test/integration/release_test.go new file mode 100644 index 00000000..68809eb8 --- /dev/null +++ b/test/integration/release_test.go @@ -0,0 +1,286 @@ +//go:build integration + +package integration + +import ( + "context" + "maps" + "net/http" + "os/signal" + "syscall" + "testing" + "time" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + + "github.com/sei-protocol/sei-k8s-controller/internal/keygen" + "github.com/sei-protocol/sei-k8s-controller/sdk/sei" +) + +// releaseAdminBalance funds the admin account in genesis so the release-test +// harness can sign and pay for the txs it issues. Ported from the release-test +// scenario's validator template. +const releaseAdminBalance = "1000000000000usei" + +// releaseBaseConfig is the seid config the release chain runs with: the memiavl +// storage baseline (the nightly image rejects the cosmos_only default) plus kv tx +// indexing (the harness queries txs) and a short mempool TTL. Ported from the +// release-test scenario's validator + rpc configOverrides. 
+var releaseBaseConfig = mergeConfig(memiavlStorageConfig, map[string]string{ + "tx_index.indexer": "kv", + "mempool.ttl_duration": "60s", +}) + +// releaseLegacyEVMAPIs are the legacy sei_* EVM APIs the release-test's stateful +// sequences exercise (sei_newFilter / sei_getFilterLogs need a single consistent +// filter-store, which is why the suite runs exactly one RPC node). +const releaseLegacyEVMAPIs = "sei_getLogs,sei_getBlockByNumber,sei_getBlockByHash,sei_getSeiAddress," + + "sei_getEVMAddress,sei_getCosmosTx,sei_getEvmTx,sei_newFilter,sei_getFilterLogs" + +// releaseRPCConfig overlays the follower-only knobs on releaseBaseConfig: a low +// RPC lag threshold and the legacy EVM APIs above. +var releaseRPCConfig = map[string]string{ + "network.rpc.lag_threshold": "2", + "evm.enabled_legacy_sei_apis": releaseLegacyEVMAPIs, +} + +// TestRelease drives the release-validation scenario: provision a 4-validator +// chain + one EVM-serving RPC follower, generate a funded admin account, and run +// the external release-test image against the RPC node as a Job. The release-test +// image owns the functional assertions (TEST_TARGET=chain-agnostic); the suite's +// job is to stand up the chain, hand the harness its endpoints + admin key, and +// gate on the Job's exit code. Replaces the Chaos-Mesh Workflow's keygen + +// provision + run steps with statement order + the SDK. +// +// One RPC node (not the load suite's two) is deliberate: the harness runs +// stateful EVM-filter and send-then-wait sequences that need one consistent +// mempool + filter-store view. +// +// Inputs (env): SEI_CHAIN_ID, SEID_IMAGE [required], RELEASE_TEST_IMAGE +// (the external harness) [required]; SEI_NAMESPACE [optional]. Run with +// -test.timeout 0 (see TestBenchmark). 
+func TestRelease(t *testing.T) { + requireCluster(t) + chainID := mustEnv(t, "SEI_CHAIN_ID") + seid := mustEnv(t, "SEID_IMAGE") + releaseImage := mustEnv(t, "RELEASE_TEST_IMAGE") + ns := envOr("SEI_NAMESPACE", "") + runLabels := map[string]string{runLabelKey: chainID} + + // Generous envelope: the external chain-agnostic harness is a large suite + // (each test file re-creates + funds + associates users) and runs well past + // half an hour against a single RPC node; size the ctx above the Job deadline. + ctx, cancel := context.WithTimeout(context.Background(), 80*time.Minute) + defer cancel() + ctx, stop := signal.NotifyContext(ctx, syscall.SIGTERM, syscall.SIGINT) + defer stop() + + c := openClient(ctx, t) + cs := clientset(t) + + // Admin account: derive a funded identity the release-test harness signs with. + admin, err := keygen.Derive() + if err != nil { + t.Fatalf("derive admin key: %v", err) + } + + // Provision: 4 validators with the admin funded in genesis, + 1 RPC follower. + ch := &chain{} + cleanupChain(t, ch) + net, err := c.CreateNetwork(ctx, sei.NetworkSpec{ + Name: chainID, + Namespace: ns, + Image: seid, + Validators: 4, + Labels: runLabels, + Config: maps.Clone(releaseBaseConfig), // package-global; clone before handing to the SDK + Accounts: []sei.GenesisAccount{{Address: admin.Address, Balance: releaseAdminBalance}}, + DeletionPolicy: sei.DeletionDelete, + }) + if err != nil { + t.Fatalf("create network %q: %v", chainID, err) + } + ch.network = net + if err := net.WaitReady(ctx); err != nil { + t.Fatalf("network %q ready: %v", chainID, err) + } + t.Logf("network %s: ready (4 validators, admin %s funded)", chainID, admin.Address) + + rpcName := rpcNodeName(chainID, 0) + node, err := c.CreateNode(ctx, sei.NodeSpec{ + Name: rpcName, + Network: chainID, + Namespace: ns, + Image: seid, + Labels: runLabels, + Config: mergeConfig(releaseBaseConfig, releaseRPCConfig), + }) + if err != nil { + t.Fatalf("create rpc node %q: %v", rpcName, err) + } + 
ch.rpcNodes = append(ch.rpcNodes, node) + if err := node.WaitReady(ctx); err != nil { + t.Fatalf("rpc node %q running: %v", rpcName, err) + } + hc := &http.Client{Timeout: 10 * time.Second} + if err := sei.WaitCaughtUp(ctx, hc, node.TendermintRPC()); err != nil { + t.Fatalf("rpc node %q caught up: %v", rpcName, err) + } + if err := sei.WaitEVMServing(ctx, hc, node.EVMRPC()); err != nil { + t.Fatalf("rpc node %q EVM serving: %v", rpcName, err) + } + rest := node.REST() + if rest == "" { + t.Fatalf("rpc node %q exposes no REST endpoint (release-test needs SEI_REST_ENDPOINT)", rpcName) + } + // The status advertises REST as soon as the endpoint is composed, but the LCD + // listener binds later than the EVM one — probe it actually serves before + // handing the URL to the harness, so a cold REST surfaces here, not mid-test. + if err := sei.WaitRESTServing(ctx, hc, rest); err != nil { + t.Fatalf("rpc node %q REST serving: %v", rpcName, err) + } + t.Logf("rpc node %s: caught up + EVM serving + REST at %s", rpcName, rest) + + // Hand the admin mnemonic to the harness via a Secret (secretKeyRef), labeled + // for the GC sweep and deleted on cleanup. + secretName := "admin-" + chainID + createMnemonicSecret(ctx, t, cs, net.Namespace(), secretName, runLabels, admin.Mnemonic) + + // Run the external release-test image as a Job; its exit code is the verdict. 
+ job := releaseJob(releaseParams{ + name: "release-test-" + chainID, + namespace: net.Namespace(), + image: releaseImage, + runID: chainID, + chainID: chainID, + adminAddr: admin.Address, + secretName: secretName, + tmRPC: node.TendermintRPC(), + evmRPC: node.EVMRPC(), + rest: rest, + }) + if _, err := cs.BatchV1().Jobs(net.Namespace()).Create(ctx, job, metav1.CreateOptions{}); err != nil { + t.Fatalf("create release-test job: %v", err) + } + t.Cleanup(func() { + delCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + bg := metav1.DeletePropagationBackground + _ = cs.BatchV1().Jobs(net.Namespace()).Delete(delCtx, job.Name, metav1.DeleteOptions{PropagationPolicy: &bg}) + }) + t.Logf("release-test job launched (%s)", releaseImage) + + waitJob(ctx, t, cs, net.Namespace(), job.Name) + // Archive the harness output even on success: exit 0 alone doesn't show which + // sub-cases ran, so a skip-but-pass is otherwise invisible (the scenario's + // upload-report served this; an S3 report is the deferred telemetry step). + t.Logf("release-test job completed; harness log tail:\n%s", podLogTail(ctx, cs, net.Namespace(), job.Name)) + + // The chain stayed live through the release suite: the follower is still + // caught up (it can't catch up to a halted chain, so this covers quorum). + if err := sei.WaitCaughtUp(ctx, hc, node.TendermintRPC()); err != nil { + t.Errorf("post-release %s not caught up: %v", rpcName, err) + } + t.Logf("chain live post-release — TestRelease OK") +} + +// createMnemonicSecret writes the admin mnemonic to a Secret the release-test pod +// reads via secretKeyRef. Labeled for the GC sweep; deleted on cleanup. (The +// seitask-runner stamps an ownerRef instead — the harness uses the run label + +// t.Cleanup, matching how it provisions everything else.) 
+func createMnemonicSecret( + ctx context.Context, t *testing.T, cs *kubernetes.Clientset, + ns, name string, labels map[string]string, mnemonic string, +) { + t.Helper() + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns, Labels: labels}, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{keygen.SecretMnemonicKey: []byte(mnemonic)}, + } + if _, err := cs.CoreV1().Secrets(ns).Create(ctx, sec, metav1.CreateOptions{}); err != nil { + t.Fatalf("create mnemonic secret %q: %v", name, err) + } + t.Cleanup(func() { + delCtx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + _ = cs.CoreV1().Secrets(ns).Delete(delCtx, name, metav1.DeleteOptions{}) + }) +} + +// releaseParams are the per-run inputs to the release-test Job. +type releaseParams struct { + name, namespace, image, runID string + chainID, adminAddr string + secretName string + tmRPC, evmRPC, rest string +} + +// releaseJob builds the release-test Job: the external harness image, fed the +// chain endpoints + admin identity, run once (no retry) with a self-terminating +// deadline. Resources + ttl match the scenario's run-release-test step (which the +// nightly — an unenforced-PSS namespace — runs without a securityContext, so this +// stays faithful rather than imposing one the harness image may not tolerate). 
+func releaseJob(p releaseParams) *batchv1.Job { + backoff := int32(0) + deadline := int64(60 * 60) // the chain-agnostic harness runs >35m against one RPC node; generous cap + ttl := int32(86400) // GC the finished Job after a day (matches seiload) + return &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: p.name, + Namespace: p.namespace, + Labels: map[string]string{runLabelKey: p.runID}, + }, + Spec: batchv1.JobSpec{ + BackoffLimit: &backoff, + ActiveDeadlineSeconds: &deadline, + TTLSecondsAfterFinished: &ttl, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{runLabelKey: p.runID}}, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + Containers: []corev1.Container{{ + Name: "release-test", + Image: p.image, + // The scenario projects the workflow-vars CM (RPC_*/CHAIN_ID/ + // ADMIN_ADDRESS) via envFrom ON TOP of the explicit SEI_* list; + // reproduce that superset so a harness sub-case reading e.g. + // RPC_EVM_RPC_LIST isn't silently unset (a skip-but-exit-0). + Env: []corev1.EnvVar{ + {Name: "TEST_TARGET", Value: "chain-agnostic"}, + {Name: "SEI_CHAIN_ID", Value: p.chainID}, + {Name: "SEI_ADMIN_ADDRESS", Value: p.adminAddr}, + {Name: "SEI_TENDERMINT_RPC", Value: p.tmRPC}, + {Name: "SEI_EVM_JSON_RPC", Value: p.evmRPC}, + {Name: "SEI_REST_ENDPOINT", Value: p.rest}, + // workflow-vars CM superset (the scenario's envFrom). 
+ {Name: "CHAIN_ID", Value: p.chainID}, + {Name: "ADMIN_ADDRESS", Value: p.adminAddr}, + {Name: "RPC_TM_RPC", Value: p.tmRPC}, + {Name: "RPC_EVM_RPC", Value: p.evmRPC}, + {Name: "RPC_EVM_RPC_LIST", Value: p.evmRPC}, // single RPC node → one-element list + {Name: "RPC_REST", Value: p.rest}, + {Name: "SEI_ADMIN_MNEMONIC", ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: p.secretName}, + Key: keygen.SecretMnemonicKey, + }, + }}, + }, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("500m"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: corev1.ResourceList{corev1.ResourceMemory: resource.MustParse("2Gi")}, + }, + }}, + }, + }, + }, + } +} diff --git a/test/integration/seiload_test.go b/test/integration/seiload_test.go index b4270ce6..06a93c68 100644 --- a/test/integration/seiload_test.go +++ b/test/integration/seiload_test.go @@ -185,13 +185,19 @@ func waitJob(ctx context.Context, t *testing.T, cs *kubernetes.Clientset, ns, na return } if cond.Type == batchv1.JobFailed && cond.Status == corev1.ConditionTrue { - t.Fatalf("seiload job %q failed: %s\n--- seiload pod log (tail) ---\n%s", + t.Fatalf("job %q failed: %s\n--- pod log (tail) ---\n%s", name, cond.Message, podLogTail(ctx, cs, ns, name)) } } select { case <-ctx.Done(): - t.Fatalf("seiload job %q did not finish before deadline: %v", name, ctx.Err()) + // The suite ctx fired (deadline or SIGTERM) — grab the pod log on a + // fresh ctx (the suite ctx is already dead) so the failure carries the + // job's last output, not just "deadline". + logCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + tail := podLogTail(logCtx, cs, ns, name) + cancel() + t.Fatalf("job %q did not finish before deadline: %v\n--- pod log (tail) ---\n%s", name, ctx.Err(), tail) case <-tick.C: } }