From 49d10efea830f55f6486f4cc32b1c5a81ea998b0 Mon Sep 17 00:00:00 2001 From: Steven Normore Date: Sat, 28 Mar 2026 15:49:58 -0400 Subject: [PATCH] client: support incremental multicast group subscription Remove the guard in the CLI that blocked `doublezero connect multicast` when a multicast service was already running. The onchain subscribe instruction, CLI's find_or_create_user_and_subscribe, and daemon's InfraEqual + UpdateGroups path already support incremental group additions without tearing down the tunnel. Also improve e2e test ergonomics: - Add Make targets: test-debug, test-nobuild, test-keep, test-cleanup - Rename DZ_E2E_DEBUG env var to DEBUG for consistency with shreds repo - Update dev/e2e-test.sh and dev/e2e-until-fail.sh to use Make - Update CLAUDE.md and DEVELOPMENT.md with new Make target usage --- CHANGELOG.md | 1 + CLAUDE.md | 17 ++++++-- DEVELOPMENT.md | 32 ++++++++++------ Makefile | 41 ++++++++++++++++++-- client/doublezero/src/command/connect.rs | 17 -------- dev/e2e-test.sh | 6 +-- dev/e2e-until-fail.sh | 2 +- e2e/Makefile | 49 +++++++++++++++++------- e2e/main_test.go | 28 +++++++++++++- e2e/multicast_test.go | 18 +++++++-- e2e/qa_multicast_test.go | 29 +++++++------- 11 files changed, 165 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8f6592bf9..999ff521f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file. ### Changes - CLI + - Allow incremental multicast group addition without disconnecting - Reset SIGPIPE to SIG_DFL at the start of main() in all 3 CLI binaries (doublezero, doublezero-geolocation, doublezero-admin) so the process exits silently like standard CLI tools - SDK - Add Go SDK for shred subscription program with read-only account deserialization (epoch state, seat assignments, pricing, settlement, validator client rewards), PDA derivation helpers, RPC fetchers, compatibility tests, and a fetch example CLI diff --git a/CLAUDE.md b/CLAUDE.md index cb70c4a153..0c7a7e6107 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -257,11 +257,20 @@ Only use `dev/dzctl destroy -y` when you need a completely clean slate (e.g., le ```bash # Run a specific test (preferred) -go test -tags e2e -run TestE2E_Multicast_Publisher -v -count=1 ./e2e/... +make e2e-test RUN=TestE2E_Multicast_Publisher -# Run all tests (requires high-memory machine) -dev/e2e-test.sh +# Run with debug logging +make e2e-test-debug RUN=TestE2E_Multicast_Publisher + +# Skip docker image rebuild +make e2e-test-nobuild RUN=TestE2E_Multicast_Publisher # Keep containers after test completion/failure for debugging -TESTCONTAINERS_RYUK_DISABLED=true go test -tags e2e -run TestE2E_Multicast_Publisher -v -count=1 ./e2e/... +make e2e-test-keep RUN=TestE2E_Multicast_Publisher + +# Run all tests (requires high-memory machine) +make e2e-test + +# Clean up leftover containers +make e2e-test-cleanup ``` diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 9a85a00a0e..22dd19abd4 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -55,23 +55,31 @@ The required image (`ghcr.io/malbeclabs/ceos:4.33.1F`) will be pulled automatica End-to-end tests exercise the full DoubleZero stack — smartcontracts, controller, activator, client, and device agents — all running in isolated Docker containers. ```bash -# Run a specific E2E test directly -cd e2e/ -go test -tags e2e -v -run TestE2E_MultiClient +# Run a specific test +make e2e-test RUN=TestE2E_MultiClient -# Or use the helper script -dev/e2e-test.sh TestE2E_MultiClient +# Run with debug logging +make e2e-test-debug RUN=TestE2E_MultiClient + +# Skip docker image rebuild (faster iteration) +make e2e-test-nobuild RUN=TestE2E_MultiClient + +# Keep containers after test for debugging +make e2e-test-keep RUN=TestE2E_MultiClient + +# Both: skip rebuild + keep containers +make e2e-test-keep-nobuild RUN=TestE2E_MultiClient + +# Clean up leftover containers from previous runs +make e2e-test-cleanup + +# Run all tests (requires high-memory machine) +make e2e-test ``` > ⚠️ Note: > -> -> E2E tests are resource-intensive. It’s recommended to run them individually or with low parallelism: -> -> ```bash -> go test -tags e2e -v -parallel=1 -timeout=20m -> ``` -> +> E2E tests are resource-intensive. It’s recommended to run them individually. > Running all tests together may require at least 64 GB of memory available to Docker. > diff --git a/Makefile b/Makefile index b4599a0196..011fa07be9 100644 --- a/Makefile +++ b/Makefile @@ -198,15 +198,48 @@ generate-fixtures: # ----------------------------------------------------------------------------- # E2E targets +# +# Usage: +# make e2e-test # run all tests +# make e2e-test RUN=TestE2E_Multicast # run a specific test +# make e2e-test-debug RUN=TestE2E_Multicast # with debug logging +# make e2e-test-nobuild # skip docker image build +# make e2e-test-keep # keep containers after test +# make e2e-test-keep-nobuild # both +# make e2e-test-cleanup # remove leftover containers # ----------------------------------------------------------------------------- -.PHONY: e2e-test -e2e-test: - cd e2e && $(MAKE) test - .PHONY: e2e-build e2e-build: cd e2e && $(MAKE) build +.PHONY: e2e-build-debug +e2e-build-debug: + cd e2e && $(MAKE) build-debug + +.PHONY: e2e-test +e2e-test: + cd e2e && $(MAKE) test $(if $(RUN),RUN=$(RUN)) + +.PHONY: e2e-test-debug +e2e-test-debug: + cd e2e && $(MAKE) test-debug $(if $(RUN),RUN=$(RUN)) + +.PHONY: e2e-test-nobuild +e2e-test-nobuild: + cd e2e && $(MAKE) test-nobuild $(if $(RUN),RUN=$(RUN)) + +.PHONY: e2e-test-keep +e2e-test-keep: + cd e2e && $(MAKE) test-keep $(if $(RUN),RUN=$(RUN)) + +.PHONY: e2e-test-keep-nobuild +e2e-test-keep-nobuild: + cd e2e && $(MAKE) test-keep-nobuild $(if $(RUN),RUN=$(RUN)) + +.PHONY: e2e-test-cleanup +e2e-test-cleanup: + cd e2e && $(MAKE) test-cleanup + # ----------------------------------------------------------------------------- # Build programs for specific environments # ----------------------------------------------------------------------------- diff --git a/client/doublezero/src/command/connect.rs b/client/doublezero/src/command/connect.rs index b866a49435..cb5ea306bf 100644 --- a/client/doublezero/src/command/connect.rs +++ b/client/doublezero/src/command/connect.rs @@ -204,23 +204,6 @@ impl ProvisioningCliCommand { client_ip: Ipv4Addr, spinner: &ProgressBar, ) -> eyre::Result<()> { - // Check if the daemon already has a multicast service running. The daemon - // does not support updating an existing service — both publisher and subscriber - // roles must be specified in a single connect command. - if let Ok(statuses) = controller.status().await { - if statuses.iter().any(|s| { - s.user_type - .as_ref() - .is_some_and(|t| t.eq_ignore_ascii_case("multicast")) - }) { - eyre::bail!( - "A multicast service is already running. Disconnect first with \ - `doublezero disconnect multicast`, then reconnect with all desired \ - groups in a single command (e.g. --publish and --subscribe)." - ); - } - } - let mcast_groups = client.list_multicastgroup(ListMulticastGroupCommand)?; // Resolve pub group codes to pubkeys diff --git a/dev/e2e-test.sh b/dev/e2e-test.sh index 4f01f2885b..8de15f6e90 100755 --- a/dev/e2e-test.sh +++ b/dev/e2e-test.sh @@ -6,10 +6,8 @@ workspace_dir=$(dirname "${script_dir}") test=${1:-} -cd "${workspace_dir}/e2e" - if [ -n "${test}" ]; then - go test -v -tags e2e -run="${test}" -timeout 20m + make -C "${workspace_dir}/e2e" test RUN="${test}" else - make test verbose + make -C "${workspace_dir}/e2e" test fi diff --git a/dev/e2e-until-fail.sh b/dev/e2e-until-fail.sh index f674404d81..f1e5b095ef 100755 --- a/dev/e2e-until-fail.sh +++ b/dev/e2e-until-fail.sh @@ -57,7 +57,7 @@ while :; do fi set +e - "${workspace_dir}/dev/e2e-test.sh" "${target_test}" + make -C "${workspace_dir}/e2e" test-nobuild $(if [ -n "$target_test" ]; then echo "RUN=$target_test"; fi) ret_val=$? set -e diff --git a/e2e/Makefile b/e2e/Makefile index 783e23baf9..fa4b759562 100644 --- a/e2e/Makefile +++ b/e2e/Makefile @@ -6,8 +6,19 @@ GIT_SHA:=`git rev-parse --short HEAD` # Enabled by default on mac, but not always on linux. export DOCKER_BUILDKIT=1 +.PHONY: build build-debug test test-debug test-nobuild test-keep test-keep-nobuild test-cleanup + +# ----------------------------------------------------------------------------- +# Build # ----------------------------------------------------------------------------- -# Run the e2e tests. +build: + go run ./cmd/dzctl/main.go build + +build-debug: + go run ./cmd/dzctl/main.go build -v + +# ----------------------------------------------------------------------------- +# Test # # This will build the docker images first, so it's not necessary to run `build` # before running `test`. @@ -15,25 +26,35 @@ export DOCKER_BUILDKIT=1 # We configure -timeout=20m for the case where the user is running the tests # sequentially. This should be more than enough time for the tests to run in # that case, and leave room in case more tests are added in the future. +# +# Usage: +# make test # run all tests +# make test RUN=TestE2E_Multicast # run a specific test +# make test-debug RUN=TestE2E_Multicast # with debug logging +# make test-nobuild # skip docker image build +# make test-keep # keep containers after test +# make test-keep-nobuild # both # ----------------------------------------------------------------------------- -.PHONY: test test: - $(if $(findstring nobuild,$(MAKECMDGOALS)),DZ_E2E_NO_BUILD=1) go test -tags=e2e -timeout=20m $(if $(parallel),-parallel=$(parallel)) $(if $(run),-run=$(run)) $(if $(findstring verbose,$(MAKECMDGOALS)),-v) + go test -tags=e2e -timeout=20m -v -count=1 $(if $(RUN),-run $(RUN)) . -# Dummy target to suppress errors when using 'nobuild' as a flag in `make test nobuild -.PHONY: nobuild -nobuild: - @: +test-debug: + DEBUG=1 go test -tags=e2e -timeout=20m -v -count=1 $(if $(RUN),-run $(RUN)) . -# Dummy target to suppress errors when using 'verbose' as a flag in `make test verbose` -.PHONY: verbose -verbose: - @: +test-nobuild: + DZ_E2E_NO_BUILD=1 go test -tags=e2e -timeout=20m -v -count=1 $(if $(RUN),-run $(RUN)) . -.PHONY: build -build: - go run ./cmd/dzctl/main.go build +test-keep: + TESTCONTAINERS_RYUK_DISABLED=true go test -tags=e2e -timeout=20m -v -count=1 $(if $(RUN),-run $(RUN)) . + +test-keep-nobuild: + TESTCONTAINERS_RYUK_DISABLED=true DZ_E2E_NO_BUILD=1 go test -tags=e2e -timeout=20m -v -count=1 $(if $(RUN),-run $(RUN)) . +test-cleanup: + @echo "Removing containers with label dz.malbeclabs.com..." + @docker rm -f $$(docker ps -aq --filter label=dz.malbeclabs.com) 2>/dev/null || true + @echo "Removing networks with label dz.malbeclabs.com..." + @docker network rm $$(docker network ls -q --filter label=dz.malbeclabs.com) 2>/dev/null || true # ----------------------------------------------------------------------------- # Solana image build and push. diff --git a/e2e/main_test.go b/e2e/main_test.go index accb906926..b32e3b4c75 100644 --- a/e2e/main_test.go +++ b/e2e/main_test.go @@ -76,7 +76,7 @@ func TestMain(m *testing.M) { if vFlag := flag.Lookup("test.v"); vFlag != nil && vFlag.Value.String() == "true" { verbose = true } - if os.Getenv("DZ_E2E_DEBUG") != "" { + if os.Getenv("DEBUG") != "" { debug = true } @@ -534,6 +534,30 @@ func (dn *TestDevnet) ConnectMulticastSubscriberSkipAccessPass(t *testing.T, cli dn.log.Debug("--> Multicast subscriber connected") } +// AddMulticastPublisherGroupSkipAccessPass incrementally adds publish groups to +// an existing multicast service without disconnecting. +func (dn *TestDevnet) AddMulticastPublisherGroupSkipAccessPass(t *testing.T, client *devnet.Client, multicastGroupCodes ...string) { + dn.log.Debug("==> Adding multicast publisher groups incrementally", "clientIP", client.CYOANetworkIP, "groups", multicastGroupCodes) + + groupArgs := strings.Join(multicastGroupCodes, " ") + _, err := client.Exec(t.Context(), []string{"bash", "-c", "doublezero connect multicast --publish " + groupArgs}) + require.NoError(t, err) + + dn.log.Debug("--> Multicast publisher groups added incrementally") +} + +// AddMulticastSubscriberGroupSkipAccessPass incrementally adds subscribe groups to +// an existing multicast service without disconnecting. +func (dn *TestDevnet) AddMulticastSubscriberGroupSkipAccessPass(t *testing.T, client *devnet.Client, multicastGroupCodes ...string) { + dn.log.Debug("==> Adding multicast subscriber groups incrementally", "clientIP", client.CYOANetworkIP, "groups", multicastGroupCodes) + + groupArgs := strings.Join(multicastGroupCodes, " ") + _, err := client.Exec(t.Context(), []string{"bash", "-c", "doublezero connect multicast --subscribe " + groupArgs}) + require.NoError(t, err) + + dn.log.Debug("--> Multicast subscriber groups added incrementally") +} + func (dn *TestDevnet) DisconnectMulticastSubscriber(t *testing.T, client *devnet.Client) { dn.log.Debug("==> Disconnecting multicast subscriber", "clientIP", client.CYOANetworkIP) @@ -765,7 +789,7 @@ func (dn *TestDevnet) BuildAgentConfigData(t *testing.T, deviceCode string, extr // newTestLoggerForTest creates a logger for individual test runs. // Logs are written to t.Log() so they only appear on test failure (unless -v is passed). -// With DZ_E2E_DEBUG=1, shows DEBUG level logs; otherwise shows INFO level. +// With DEBUG=1, shows DEBUG level logs; otherwise shows INFO level. func newTestLoggerForTest(t *testing.T) *slog.Logger { w := &testWriter{t: t} logLevel := slog.LevelInfo diff --git a/e2e/multicast_test.go b/e2e/multicast_test.go index aadb0b8d86..d54d226a2e 100644 --- a/e2e/multicast_test.go +++ b/e2e/multicast_test.go @@ -192,13 +192,23 @@ func TestE2E_Multicast(t *testing.T) { createMulticastGroupForBothClients(t, tdn, publisherClient, subscriberClient, "mg02") if !t.Run("connect", func(t *testing.T) { - // Connect publisher. - tdn.ConnectMulticastPublisherSkipAccessPass(t, publisherClient, "mg01", "mg02") + // Connect publisher to first group only. + tdn.ConnectMulticastPublisherSkipAccessPass(t, publisherClient, "mg01") err = publisherClient.WaitForTunnelUp(t.Context(), 90*time.Second) require.NoError(t, err) - // Connect subscriber. - tdn.ConnectMulticastSubscriberSkipAccessPass(t, subscriberClient, "mg01", "mg02") + // Incrementally add second publish group without disconnecting. + tdn.AddMulticastPublisherGroupSkipAccessPass(t, publisherClient, "mg02") + err = publisherClient.WaitForTunnelUp(t.Context(), 90*time.Second) + require.NoError(t, err) + + // Connect subscriber to first group only. + tdn.ConnectMulticastSubscriberSkipAccessPass(t, subscriberClient, "mg01") + err = subscriberClient.WaitForTunnelUp(t.Context(), 90*time.Second) + require.NoError(t, err) + + // Incrementally add second subscribe group without disconnecting. + tdn.AddMulticastSubscriberGroupSkipAccessPass(t, subscriberClient, "mg02") err = subscriberClient.WaitForTunnelUp(t.Context(), 90*time.Second) require.NoError(t, err) diff --git a/e2e/qa_multicast_test.go b/e2e/qa_multicast_test.go index 1618e813e0..f2f654cb40 100644 --- a/e2e/qa_multicast_test.go +++ b/e2e/qa_multicast_test.go @@ -347,17 +347,17 @@ func TestQA_MulticastPublisherMultipleGroups(t *testing.T) { require.Greater(t, reportB.PacketCount, uint64(0), "subscriberB received no packets from group B") log.Info("Received multicast packets", "subscriber", subscriberB.Host, "group", groupB.Code, "packetCount", reportB.PacketCount) - // --- Phase 2: Dynamic subscription --- - // SubA disconnects and reconnects with both groups A+B — verify identity preserved and receives from both. - log.Debug("Phase 2: dynamic subscription") + // --- Phase 2: Incremental subscription --- + // SubA adds group B without disconnecting — verify tunnel stays up, identity preserved, and receives from both. + log.Debug("Phase 2: incremental subscription (no disconnect)") statusBefore, err := subscriberA.GetUserStatus(ctx) require.NoError(t, err, "failed to get subscriberA status before adding group B") log.Debug("SubscriberA status before", "status", statusBefore) - log.Debug("SubscriberA reconnecting with both groups", "codes", []string{groupA.Code, groupB.Code}) - err = subscriberA.ConnectUserMulticast_Subscriber_Wait(ctx, groupA.Code, groupB.Code) - require.NoError(t, err, "failed to reconnect subscriberA with both groups") + log.Debug("SubscriberA adding group B incrementally (no disconnect)", "code", groupB.Code) + err = subscriberA.ConnectUserMulticast_Subscriber_AddTunnel(ctx, groupB.Code) + require.NoError(t, err, "failed to incrementally add group B to subscriberA") err = subscriberA.WaitForStatusUp(ctx) require.NoError(t, err, "failed to wait for subscriberA status up after adding group B") @@ -389,15 +389,18 @@ func TestQA_MulticastPublisherMultipleGroups(t *testing.T) { require.NotNil(t, reportB, "no report for group B") require.Greater(t, reportB.PacketCount, uint64(0), "no packets from group B") - log.Debug("Phase 2 passed: dynamic subscription verified", + log.Debug("Phase 2 passed: incremental subscription verified", "groupA_packets", reportA.PacketCount, "groupB_packets", reportB.PacketCount) - // --- Phase 3: Simultaneous pub+sub --- - // SubA reconnects as both publisher and subscriber on group A, sends to itself. - log.Debug("Phase 3: simultaneous pub+sub") + // --- Phase 3: Incremental publish after subscribe (cross-role) --- + // SubA adds a publisher role on group A without disconnecting. This triggers a + // full reprovision in the daemon (publisher role transition) but should still + // converge to a working pub+sub state. + log.Debug("Phase 3: incremental publish after subscribe (cross-role)") - err = subscriberA.ConnectUserMulticast_PubAndSub_Wait(ctx, []string{groupA.Code}, []string{groupA.Code}) - require.NoError(t, err, "failed to connect subscriberA as pub+sub") + log.Debug("SubscriberA adding publisher role on group A incrementally", "code", groupA.Code) + err = subscriberA.ConnectUserMulticast_Publisher_AddTunnel(ctx, groupA.Code) + require.NoError(t, err, "failed to incrementally add publisher role to subscriberA") err = subscriberA.WaitForStatusUp(ctx) require.NoError(t, err, "failed to wait for subscriberA status up as pub+sub") @@ -419,5 +422,5 @@ func TestQA_MulticastPublisherMultipleGroups(t *testing.T) { reportPubSub, err := subscriberA.WaitForMulticastReport(ctx, groupA) require.NoError(t, err, "failed to get report for group A as pub+sub") require.Greater(t, reportPubSub.PacketCount, uint64(0), "pub+sub client received no packets") - log.Debug("Phase 3 passed: pub+sub verified", "group", groupA.Code, "packetCount", reportPubSub.PacketCount) + log.Debug("Phase 3 passed: incremental publish after subscribe verified", "group", groupA.Code, "packetCount", reportPubSub.PacketCount) }