Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions internal/bootstrap/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"time"
Expand Down Expand Up @@ -106,10 +107,44 @@ func FetchManifest(ctx context.Context) (*Manifest, error) {
if url == "" {
return nil, ErrManifestURLMissing
}
return fetchManifestFrom(ctx, url)

// Only the network fetch is retried (reusing the download retry budget):
// a transient GitHub CDN 504 at boot must not hard-fail the daemon, but a
// deterministic parse/version error is not worth re-fetching for. ctx
// cancellation aborts immediately without burning the remaining attempts.
var (
body []byte
lastErr error
)
backoff := downloadRetryBackoff
for attempt := 1; attempt <= maxDownloadAttempts; attempt++ {
if attempt > 1 {
slog.Warn("retrying manifest fetch",
"attempt", attempt,
"max", maxDownloadAttempts,
"after", lastErr,
"backoff", backoff.String())
select {
case <-time.After(backoff):
case <-ctx.Done():
return nil, fmt.Errorf("manifest fetch retry aborted: %w", ctx.Err())
}
backoff *= retryBackoffMultiplier
}
body, lastErr = fetchManifestBody(ctx, url)
if lastErr == nil {
return parseManifest(body)
}
if ctx.Err() != nil {
return nil, lastErr
}
}
return nil, fmt.Errorf("manifest fetch failed after %d attempts: %w", maxDownloadAttempts, lastErr)
}

func fetchManifestFrom(ctx context.Context, url string) (*Manifest, error) {
// fetchManifestBody performs a single manifest GET and returns the raw
// body. It is the retryable unit wrapped by FetchManifest.
func fetchManifestBody(ctx context.Context, url string) ([]byte, error) {
fctx, cancel := context.WithTimeout(ctx, manifestFetchTimeout)
defer cancel()
req, err := http.NewRequestWithContext(fctx, http.MethodGet, url, nil)
Expand All @@ -131,7 +166,7 @@ func fetchManifestFrom(ctx context.Context, url string) (*Manifest, error) {
if int64(len(body)) > manifestMaxBytes {
return nil, fmt.Errorf("manifest: body exceeds %d bytes", manifestMaxBytes)
}
return parseManifest(body)
return body, nil
}

func parseManifest(body []byte) (*Manifest, error) {
Expand Down
40 changes: 40 additions & 0 deletions internal/bootstrap/manifest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,18 @@ import (
"net/http/httptest"
"strings"
"testing"
"time"
)

// fastRetry compresses the retry backoff so tests that exercise the retry
// path don't sleep for real seconds. It restores the original on cleanup.
func fastRetry(t *testing.T) {
t.Helper()
orig := downloadRetryBackoff
downloadRetryBackoff = time.Millisecond
t.Cleanup(func() { downloadRetryBackoff = orig })
}

const sampleManifest = `{
"version": 1,
"runed_version": "v9.9.9",
Expand Down Expand Up @@ -145,6 +155,7 @@ func TestFetchManifest_HTTP(t *testing.T) {
}

func TestFetchManifest_HTTPError(t *testing.T) {
fastRetry(t)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, "boom", http.StatusInternalServerError)
}))
Expand All @@ -156,3 +167,32 @@ func TestFetchManifest_HTTPError(t *testing.T) {
t.Errorf("expected HTTP 500 error, got %v", err)
}
}

// TestFetchManifest_RetriesTransient verifies a transient 5xx (e.g. a
// GitHub CDN 504) is retried and the fetch ultimately succeeds.
func TestFetchManifest_RetriesTransient(t *testing.T) {
fastRetry(t)
var attempts int
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts++
if attempts < 3 {
http.Error(w, "gateway timeout", http.StatusGatewayTimeout)
return
}
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(sampleManifest))
}))
defer srv.Close()
t.Setenv(EnvManifest, srv.URL)

m, err := FetchManifest(t.Context())
if err != nil {
t.Fatalf("FetchManifest after transient 504s: %v", err)
}
if m.Version != 1 {
t.Errorf("Version = %d, want 1", m.Version)
}
if attempts != 3 {
t.Errorf("attempts = %d, want 3 (two 504s then success)", attempts)
}
}
Loading