Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions bin/rune
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,11 @@ if [ -z "$RUNE_VERSION" ]; then
token="${GITHUB_TOKEN:-${GH_TOKEN:-}}"
if [ -n "$token" ]; then
body="$(curl --fail --silent --show-error --location --connect-timeout 10 --max-time 20 \
--retry 3 --retry-delay 2 \
--header "Authorization: Bearer $token" "$api" || true)"
else
body="$(curl --fail --silent --show-error --location --connect-timeout 10 --max-time 20 "$api" || true)"
body="$(curl --fail --silent --show-error --location --connect-timeout 10 --max-time 20 \
--retry 3 --retry-delay 2 "$api" || true)"
fi

RUNE_VERSION="$(printf '%s' "$body" \
Expand Down Expand Up @@ -126,8 +128,10 @@ mkdir -p "$(dirname "$TARGET")"
TMP="$(mktemp "$(dirname "$TARGET")/.rune-bootstrap-XXXXXX")"
SUMS="$(mktemp -t rune-bootstrap-sums-XXXXXX)"

curl --fail --silent --show-error --location --connect-timeout 10 --max-time 120 "$RELEASE_BASE/$ASSET" -o "$TMP"
curl --fail --silent --show-error --location --connect-timeout 10 --max-time 30 "$RELEASE_BASE/checksums.txt" -o "$SUMS"
# --retry rides out transient GitHub CDN failures (504, timeouts) instead
# of aborting the whole bootstrap on the first blip.
curl --fail --silent --show-error --location --connect-timeout 10 --max-time 120 --retry 3 --retry-delay 2 "$RELEASE_BASE/$ASSET" -o "$TMP"
curl --fail --silent --show-error --location --connect-timeout 10 --max-time 30 --retry 3 --retry-delay 2 "$RELEASE_BASE/checksums.txt" -o "$SUMS"

EXPECTED="$(grep " $ASSET\$" "$SUMS" | cut -d' ' -f1)"
if [ -z "$EXPECTED" ]; then
Expand Down
47 changes: 47 additions & 0 deletions internal/bootstrap/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,53 @@ func newDownloadTransport() *http.Transport {

type ProgressFunc func(downloaded, total int64)

// maxDownloadAttempts caps retries for a single network fetch (manifest
// or artifact). A transient blip — most notably a GitHub CDN 504 during
// install — recovers within a couple of attempts; beyond that the
// failure is most likely persistent (wrong SHA, missing asset) where
// further retries only add latency.
const maxDownloadAttempts = 3

// downloadRetryBackoff is the initial wait between attempts; it is
// multiplied by retryBackoffMultiplier each retry so a transient blip
// recovers quickly while a server-side cold-start has time to warm up.
// var so tests can compress the real waits.
var downloadRetryBackoff = 2 * time.Second

const retryBackoffMultiplier = 3

// withRetry runs fn up to maxDownloadAttempts times with bounded
// exponential backoff so a transient network failure (e.g. a GitHub CDN
// 504) surfaces as a retry rather than a hard install failure. ctx
// cancellation aborts immediately without burning the remaining
// attempts. logf may be nil.
func withRetry(ctx context.Context, logf func(string, ...any), label string, fn func() error) error {
var lastErr error
backoff := downloadRetryBackoff
for attempt := 1; attempt <= maxDownloadAttempts; attempt++ {
if attempt > 1 {
if logf != nil {
logf("retrying %s (attempt %d/%d) after %s: %v", label, attempt, maxDownloadAttempts, backoff, lastErr)
}
select {
case <-time.After(backoff):
case <-ctx.Done():
return fmt.Errorf("%s: retry aborted: %w", label, ctx.Err())
}
backoff *= retryBackoffMultiplier
}
if err := fn(); err != nil {
if ctx.Err() != nil {
return err
}
lastErr = err
continue
}
return nil
}
return fmt.Errorf("%s: failed after %d attempts: %w", label, maxDownloadAttempts, lastErr)
}

func DownloadAndVerify(ctx context.Context, spec ArtifactSpec, destPath string, progress ProgressFunc) error {
if spec.URL == "" || spec.SHA256 == "" {
return errors.New("download: spec missing url or sha256")
Expand Down
14 changes: 9 additions & 5 deletions internal/bootstrap/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func Install(ctx context.Context, opts InstallOptions) (*Result, error) {

// Fetch manifest
logf("[1/%d] manifest: fetching from %s", total, opts.ManifestURL)
manifest, err := FetchManifest(ctx, opts.ManifestURL)
manifest, err := FetchManifest(ctx, opts.ManifestURL, logf)
if err != nil {
r.Failed[StepManifest] = err.Error()
r.Status = "partial"
Expand Down Expand Up @@ -123,7 +123,7 @@ func Install(ctx context.Context, opts InstallOptions) (*Result, error) {
}

logf("[%d/%d] %s (%d bytes): downloading...", stepNum, total, in.step, in.spec.Size)
if err := installArtifact(ctx, paths, in.spec, in.dest, opts.Progress); err != nil {
if err := installArtifact(ctx, paths, in.spec, in.dest, opts.Progress, logf); err != nil {
r.Failed[in.step] = err.Error()
r.Status = "partial"
return r, err
Expand Down Expand Up @@ -198,10 +198,12 @@ func onlySkipped(r *Result) bool {
// spec.Extract == "" : raw binaries are downloaded directly to dest
// spec.Extract == "tar.gz": archive is extracted into dest
// Others treat as error
func installArtifact(ctx context.Context, paths *Paths, spec ArtifactSpec, dest string, progress ProgressFunc) error {
func installArtifact(ctx context.Context, paths *Paths, spec ArtifactSpec, dest string, progress ProgressFunc, logf func(string, ...any)) error {
switch spec.Extract {
case "": // raw binaries
if err := DownloadAndVerify(ctx, spec, dest, progress); err != nil {
if err := withRetry(ctx, logf, "download "+filepath.Base(dest), func() error {
return DownloadAndVerify(ctx, spec, dest, progress)
}); err != nil {
return err
}
if err := os.Chmod(dest, binaryMode); err != nil {
Expand All @@ -212,7 +214,9 @@ func installArtifact(ctx context.Context, paths *Paths, spec ArtifactSpec, dest
case "tar.gz": // Tarball archive
// Download archive under paths.Cache first
tarPath := filepath.Join(paths.Cache, filepath.Base(dest)+".tar.gz")
if err := DownloadAndVerify(ctx, spec, tarPath, progress); err != nil {
if err := withRetry(ctx, logf, "download "+filepath.Base(dest), func() error {
return DownloadAndVerify(ctx, spec, tarPath, progress)
}); err != nil {
return err
}
defer os.Remove(tarPath)
Expand Down
48 changes: 33 additions & 15 deletions internal/bootstrap/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,45 @@ var (
ErrNoArtifactForPlatform = errors.New("manifest: no artifacts for this platform")
)

func FetchManifest(ctx context.Context, manifestURL string) (*Manifest, error) {
func FetchManifest(ctx context.Context, manifestURL string, logf func(string, ...any)) (*Manifest, error) {
if v := os.Getenv(envManifest); v != "" {
manifestURL = v
}
if manifestURL == "" {
return nil, errors.New("manifest: no URL provided (default missing; set RUNE_MANIFEST?)")
}

// Only the network fetch is retried; a transient GitHub CDN 504 should
// not abort install, but a parse/version error below is deterministic
// and retrying it would just waste the backoff budget.
var body []byte
if err := withRetry(ctx, logf, "fetch manifest", func() error {
var ferr error
body, ferr = fetchManifestBody(ctx, manifestURL)
return ferr
}); err != nil {
return nil, err
}

var m Manifest
dec := json.NewDecoder(bytes.NewReader(body))
dec.DisallowUnknownFields()
if err := dec.Decode(&m); err != nil {
return nil, fmt.Errorf("manifest: parse JSON: %w", err)
}
if m.Version != ManifestVersion {
return nil, fmt.Errorf("%w: got %d, want %d", ErrUnsupportedManifestVersion, m.Version, ManifestVersion)
}
if len(m.Platforms) == 0 {
return nil, errors.New("manifest: platforms is empty")
}

return &m, nil
}

// fetchManifestBody performs a single manifest GET and returns the raw
// body. It is the retryable unit wrapped by FetchManifest's withRetry.
func fetchManifestBody(ctx context.Context, manifestURL string) ([]byte, error) {
ctx, cancel := context.WithTimeout(ctx, defaultManifestFetchTimeout)
defer cancel()

Expand All @@ -88,20 +119,7 @@ func FetchManifest(ctx context.Context, manifestURL string) (*Manifest, error) {
return nil, fmt.Errorf("manifest: body exceeds %d bytes", maxBody)
}

var m Manifest
dec := json.NewDecoder(bytes.NewReader(body))
dec.DisallowUnknownFields()
if err := dec.Decode(&m); err != nil {
return nil, fmt.Errorf("manifest: parse JSON: %w", err)
}
if m.Version != ManifestVersion {
return nil, fmt.Errorf("%w: got %d, want %d", ErrUnsupportedManifestVersion, m.Version, ManifestVersion)
}
if len(m.Platforms) == 0 {
return nil, errors.New("manifest: platforms is empty")
}

return &m, nil
return body, nil
}

func (m *Manifest) ArtifactsForCurrentPlatform() (PlatformArtifacts, error) {
Expand Down
51 changes: 45 additions & 6 deletions internal/bootstrap/manifest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,18 @@ import (
"net/http/httptest"
"strings"
"testing"
"time"
)

// fastRetry compresses the retry backoff so tests that exercise the retry
// path don't sleep for real seconds. It restores the original on cleanup.
func fastRetry(t *testing.T) {
t.Helper()
orig := downloadRetryBackoff
downloadRetryBackoff = time.Millisecond
t.Cleanup(func() { downloadRetryBackoff = orig })
}

func fullManifestJSON(extraFields string) string {
return fmt.Sprintf(`{
"version": 1,
Expand All @@ -31,7 +41,7 @@ func TestFetchManifest_HappyPath(t *testing.T) {
}))
defer srv.Close()

m, err := FetchManifest(context.Background(), srv.URL)
m, err := FetchManifest(context.Background(), srv.URL, nil)
if err != nil {
t.Fatalf("FetchManifest: %v", err)
}
Expand Down Expand Up @@ -66,7 +76,7 @@ func TestFetchManifest_EnvOverride(t *testing.T) {
defer srv.Close()

t.Setenv(envManifest, srv.URL)
if _, err := FetchManifest(context.Background(), "https://default/manifest.json"); err != nil {
if _, err := FetchManifest(context.Background(), "https://default/manifest.json", nil); err != nil {
t.Fatalf("FetchManifest: %v", err)
}
if !served {
Expand All @@ -81,7 +91,7 @@ func TestFetchManifest_RejectsUnsupportedVersion(t *testing.T) {
}))
defer srv.Close()

_, err := FetchManifest(context.Background(), srv.URL)
_, err := FetchManifest(context.Background(), srv.URL, nil)
if !errors.Is(err, ErrUnsupportedManifestVersion) {
t.Fatalf("want ErrUnsupportedManifestVersion, got %v", err)
}
Expand All @@ -94,7 +104,7 @@ func TestFetchManifest_RejectsEmptyPlatforms(t *testing.T) {
}))
defer srv.Close()

_, err := FetchManifest(context.Background(), srv.URL)
_, err := FetchManifest(context.Background(), srv.URL, nil)
if err == nil || !strings.Contains(err.Error(), "platforms is empty") {
t.Errorf("want empty-platforms error, got %v", err)
}
Expand All @@ -106,24 +116,53 @@ func TestFetchManifest_RejectsUnknownFields(t *testing.T) {
}))
defer srv.Close()

_, err := FetchManifest(context.Background(), srv.URL)
_, err := FetchManifest(context.Background(), srv.URL, nil)
if err == nil || !strings.Contains(err.Error(), "unknown field") {
t.Errorf("want unknown-field rejection, got %v", err)
}
}

func TestFetchManifest_HTTPError(t *testing.T) {
fastRetry(t)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, "not found", http.StatusNotFound)
}))
defer srv.Close()

_, err := FetchManifest(context.Background(), srv.URL)
_, err := FetchManifest(context.Background(), srv.URL, nil)
if err == nil || !strings.Contains(err.Error(), "404") {
t.Errorf("want HTTP 404, got %v", err)
}
}

// TestFetchManifest_RetriesTransient verifies a transient 5xx (e.g. a
// GitHub CDN 504) is retried and the fetch ultimately succeeds.
func TestFetchManifest_RetriesTransient(t *testing.T) {
fastRetry(t)
var attempts int
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
attempts++
if attempts < 3 {
http.Error(w, "gateway timeout", http.StatusGatewayTimeout)
return
}
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(fullManifestJSON("")))
}))
defer srv.Close()

m, err := FetchManifest(context.Background(), srv.URL, nil)
if err != nil {
t.Fatalf("FetchManifest after transient 504s: %v", err)
}
if m.Version != 1 {
t.Errorf("Version = %d, want 1", m.Version)
}
if attempts != 3 {
t.Errorf("attempts = %d, want 3 (two 504s then success)", attempts)
}
}

func TestArtifactsForCurrentPlatform_NotFound(t *testing.T) {
m := &Manifest{
Version: 1,
Expand Down