From 9139f84ea1a03ecdac7d1155ff70ea0c54842c47 Mon Sep 17 00:00:00 2001 From: ehsan shariati Date: Mon, 1 Jun 2026 23:48:31 -0400 Subject: [PATCH 1/4] feat(cluster): Phase 1 write-federation - trust multiple cluster writers Edge (ipfs-cluster-container-init.d.sh): parse the new `ipfs-cluster-trustedpeers` array from pools.fx.land/pools/{name}, fall back to the single `ipfs-cluster-peerid` (backward-compatible), set consensus.crdt.trusted_peers to the full set, and keep the bootstrap/tunnel/DNS pointed at the PRIMARY (first) peer so single-peer multiaddrs stay valid. Deploys via OTA (watchtower + fula.sh) - no updater script. Server op: update-scripts/phase-1-master-trust.sh appends a new writer's peer id to CLUSTER_CRDT_TRUSTEDPEERS in the master systemd unit (Environment= + ExecStart -e); additive, idempotent, backs up + restarts + verifies, halts without NEW_WRITER_PEERID. Tests: tests/test-cluster-federation-parse.sh (jq parse + primary/split, 7/7) and tests/test-phase-1-master-trust.sh (append to both lines, idempotency, halts, 6/6). Part of #72. 
Co-Authored-By: Claude Opus 4.8 --- .../ipfs-cluster-container-init.d.sh | 25 +++- tests/test-cluster-federation-parse.sh | 44 +++++++ tests/test-phase-1-master-trust.sh | 44 +++++++ update-scripts/phase-1-master-trust.sh | 118 ++++++++++++++++++ 4 files changed, 225 insertions(+), 6 deletions(-) create mode 100755 tests/test-cluster-federation-parse.sh create mode 100755 tests/test-phase-1-master-trust.sh create mode 100755 update-scripts/phase-1-master-trust.sh diff --git a/docker/fxsupport/linux/ipfs-cluster/ipfs-cluster-container-init.d.sh b/docker/fxsupport/linux/ipfs-cluster/ipfs-cluster-container-init.d.sh index f635f735..e0b42d3f 100644 --- a/docker/fxsupport/linux/ipfs-cluster/ipfs-cluster-container-init.d.sh +++ b/docker/fxsupport/linux/ipfs-cluster/ipfs-cluster-container-init.d.sh @@ -70,11 +70,19 @@ get_poolcreator_peerid() { while [ $attempt -le $max_attempts ]; do response=$(curl -s --connect-timeout 10 --max-time 15 "${endpoint}" 2>/dev/null) cluster_peer_id=$(echo "$response" | jq -r '."ipfs-cluster-peerid" // empty' 2>/dev/null) + # Federation: prefer the trusted-peer ARRAY if the pool API provides it; fall back to + # the single legacy field. Result is a comma-separated list for CLUSTER_CRDT_TRUSTEDPEERS. + cluster_trusted_csv=$(echo "$response" | jq -r '(."ipfs-cluster-trustedpeers" // []) | map(select(. != null and . 
!= "")) | join(",")' 2>/dev/null) kubo_peer_id=$(echo "$response" | jq -r '."kubo-peerid" // empty' 2>/dev/null) - if [ -n "$cluster_peer_id" ] && [ "$cluster_peer_id" != "null" ]; then - log "Fetched master cluster peer ID: $cluster_peer_id (attempt $attempt)" - export CLUSTER_CRDT_TRUSTEDPEERS="$cluster_peer_id" + resolved_trusted="$cluster_trusted_csv" + if [ -z "$resolved_trusted" ] || [ "$resolved_trusted" = "null" ]; then + resolved_trusted="$cluster_peer_id" + fi + + if [ -n "$resolved_trusted" ] && [ "$resolved_trusted" != "null" ]; then + log "Fetched trusted cluster peers: $resolved_trusted (attempt $attempt)" + export CLUSTER_CRDT_TRUSTEDPEERS="$resolved_trusted" if [ -n "$kubo_peer_id" ] && [ "$kubo_peer_id" != "null" ]; then MASTER_KUBO_PEERID="$kubo_peer_id" log "Fetched master kubo peer ID: $kubo_peer_id" @@ -182,6 +190,11 @@ append_or_replace "/.env.cluster" "CLUSTER_PEERNAME" "${CLUSTER_PEERNAME}" get_poolcreator_peerid append_or_replace "/.env.cluster" "CLUSTER_CRDT_TRUSTEDPEERS" "${CLUSTER_CRDT_TRUSTEDPEERS}" + # Federation: CLUSTER_CRDT_TRUSTEDPEERS may now be a comma-separated set of writers. + # The PRIMARY (first) peer is the bootstrap/tunnel target (the master); single-peer + # multiaddr construction below must use it, never the whole comma-separated list. 
+ PRIMARY_TRUSTED_PEER=$(printf '%s' "${CLUSTER_CRDT_TRUSTEDPEERS}" | cut -d',' -f1) + # Add master's kubo peer to follower's kubo Peering for faster content discovery if [ -n "${MASTER_KUBO_PEERID}" ] && [ "${MASTER_KUBO_PEERID}" != "${ipfs_peer_id}" ]; then # Add direct kubo addresses from API (non-localhost, non-relay) @@ -306,8 +319,8 @@ append_or_replace "/.env.cluster" "CLUSTER_PEERNAME" "${CLUSTER_PEERNAME}" } } | if $trust_peer != "" then - .cluster.peer_addresses = ["/ip4/127.0.0.1/tcp/19096/p2p/" + $trust_peer] - | .consensus.crdt.trusted_peers = [$trust_peer] + .cluster.peer_addresses = ["/ip4/127.0.0.1/tcp/19096/p2p/" + ($trust_peer | split(",")[0])] + | .consensus.crdt.trusted_peers = ($trust_peer | split(",")) else . end ' "${IPFS_CLUSTER_PATH}/service.json" > "$service_temp" \ && [ -s "$service_temp" ] \ @@ -338,7 +351,7 @@ append_or_replace "/.env.cluster" "CLUSTER_PEERNAME" "${CLUSTER_PEERNAME}" append_or_replace "/.env.cluster" "CLUSTER_FOLLOWERMODE" "${CLUSTER_FOLLOWERMODE}" # Construct the DNS fallback address (always reliable) - constructed_addr="/dns4/${poolName}.pools.functionyard.fula.network/tcp/9096/p2p/${CLUSTER_CRDT_TRUSTEDPEERS}" + constructed_addr="/dns4/${poolName}.pools.functionyard.fula.network/tcp/9096/p2p/${PRIMARY_TRUSTED_PEER}" # Populate peerstore and select bootstrap address if [ -n "${CLUSTER_BOOTSTRAP_ADDRS}" ]; then diff --git a/tests/test-cluster-federation-parse.sh b/tests/test-cluster-federation-parse.sh new file mode 100755 index 00000000..509c4892 --- /dev/null +++ b/tests/test-cluster-federation-parse.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Test the Phase 1 federation parse logic used by ipfs-cluster-container-init.d.sh: +# - prefer the ipfs-cluster-trustedpeers ARRAY (join as CSV), filtering empty/null +# - fall back to the single ipfs-cluster-peerid when the array is absent +# - PRIMARY = first element of the CSV (bootstrap/tunnel target) +# - jq split(",") rebuilds the array and split(",")[0] = primary (mirrors 
the +# service.json jq: trusted_peers = ($trust_peer|split(",")), peer_addresses uses [0]) +set -euo pipefail +command -v jq >/dev/null 2>&1 || { echo "SKIP: jq not installed (runs on edge/CI where jq is present)"; exit 0; } + +A="12D3KooWMASTERaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +B="12D3KooWWRITERbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" +ARR_JQ='(."ipfs-cluster-trustedpeers" // []) | map(select(. != null and . != "")) | join(",")' +fail=0 +pass() { echo "ok - $1"; } +bad() { echo "FAIL - $1: expected [$3] got [$2]"; fail=1; } +eq() { [ "$2" = "$3" ] && pass "$1" || bad "$1" "$2" "$3"; } + +# array present -> CSV +csv="$(echo "{\"ipfs-cluster-peerid\":\"$A\",\"ipfs-cluster-trustedpeers\":[\"$A\",\"$B\"]}" | jq -r "$ARR_JQ")" +eq "array -> csv" "$csv" "$A,$B" + +# array absent -> fall back to single peerid +resp2="{\"ipfs-cluster-peerid\":\"$A\"}" +csv2="$(echo "$resp2" | jq -r "$ARR_JQ")" +resolved="$csv2" +if [ -z "$resolved" ] || [ "$resolved" = "null" ]; then resolved="$(echo "$resp2" | jq -r '."ipfs-cluster-peerid" // empty')"; fi +eq "fallback to single" "$resolved" "$A" + +# array filters empty/null entries +csv3="$(echo "{\"ipfs-cluster-trustedpeers\":[\"$A\",\"\",null,\"$B\"]}" | jq -r "$ARR_JQ")" +eq "filter empty/null" "$csv3" "$A,$B" + +# PRIMARY = first of CSV +eq "primary = first" "$(printf '%s' "$A,$B" | cut -d',' -f1)" "$A" + +# service.json jq behaviour: split rebuilds array, [0] is primary +eq "split -> array" "$(printf '%s' "$A,$B" | jq -Rc 'split(",")')" "[\"$A\",\"$B\"]" +eq "split[0] = primary" "$(printf '%s' "$A,$B" | jq -rR 'split(",")[0]')" "$A" + +# single value stays a 1-element array (backward-compat) +eq "single -> 1-elem array" "$(printf '%s' "$A" | jq -Rc 'split(",")')" "[\"$A\"]" + +[ "$fail" = "0" ] && { echo "ALL PASS"; exit 0; } || { echo "FAILURES"; exit 1; } diff --git a/tests/test-phase-1-master-trust.sh b/tests/test-phase-1-master-trust.sh new file mode 100755 index 00000000..5f09feb7 --- /dev/null +++ 
b/tests/test-phase-1-master-trust.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Test update-scripts/phase-1-master-trust.sh against a fixture systemd unit +# (no systemctl/docker — uses NO_RESTART=1). Verifies additive append to BOTH the +# Environment= line and the ExecStart -e flag, idempotency, and halt-without-input. +set -euo pipefail + +HERE="$(cd "$(dirname "$0")" && pwd)" +SCRIPT="$HERE/../update-scripts/phase-1-master-trust.sh" +[ -f "$SCRIPT" ] || { echo "FAIL: script not found at $SCRIPT"; exit 1; } + +TMP="$(mktemp -d)" +trap 'rm -rf "$TMP"' EXIT +UNIT="$TMP/ipfscluster.service" +MASTER="12D3KooWS79EhkPU7ESUwgG4vyHHzW9FDNZLoWVth9b5N5NSrvaj" +NEW="12D3KooWNEWwriter00000000000000000000000000000000000001" + +cat > "$UNIT" </dev/null +grep -q "Environment=\"CLUSTER_CRDT_TRUSTEDPEERS=$MASTER,$NEW\"" "$UNIT" && pass "Environment= line appended" || bad "Environment= line appended" +grep -q -- "-e CLUSTER_CRDT_TRUSTEDPEERS=$MASTER,$NEW " "$UNIT" && pass "ExecStart -e appended" || bad "ExecStart -e appended" +ls "$UNIT".bak.* >/dev/null 2>&1 && pass "backup created" || bad "backup created" + +# 2) idempotent: second run is a no-op, value still exactly MASTER,NEW (2 occurrences) +NEW_WRITER_PEERID="$NEW" NO_RESTART=1 UNIT_PATH="$UNIT" bash "$SCRIPT" >/dev/null +occ="$(grep -c "CLUSTER_CRDT_TRUSTEDPEERS=$MASTER,$NEW" "$UNIT" || true)" +[ "$occ" = "2" ] && pass "idempotent (no double append)" || bad "idempotent (got $occ occurrences, want 2)" + +# 3) halts when NEW_WRITER_PEERID is missing +if NO_RESTART=1 UNIT_PATH="$UNIT" bash "$SCRIPT" >/dev/null 2>&1; then bad "halts without NEW_WRITER_PEERID"; else pass "halts without NEW_WRITER_PEERID"; fi + +# 4) rejects a non-peer-id value +if NEW_WRITER_PEERID="not-a-peer" NO_RESTART=1 UNIT_PATH="$UNIT" bash "$SCRIPT" >/dev/null 2>&1; then bad "rejects bad peer id"; else pass "rejects bad peer id"; fi + +[ "$fail" = "0" ] && { echo "ALL PASS"; exit 0; } || { echo "FAILURES"; exit 1; } diff --git 
a/update-scripts/phase-1-master-trust.sh b/update-scripts/phase-1-master-trust.sh new file mode 100755 index 00000000..a96d479f --- /dev/null +++ b/update-scripts/phase-1-master-trust.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# +# Phase 1 (cluster write-federation) — trust a 2nd cluster WRITER on the master. +# +# Appends NEW_WRITER_PEERID to CLUSTER_CRDT_TRUSTEDPEERS in the master's systemd unit +# (both the Environment= line AND the ExecStart `-e` flag), backs up the unit, reloads +# systemd, restarts the cluster, and verifies it came back up. +# +# SAFE: additive only — it just appends a trusted peer id. The cluster datastore +# (/uniondrive/ipfs-cluster/pebble) and identity are never touched, so the existing +# pinset is preserved. A timestamped backup is written and the rollback command printed. +# This is a SERVER-side script (the master is systemd-managed, NOT part of the OTA fleet). +# +# Usage (on the MASTER, as root): +# NEW_WRITER_PEERID=12D3KooW... ./phase-1-master-trust.sh # apply +# NEW_WRITER_PEERID=12D3KooW... DRY_RUN=1 ./phase-1-master-trust.sh # show plan only +# +# Env overrides: +# UNIT_PATH (default /etc/systemd/system/ipfscluster.service) +# SERVICE_NAME (default ipfscluster) +# DRY_RUN=1 print the planned change; modify nothing +# NO_RESTART=1 edit + backup only; skip daemon-reload/restart/verify (used by tests) +# +set -euo pipefail + +UNIT_PATH="${UNIT_PATH:-/etc/systemd/system/ipfscluster.service}" +SERVICE_NAME="${SERVICE_NAME:-ipfscluster}" +DRY_RUN="${DRY_RUN:-0}" +NO_RESTART="${NO_RESTART:-0}" +VAR="CLUSTER_CRDT_TRUSTEDPEERS" + +die() { echo "ERROR: $*" >&2; exit 1; } +info() { echo "[phase-1-master-trust] $*"; } + +# --- preconditions (halt rather than guess) --- +[ -n "${NEW_WRITER_PEERID:-}" ] || die "NEW_WRITER_PEERID is required (the new writer's CLUSTER peer id, e.g. 12D3KooW...). Refusing to guess." 
+case "$NEW_WRITER_PEERID" in + 12D3KooW*|Qm*) : ;; + *) die "NEW_WRITER_PEERID='$NEW_WRITER_PEERID' does not look like a libp2p peer id (expected 12D3KooW... or Qm...)." ;; +esac +[ -f "$UNIT_PATH" ] || die "Unit file not found: $UNIT_PATH (set UNIT_PATH=... if it lives elsewhere)." +if [ "$NO_RESTART" != "1" ] && [ "$DRY_RUN" != "1" ]; then + [ "$(id -u)" = "0" ] || die "Must run as root to edit $UNIT_PATH and restart the service." +fi + +# --- read current trusted-peers value --- +CURRENT="$(grep -oE "${VAR}=[^\" ]+" "$UNIT_PATH" | head -1 | cut -d= -f2- || true)" +[ -n "$CURRENT" ] || die "Could not find ${VAR}= in $UNIT_PATH." +info "Current ${VAR} = ${CURRENT}" + +# --- idempotency: already trusted? --- +case ",${CURRENT}," in + *",${NEW_WRITER_PEERID},"*) + info "Already trusted: ${NEW_WRITER_PEERID} is present — no change needed." + exit 0 + ;; +esac + +NEWVAL="${CURRENT},${NEW_WRITER_PEERID}" +info "New ${VAR} = ${NEWVAL}" + +if [ "$DRY_RUN" = "1" ]; then + info "DRY_RUN=1 — would replace '${VAR}=${CURRENT}' with '${VAR}=${NEWVAL}' (Environment= line AND ExecStart -e). No changes made." + exit 0 +fi + +# --- backup --- +BACKUP="${UNIT_PATH}.bak.$(date +%s)" +cp -a "$UNIT_PATH" "$BACKUP" +info "Backed up unit -> $BACKUP" + +# --- edit (replaces EVERY occurrence: Environment= and ExecStart -e share the same token) --- +# CURRENT/NEWVAL are peer-id comma lists (base58 alnum + comma): safe as sed text. +sed -i "s|${VAR}=${CURRENT}|${VAR}=${NEWVAL}|g" "$UNIT_PATH" + +# --- verify the critical ExecStart -e was updated (that's what reaches the container) --- +if ! grep -q -- "-e ${VAR}=${NEWVAL}" "$UNIT_PATH"; then + cp -a "$BACKUP" "$UNIT_PATH" + die "ExecStart '-e ${VAR}' was not updated as expected; restored from backup ($BACKUP)." +fi +info "Updated occurrences: $(grep -c "${VAR}=${NEWVAL}" "$UNIT_PATH" || true) (expect 2: Environment= + ExecStart -e)." 
+ +if [ "$NO_RESTART" = "1" ]; then + info "NO_RESTART=1 — unit edited + backed up; skipping daemon-reload/restart/verify." + exit 0 +fi + +# --- apply --- +info "Reloading systemd + restarting ${SERVICE_NAME} (brief cluster-API blip; datastore/pinset untouched)..." +systemctl daemon-reload +systemctl restart "${SERVICE_NAME}" +sleep 5 + +# --- verify service health --- +if systemctl is-active --quiet "${SERVICE_NAME}"; then + info "OK: ${SERVICE_NAME} is active." +else + echo "ERROR: ${SERVICE_NAME} is NOT active after restart. Roll back with:" >&2 + echo " cp -a '$BACKUP' '$UNIT_PATH' && systemctl daemon-reload && systemctl restart ${SERVICE_NAME}" >&2 + exit 1 +fi +if command -v docker >/dev/null 2>&1; then + sleep 3 + if docker exec ipfs_cluster ipfs-cluster-ctl id >/dev/null 2>&1; then + info "OK: cluster API responds." + else + info "NOTE: cluster API not responding yet (may still be starting). Re-check: docker exec ipfs_cluster ipfs-cluster-ctl id" + fi +fi + +cat < Date: Tue, 2 Jun 2026 01:07:57 -0400 Subject: [PATCH 2/4] feat(cluster): Phase 1 - add new-writer provisioning (phase-1-setup-writer.sh) Provisions a 2nd ipfs-cluster WRITER on a plain Ubuntu/Debian cloud box (no Fula /uniondrive layout): installs Docker/curl/jq if missing, paths under /opt/fula-writer, default kubo datastore (writer stores ~nothing via the tag:group allocator), mirrors the master cluster env (secret=sha256(clustername), allocator, repl, FOLLOWERMODE=false), auto-reads the master cluster/kubo identity + bootstrap addr from the pool endpoint, joins via direct public bootstrap, prints the new cluster + kubo peer ids for phase-1-master-trust.sh + the pool-server. Dry-run + halts without PUBLIC_HOST. Tests: tests/test-phase-1-setup-writer.sh (dry-run: input validation, ip4/dns4 announce, secret derivation, zero side effects - 7/7). Part of #72. 
Co-Authored-By: Claude Opus 4.8 --- tests/test-phase-1-setup-writer.sh | 31 ++++ update-scripts/phase-1-setup-writer.sh | 211 +++++++++++++++++++++++++ 2 files changed, 242 insertions(+) create mode 100755 tests/test-phase-1-setup-writer.sh create mode 100755 update-scripts/phase-1-setup-writer.sh diff --git a/tests/test-phase-1-setup-writer.sh b/tests/test-phase-1-setup-writer.sh new file mode 100755 index 00000000..596be12b --- /dev/null +++ b/tests/test-phase-1-setup-writer.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Dry-run tests for update-scripts/phase-1-setup-writer.sh — no docker/root/network +# (DRY_RUN=1 + master info supplied so it never curls). Verifies input validation, +# announce-protocol detection (ip4 vs dns4), secret derivation, and zero side effects. +set -uo pipefail + +HERE="$(cd "$(dirname "$0")" && pwd)" +SCRIPT="$HERE/../update-scripts/phase-1-setup-writer.sh" +[ -f "$SCRIPT" ] || { echo "FAIL: not found $SCRIPT"; exit 1; } + +TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT +M="12D3KooWMasterAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +BS="/dns4/1.pools.functionyard.fula.network/tcp/9096/p2p/$M" +fail=0; pass(){ echo "ok - $1"; }; bad(){ echo "FAIL - $1"; fail=1; } + +# 1) dry-run, IPv4 host -> exit 0, ip4 announce, no side effects +out="$(DRY_RUN=1 PUBLIC_HOST=1.2.3.4 MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" BASE_DIR="$TMP/w" bash "$SCRIPT" 2>&1)"; rc=$? +[ "$rc" = 0 ] && pass "dry-run exits 0" || bad "dry-run exits 0 (rc=$rc)" +printf '%s' "$out" | grep -q "/ip4/1.2.3.4" && pass "ipv4 announce" || bad "ipv4 announce" +printf '%s' "$out" | grep -q "no changes made" && pass "declares no changes" || bad "declares no changes" +printf '%s' "$out" | grep -q "sha256" && pass "derives secret" || bad "derives secret" +[ ! 
-d "$TMP/w" ] && pass "no dirs created in dry-run" || bad "no dirs created in dry-run" + +# 2) dry-run, DNS host -> dns4 announce +out2="$(DRY_RUN=1 PUBLIC_HOST=writer.example.com MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" BASE_DIR="$TMP/w2" bash "$SCRIPT" 2>&1)" +printf '%s' "$out2" | grep -q "/dns4/writer.example.com" && pass "dns4 announce" || bad "dns4 announce" + +# 3) halts without PUBLIC_HOST +if DRY_RUN=1 MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" bash "$SCRIPT" >/dev/null 2>&1; then bad "halts without PUBLIC_HOST"; else pass "halts without PUBLIC_HOST"; fi + +[ "$fail" = 0 ] && { echo "ALL PASS"; exit 0; } || { echo "FAILURES"; exit 1; } diff --git a/update-scripts/phase-1-setup-writer.sh b/update-scripts/phase-1-setup-writer.sh new file mode 100755 index 00000000..e05664bd --- /dev/null +++ b/update-scripts/phase-1-setup-writer.sh @@ -0,0 +1,211 @@ +#!/usr/bin/env bash +# +# Phase 1 (cluster write-federation) — provision a 2nd trusted cluster WRITER on a +# FRESH, plain Ubuntu/Debian cloud box (no Fula /uniondrive layout, no kubo, no cluster). +# +# It installs kubo (ipfs_host) + ipfs-cluster (ipfs_cluster) as systemd units under +# self-contained paths (/opt/fula-writer), mirroring the master's cluster env exactly +# (same CLUSTER_SECRET=sha256(CLUSTERNAME), CLUSTERNAME, allocator, replication, and +# FOLLOWERMODE=false so it is a WRITER), joins the existing CRDT cluster by directly +# bootstrapping to the master (public->public, no relay tunnel needed), and prints the +# new writer's cluster + kubo peer ids. +# +# It does NOT touch the master. After it runs, on the MASTER: +# NEW_WRITER_PEERID= ./phase-1-master-trust.sh +# and add that id to IPFS_CLUSTER_TRUSTED_PEERS on the pool-server (join-server) so +# followers trust it too (join-server#2). 
+# +# The new writer stores ~nothing: it mirrors the master's allocator (tag:group,...), +# which keeps non-storage writers from being allocated pins — so kubo runs with the +# default datastore (no need to mirror the master's custom flatfs+pebble 900GB spec). +# +# REQUIRED env (HALTS if missing — never guesses): +# PUBLIC_HOST this box's PUBLIC ip or dns (kubo announce + cluster reachability) +# Optional env: +# CLUSTERNAME (default "1") -> CLUSTER_SECRET = sha256(CLUSTERNAME) +# POOL_API (default https://pools.fx.land/pools/) +# MASTER_CLUSTER_PEERID / MASTER_CLUSTER_BOOTSTRAP / MASTER_KUBO_PEERID +# (auto-read from POOL_API if unset) +# REPL_MIN / REPL_MAX (default 2 / 6) +# BASE_DIR (default /opt/fula-writer) +# KUBO_IMAGE (default ipfs/kubo:release) +# CLUSTER_IMAGE (default ipfs/ipfs-cluster:stable) +# DRY_RUN=1 print the plan; change nothing +# +set -euo pipefail + +CLUSTERNAME="${CLUSTERNAME:-1}" +POOL_API="${POOL_API:-https://pools.fx.land/pools/${CLUSTERNAME}}" +BASE_DIR="${BASE_DIR:-/opt/fula-writer}" +KUBO_IMAGE="${KUBO_IMAGE:-ipfs/kubo:release}" +CLUSTER_IMAGE="${CLUSTER_IMAGE:-ipfs/ipfs-cluster:stable}" +REPL_MIN="${REPL_MIN:-2}" +REPL_MAX="${REPL_MAX:-6}" +DRY_RUN="${DRY_RUN:-0}" +PUBLIC_HOST="${PUBLIC_HOST:-}" +MASTER_CLUSTER_PEERID="${MASTER_CLUSTER_PEERID:-}" +MASTER_CLUSTER_BOOTSTRAP="${MASTER_CLUSTER_BOOTSTRAP:-}" +MASTER_KUBO_PEERID="${MASTER_KUBO_PEERID:-}" + +KUBO_DIR="$BASE_DIR/kubo" +CLUSTER_DIR="$BASE_DIR/ipfs-cluster" + +die() { echo "ERROR: $*" >&2; exit 1; } +info() { echo "[phase-1-setup-writer] $*"; } + +# ---- preconditions ------------------------------------------------------------- +[ -n "$PUBLIC_HOST" ] || die "PUBLIC_HOST is required (this box's public IP or DNS). Refusing to guess." +if [ "$DRY_RUN" != "1" ]; then + [ "$(id -u)" = "0" ] || die "Must run as root (installs packages, writes systemd units)." 
+fi + +ensure_pkg() { + command -v "$1" >/dev/null 2>&1 && return 0 + [ "$DRY_RUN" = "1" ] && { info "(dry-run) would install $1"; return 0; } + command -v apt-get >/dev/null 2>&1 || die "$1 missing and apt-get not found — install $1 manually (this script targets Debian/Ubuntu)." + info "Installing $1 ..." + apt-get update -y >/dev/null 2>&1 || true + apt-get install -y "$1" >/dev/null 2>&1 || die "failed to install $1" +} +ensure_pkg curl +ensure_pkg jq +if ! command -v docker >/dev/null 2>&1; then + if [ "$DRY_RUN" = "1" ]; then info "(dry-run) would install Docker via get.docker.com"; else + info "Installing Docker ..." + curl -fsSL https://get.docker.com | sh || die "Docker install failed" + systemctl enable --now docker || die "could not start docker" + fi +fi + +# ---- derive secret + resolve master info -------------------------------------- +SECRET="$(printf '%s' "$CLUSTERNAME" | sha256sum | cut -d' ' -f1)" + +resolve_master() { + [ -n "$MASTER_CLUSTER_PEERID" ] && [ -n "$MASTER_CLUSTER_BOOTSTRAP" ] && return 0 + info "Reading master identity from $POOL_API ..." + local resp; resp="$(curl -s --max-time 20 "$POOL_API" || true)" + echo "$resp" | jq -e . >/dev/null 2>&1 || die "could not fetch/parse $POOL_API (set MASTER_CLUSTER_PEERID + MASTER_CLUSTER_BOOTSTRAP manually)." + [ -n "$MASTER_CLUSTER_PEERID" ] || MASTER_CLUSTER_PEERID="$(echo "$resp" | jq -r '."ipfs-cluster-peerid" // empty')" + [ -n "$MASTER_KUBO_PEERID" ] || MASTER_KUBO_PEERID="$(echo "$resp" | jq -r '."kubo-peerid" // empty')" + [ -n "$MASTER_CLUSTER_BOOTSTRAP" ] || MASTER_CLUSTER_BOOTSTRAP="$(echo "$resp" | jq -r '(.ipfs_cluster.addresses // [])[] | select(test("/tcp/"))' | head -1)" + [ -n "$MASTER_CLUSTER_BOOTSTRAP" ] || MASTER_CLUSTER_BOOTSTRAP="$(echo "$resp" | jq -r '(.ipfs_cluster.addresses // [])[0] // empty')" +} +resolve_master +[ -n "$MASTER_CLUSTER_PEERID" ] || die "could not resolve MASTER_CLUSTER_PEERID." 
+[ -n "$MASTER_CLUSTER_BOOTSTRAP" ] || die "could not resolve MASTER_CLUSTER_BOOTSTRAP (master cluster multiaddr)." + +# announce protocol: /ip4 for an IPv4 literal, else /dns4 +if printf '%s' "$PUBLIC_HOST" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'; then PROTO=ip4; else PROTO=dns4; fi + +cat < ${SECRET:0:12}...) + PUBLIC_HOST = $PUBLIC_HOST (announce as /$PROTO/$PUBLIC_HOST) + master cluster peer = $MASTER_CLUSTER_PEERID + master bootstrap addr = $MASTER_CLUSTER_BOOTSTRAP + master kubo peer = ${MASTER_KUBO_PEERID:-} + base dir = $BASE_DIR (kubo: $KUBO_DIR, cluster: $CLUSTER_DIR) + replication = $REPL_MIN..$REPL_MAX ; FOLLOWERMODE=false (writer) +EOF +[ "$DRY_RUN" = "1" ] && { info "DRY_RUN=1 — no changes made."; exit 0; } + +mkdir -p "$KUBO_DIR" "$CLUSTER_DIR" + +# ---- kubo: init (default datastore, server profile) + announce ----------------- +kubo_oneshot() { docker run --rm -e IPFS_PATH=/data/ipfs -v "$KUBO_DIR":/data/ipfs "$KUBO_IMAGE" "$@"; } +if [ ! -f "$KUBO_DIR/config" ]; then + info "Initializing kubo repo (server profile) ..." + kubo_oneshot init --profile=server >/dev/null +fi +kubo_oneshot config --json Addresses.Announce "[\"/$PROTO/$PUBLIC_HOST/tcp/4001\",\"/$PROTO/$PUBLIC_HOST/udp/4001/quic-v1\"]" >/dev/null +kubo_oneshot config Routing.Type dhtserver >/dev/null +kubo_oneshot config --json Routing.AcceleratedDHTClient true >/dev/null +NEW_KUBO_PEERID="$(kubo_oneshot config Identity.PeerID)" +[ -n "$NEW_KUBO_PEERID" ] || die "could not read new kubo peer id." + +cat > /etc/systemd/system/ipfs.service </dev/null 2>&1 && break + [ "$i" = 30 ] && die "kubo did not become healthy on :5001" + sleep 3 +done +info "kubo healthy (peer $NEW_KUBO_PEERID)" + +# ---- ipfs-cluster: init (read identity) + systemd unit + join ----------------- +cl_oneshot() { docker run --rm -e IPFS_CLUSTER_PATH=/data/ipfs-cluster -e CLUSTER_SECRET="$SECRET" -v "$CLUSTER_DIR":/data/ipfs-cluster --entrypoint ipfs-cluster-service "$CLUSTER_IMAGE" "$@"; } +if [ ! 
-f "$CLUSTER_DIR/identity.json" ]; then + info "Initializing ipfs-cluster ..." + cl_oneshot init >/dev/null 2>&1 || cl_oneshot init >/dev/null +fi +NEW_CLUSTER_PEERID="$(jq -r '.id' "$CLUSTER_DIR/identity.json")" +[ -n "$NEW_CLUSTER_PEERID" ] && [ "$NEW_CLUSTER_PEERID" != "null" ] || die "could not read new cluster peer id." +# persistent connectivity to the master +echo "$MASTER_CLUSTER_BOOTSTRAP" > "$CLUSTER_DIR/peerstore" + +TRUSTED="$MASTER_CLUSTER_PEERID,$NEW_CLUSTER_PEERID" +cat > /etc/systemd/system/ipfscluster.service </dev/null 2>&1 && info "cluster API up" || info "NOTE: cluster API not responding yet; check: docker logs ipfs_cluster" + +cat < Date: Tue, 2 Jun 2026 01:41:26 -0400 Subject: [PATCH 3/4] refactor(cluster): harden Phase 1 scripts - idempotent + interactive + .env Add update-scripts/lib/phase-common.sh - shared helpers for re-runnable phase scripts: pc_load_env/pc_save_env (persist inputs; a CLI/env value wins over a saved one), pc_prompt (interactive prompt showing the saved value as default, Enter keeps it; non-interactive uses env/.env or halts - never guesses), pc_write_if_changed (rewrite + restart only when the unit actually changed, with backup), detection helpers. Refactor phase-1-setup-writer.sh + phase-1-master-trust.sh onto the lib: detect what is already installed and skip/reuse it (Docker, kubo repo, cluster identity), rewrite systemd units only when changed, prompt for params and remember them in ENV_FILE so a re-run just updates what is needed. Tests (all pass under WSL bash): test-phase-common 10/10, test-phase-1-setup-writer 9/9, test-phase-1-master-trust 7/7 - incl re-run-reuses-saved-value, non-interactive halt, and a fixed set -u unbound-variable bug in a combined local declaration. Part of #72. 
Co-Authored-By: Claude Opus 4.8 --- tests/test-phase-1-master-trust.sh | 47 +++--- tests/test-phase-1-setup-writer.sh | 35 ++-- tests/test-phase-common.sh | 47 ++++++ update-scripts/lib/phase-common.sh | 89 ++++++++++ update-scripts/phase-1-master-trust.sh | 128 ++++---------- update-scripts/phase-1-setup-writer.sh | 222 ++++++++++--------------- 6 files changed, 300 insertions(+), 268 deletions(-) create mode 100755 tests/test-phase-common.sh create mode 100755 update-scripts/lib/phase-common.sh diff --git a/tests/test-phase-1-master-trust.sh b/tests/test-phase-1-master-trust.sh index 5f09feb7..24d08ffa 100755 --- a/tests/test-phase-1-master-trust.sh +++ b/tests/test-phase-1-master-trust.sh @@ -1,44 +1,39 @@ #!/usr/bin/env bash -# Test update-scripts/phase-1-master-trust.sh against a fixture systemd unit -# (no systemctl/docker — uses NO_RESTART=1). Verifies additive append to BOTH the -# Environment= line and the ExecStart -e flag, idempotency, and halt-without-input. -set -euo pipefail - +# Tests phase-1-master-trust.sh against a fixture systemd unit (NO_RESTART=1, no docker). +# Verifies additive append to both lines, backup, idempotency, halt/validation, and +# re-run-reuses-saved-peer-id. ENV_FILE points at temp files. 
+set -uo pipefail HERE="$(cd "$(dirname "$0")" && pwd)" SCRIPT="$HERE/../update-scripts/phase-1-master-trust.sh" -[ -f "$SCRIPT" ] || { echo "FAIL: script not found at $SCRIPT"; exit 1; } - -TMP="$(mktemp -d)" -trap 'rm -rf "$TMP"' EXIT +[ -f "$SCRIPT" ] || { echo "FAIL: not found $SCRIPT"; exit 1; } +TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT UNIT="$TMP/ipfscluster.service" MASTER="12D3KooWS79EhkPU7ESUwgG4vyHHzW9FDNZLoWVth9b5N5NSrvaj" NEW="12D3KooWNEWwriter00000000000000000000000000000000000001" - -cat > "$UNIT" < "$UNIT" </dev/null -grep -q "Environment=\"CLUSTER_CRDT_TRUSTEDPEERS=$MASTER,$NEW\"" "$UNIT" && pass "Environment= line appended" || bad "Environment= line appended" +fresh_unit +NEW_WRITER_PEERID="$NEW" NO_RESTART=1 UNIT_PATH="$UNIT" ENV_FILE="$TMP/a.env" bash "$SCRIPT" >/dev/null +grep -q "Environment=\"CLUSTER_CRDT_TRUSTEDPEERS=$MASTER,$NEW\"" "$UNIT" && pass "Environment= appended" || bad "Environment= appended" grep -q -- "-e CLUSTER_CRDT_TRUSTEDPEERS=$MASTER,$NEW " "$UNIT" && pass "ExecStart -e appended" || bad "ExecStart -e appended" ls "$UNIT".bak.* >/dev/null 2>&1 && pass "backup created" || bad "backup created" -# 2) idempotent: second run is a no-op, value still exactly MASTER,NEW (2 occurrences) -NEW_WRITER_PEERID="$NEW" NO_RESTART=1 UNIT_PATH="$UNIT" bash "$SCRIPT" >/dev/null +NEW_WRITER_PEERID="$NEW" NO_RESTART=1 UNIT_PATH="$UNIT" ENV_FILE="$TMP/a.env" bash "$SCRIPT" >/dev/null occ="$(grep -c "CLUSTER_CRDT_TRUSTEDPEERS=$MASTER,$NEW" "$UNIT" || true)" -[ "$occ" = "2" ] && pass "idempotent (no double append)" || bad "idempotent (got $occ occurrences, want 2)" +[ "$occ" = 2 ] && pass "idempotent (no double append)" || bad "idempotent (got $occ, want 2)" -# 3) halts when NEW_WRITER_PEERID is missing -if NO_RESTART=1 UNIT_PATH="$UNIT" bash "$SCRIPT" >/dev/null 2>&1; then bad "halts without NEW_WRITER_PEERID"; else pass "halts without NEW_WRITER_PEERID"; fi +if NO_RESTART=1 UNIT_PATH="$UNIT" ENV_FILE="$TMP/halt.env" bash "$SCRIPT" >/dev/null 
2>&1; then bad "halts without peer id"; else pass "halts without peer id"; fi +if NEW_WRITER_PEERID="not-a-peer" NO_RESTART=1 UNIT_PATH="$UNIT" ENV_FILE="$TMP/bad.env" bash "$SCRIPT" >/dev/null 2>&1; then bad "rejects bad peer id"; else pass "rejects bad peer id"; fi -# 4) rejects a non-peer-id value -if NEW_WRITER_PEERID="not-a-peer" NO_RESTART=1 UNIT_PATH="$UNIT" bash "$SCRIPT" >/dev/null 2>&1; then bad "rejects bad peer id"; else pass "rejects bad peer id"; fi +# re-run with NO peer id supplied -> reuses the saved one from .env (else it would halt) +fresh_unit +NEW_WRITER_PEERID="$NEW" NO_RESTART=1 UNIT_PATH="$UNIT" ENV_FILE="$TMP/s.env" bash "$SCRIPT" >/dev/null +if NO_RESTART=1 UNIT_PATH="$UNIT" ENV_FILE="$TMP/s.env" bash "$SCRIPT" >/dev/null 2>&1 && grep -q "CLUSTER_CRDT_TRUSTEDPEERS=$MASTER,$NEW" "$UNIT"; then pass "re-run reuses saved peer id"; else bad "re-run reuses saved peer id"; fi -[ "$fail" = "0" ] && { echo "ALL PASS"; exit 0; } || { echo "FAILURES"; exit 1; } +[ "$fail" = 0 ] && { echo "ALL PASS"; exit 0; } || { echo "FAILURES"; exit 1; } diff --git a/tests/test-phase-1-setup-writer.sh b/tests/test-phase-1-setup-writer.sh index 596be12b..8548b44e 100755 --- a/tests/test-phase-1-setup-writer.sh +++ b/tests/test-phase-1-setup-writer.sh @@ -1,31 +1,34 @@ #!/usr/bin/env bash -# Dry-run tests for update-scripts/phase-1-setup-writer.sh — no docker/root/network -# (DRY_RUN=1 + master info supplied so it never curls). Verifies input validation, -# announce-protocol detection (ip4 vs dns4), secret derivation, and zero side effects. +# Dry-run tests for phase-1-setup-writer.sh (no docker/root/network: DRY_RUN=1 + master +# info supplied). Verifies validation, ip4/dns4 detection, secret, zero side effects, +# .env persistence, and re-run-reuses-saved-value. ENV_FILE points at temp files. 
set -uo pipefail - HERE="$(cd "$(dirname "$0")" && pwd)" SCRIPT="$HERE/../update-scripts/phase-1-setup-writer.sh" [ -f "$SCRIPT" ] || { echo "FAIL: not found $SCRIPT"; exit 1; } - TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT M="12D3KooWMasterAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" BS="/dns4/1.pools.functionyard.fula.network/tcp/9096/p2p/$M" fail=0; pass(){ echo "ok - $1"; }; bad(){ echo "FAIL - $1"; fail=1; } -# 1) dry-run, IPv4 host -> exit 0, ip4 announce, no side effects -out="$(DRY_RUN=1 PUBLIC_HOST=1.2.3.4 MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" BASE_DIR="$TMP/w" bash "$SCRIPT" 2>&1)"; rc=$? +run() { DRY_RUN=1 ENV_FILE="$1" PUBLIC_HOST="$2" BASE_DIR="$3" MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" bash "$SCRIPT" 2>&1; } + +out="$(run "$TMP/a.env" 1.2.3.4 "$TMP/w")"; rc=$? [ "$rc" = 0 ] && pass "dry-run exits 0" || bad "dry-run exits 0 (rc=$rc)" -printf '%s' "$out" | grep -q "/ip4/1.2.3.4" && pass "ipv4 announce" || bad "ipv4 announce" -printf '%s' "$out" | grep -q "no changes made" && pass "declares no changes" || bad "declares no changes" -printf '%s' "$out" | grep -q "sha256" && pass "derives secret" || bad "derives secret" -[ ! -d "$TMP/w" ] && pass "no dirs created in dry-run" || bad "no dirs created in dry-run" +echo "$out" | grep -q "/ip4" && pass "ipv4 announce" || bad "ipv4 announce" +echo "$out" | grep -q "no system changes" && pass "declares no changes" || bad "declares no changes" +echo "$out" | grep -q "secret=" && pass "derives secret" || bad "derives secret" +[ ! 
-d "$TMP/w" ] && pass "no base dir created in dry-run" || bad "no base dir created in dry-run" +[ -f "$TMP/a.env" ] && pass "saves params to .env" || bad "saves params to .env" + +out2="$(run "$TMP/b.env" writer.example.com "$TMP/w2")" +echo "$out2" | grep -q "/dns4" && pass "dns4 announce" || bad "dns4 announce" -# 2) dry-run, DNS host -> dns4 announce -out2="$(DRY_RUN=1 PUBLIC_HOST=writer.example.com MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" BASE_DIR="$TMP/w2" bash "$SCRIPT" 2>&1)" -printf '%s' "$out2" | grep -q "/dns4/writer.example.com" && pass "dns4 announce" || bad "dns4 announce" +# re-run with NO PUBLIC_HOST supplied -> reuses the saved value from a.env +out3="$(DRY_RUN=1 ENV_FILE="$TMP/a.env" MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" bash "$SCRIPT" 2>&1)"; rc3=$? +{ [ "$rc3" = 0 ] && echo "$out3" | grep -q "1.2.3.4"; } && pass "re-run reuses saved PUBLIC_HOST" || bad "re-run reuses saved PUBLIC_HOST" -# 3) halts without PUBLIC_HOST -if DRY_RUN=1 MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" bash "$SCRIPT" >/dev/null 2>&1; then bad "halts without PUBLIC_HOST"; else pass "halts without PUBLIC_HOST"; fi +# halts without PUBLIC_HOST (fresh env, nothing supplied) +if DRY_RUN=1 ENV_FILE="$TMP/halt.env" MASTER_CLUSTER_PEERID="$M" MASTER_CLUSTER_BOOTSTRAP="$BS" bash "$SCRIPT" >/dev/null 2>&1; then bad "halts without PUBLIC_HOST"; else pass "halts without PUBLIC_HOST"; fi [ "$fail" = 0 ] && { echo "ALL PASS"; exit 0; } || { echo "FAILURES"; exit 1; } diff --git a/tests/test-phase-common.sh b/tests/test-phase-common.sh new file mode 100755 index 00000000..9f11f247 --- /dev/null +++ b/tests/test-phase-common.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Tests the shared phase-common helpers: env save/load (+ precedence), interactive +# prompt-with-default (forced via PC_FORCE_INTERACTIVE + piped stdin), non-interactive +# required/validation, and write-if-changed idempotency + backup. 
+set -uo pipefail +HERE="$(cd "$(dirname "$0")" && pwd)" +LIB="$HERE/../update-scripts/lib/phase-common.sh" +[ -f "$LIB" ] || { echo "FAIL: lib not found $LIB"; exit 1; } +TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT +fail=0; pass(){ echo "ok - $1"; }; bad(){ echo "FAIL - $1"; fail=1; } + +# shellcheck disable=SC1090 +. "$LIB" # note: die() exits, so negative cases run in subshells + +# save/load round-trip +FOO=hello; BAR=world +pc_save_env "$TMP/p.env" FOO BAR >/dev/null +unset FOO BAR +pc_load_env "$TMP/p.env" >/dev/null +{ [ "${FOO:-}" = hello ] && [ "${BAR:-}" = world ]; } && pass "save/load round-trip" || bad "save/load round-trip" + +# CLI/env value beats a saved .env value +FOO=cli; pc_load_env "$TMP/p.env" >/dev/null +[ "$FOO" = cli ] && pass "env beats saved .env" || bad "env beats saved .env (got $FOO)" + +# non-interactive: missing required -> die +( unset BAZ; PC_FORCE_INTERACTIVE=0; pc_prompt BAZ "Baz" >/dev/null 2>&1 ) && bad "noninteractive missing -> die" || pass "noninteractive missing -> die" +# non-interactive: present + valid -> kept +( QUX=12D3KooWabc; PC_FORCE_INTERACTIVE=0; pc_prompt QUX "Qux" '^12D3KooW' >/dev/null 2>&1 && [ "$QUX" = 12D3KooWabc ] ) && pass "noninteractive valid kept" || bad "noninteractive valid kept" +# non-interactive: present + invalid -> die +( BADV=nope; PC_FORCE_INTERACTIVE=0; pc_prompt BADV "Badv" '^12D3KooW' >/dev/null 2>&1 ) && bad "noninteractive invalid -> die" || pass "noninteractive invalid -> die" + +# interactive (forced) empty input keeps current default +out="$(printf '\n' | PC_FORCE_INTERACTIVE=1 bash -c '. "'"$LIB"'"; CUR=keepme; pc_prompt CUR "Cur"; echo "VAL=$CUR"' 2>/dev/null)" +echo "$out" | grep -q "VAL=keepme" && pass "interactive empty keeps default" || bad "interactive empty keeps default ($out)" +# interactive new value overrides +out2="$(printf 'newval\n' | PC_FORCE_INTERACTIVE=1 bash -c '. 
"'"$LIB"'"; CUR=old; pc_prompt CUR "Cur"; echo "VAL=$CUR"' 2>/dev/null)" +echo "$out2" | grep -q "VAL=newval" && pass "interactive new value used" || bad "interactive new value used ($out2)" + +# write-if-changed: changed -> unchanged -> changed(+backup) +f="$TMP/u.conf" +[ "$(printf 'A\n' | pc_write_if_changed "$f")" = changed ] && pass "write: first=changed" || bad "write: first=changed" +[ "$(printf 'A\n' | pc_write_if_changed "$f")" = unchanged ] && pass "write: same=unchanged" || bad "write: same=unchanged" +r3="$(printf 'B\n' | pc_write_if_changed "$f")" +{ [ "$r3" = changed ] && ls "$f".bak.* >/dev/null 2>&1; } && pass "write: diff=changed+backup" || bad "write: diff=changed+backup ($r3)" + +[ "$fail" = 0 ] && { echo "ALL PASS"; exit 0; } || { echo "FAILURES"; exit 1; } diff --git a/update-scripts/lib/phase-common.sh b/update-scripts/lib/phase-common.sh new file mode 100755 index 00000000..c6350399 --- /dev/null +++ b/update-scripts/lib/phase-common.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# Shared helpers for fula phase install/update scripts — idempotent + re-runnable. +# +# Source it from a phase script: +# SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"; . "$SCRIPT_DIR/lib/phase-common.sh" +# +# Behaviour it gives every phase script: +# - pc_load_env "$ENV_FILE" : prior saved params become defaults (a CLI/env value +# always wins over a saved one). +# - pc_prompt VAR "Label" [regex] [secret] : asks INTERACTIVELY (a TTY, or +# PC_FORCE_INTERACTIVE=1) showing the current/saved value +# as the default; pressing Enter keeps it. NON-interactive +# (no TTY, e.g. CI/cron): uses the env/.env value, or dies +# if a required value is missing (never guesses). +# - pc_save_env "$ENV_FILE" VAR... : persist chosen params for the next run. +# - pc_write_if_changed PATH : write stdin to PATH only if different (backs up first), +# so re-runs don't needlessly restart services. +# - pc_have / detection helpers : skip work that's already done; never panic. 
# ---------------------------------------------------------------------------
# phase-common.sh helper functions (body of update-scripts/lib/phase-common.sh,
# reconstructed with real line breaks).
#
# All function-local variables carry a `_pc_` prefix: several helpers read or
# write a CALLER-named variable through indirection (${!name}, printf -v), and
# a short local such as `val`, `k` or `f` would silently shadow a caller
# variable with the same name.
# ---------------------------------------------------------------------------

# die MESSAGE... — print "ERROR: MESSAGE" on stderr and exit the shell with 1.
die()  { echo "ERROR: $*" >&2; exit 1; }

# info MESSAGE... — log a line on stdout, tagged with $PC_TAG (default "phase").
info() { echo "[${PC_TAG:-phase}] $*"; }

# pc_have CMD — succeed when CMD resolves to something runnable.
pc_have() { command -v "$1" >/dev/null 2>&1; }

# pc_is_interactive — should pc_prompt ask the user?
#   PC_FORCE_INTERACTIVE=1  -> always interactive (tests pipe answers on stdin)
#   PC_FORCE_INTERACTIVE=0  -> never interactive, even on a TTY (lets the
#                              non-interactive tests run from a terminal
#                              without hanging on a prompt)
#   unset / anything else   -> interactive only when stdin is a TTY
pc_is_interactive() {
  case "${PC_FORCE_INTERACTIVE:-}" in
    1) return 0 ;;
    0) return 1 ;;
  esac
  [ -t 0 ]
}

# pc_load_env FILE — load KEY=VALUE lines from FILE as defaults.
#   - a missing/empty FILE argument or a non-existent file is a silent no-op
#   - blank lines, '#' comments, lines without '=' and lines whose key is not
#     a valid shell identifier are skipped
#   - a key that already has a non-empty value keeps it (CLI/env wins)
#   - one leading and one trailing double quote are stripped from the value
# Loaded keys are exported.
pc_load_env() {
  local _pc_f="${1:-}"
  [ -n "$_pc_f" ] && [ -f "$_pc_f" ] || return 0
  local _pc_line _pc_k _pc_v
  # `|| [ -n "$_pc_line" ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r _pc_line || [ -n "$_pc_line" ]; do
    case "$_pc_line" in
      ''|\#*) continue ;;
      *=*) ;;
      *) continue ;;
    esac
    # Split on the FIRST '=' by parameter expansion; `IFS='=' read k v` would
    # drop a single trailing '=' from the value.
    _pc_k="${_pc_line%%=*}"
    _pc_v="${_pc_line#*=}"
    [[ "$_pc_k" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue
    _pc_v="${_pc_v%\"}"; _pc_v="${_pc_v#\"}"
    # only fill if not already set in the environment — a CLI/env value wins
    if [ -z "${!_pc_k:-}" ]; then
      printf -v "$_pc_k" '%s' "$_pc_v"
      export "$_pc_k"
    fi
  done < "$_pc_f"
  info "loaded saved params from $_pc_f"
}

# pc_save_env FILE VAR... — persist the current values of the named variables
# as VAR=value lines in FILE (parent directory created if needed). The file is
# written to a temp name under a restrictive umask and then renamed, so it is
# never readable by others and never left half-written at its final path.
# An empty FILE argument is a no-op.
pc_save_env() {
  local _pc_f="${1:-}"; shift || true
  [ -n "$_pc_f" ] || return 0
  mkdir -p "$(dirname "$_pc_f")"
  local _pc_tmp="${_pc_f}.tmp.$$" _pc_name
  (
    umask 077
    {
      echo "# fula phase params — auto-saved; safe to edit. Re-running reuses these as defaults."
      for _pc_name in "$@"; do printf '%s=%s\n' "$_pc_name" "${!_pc_name:-}"; done
    } > "$_pc_tmp"
  ) || { rm -f "$_pc_tmp"; return 1; }
  chmod 600 "$_pc_tmp" 2>/dev/null || true
  mv "$_pc_tmp" "$_pc_f"
  info "saved params to $_pc_f"
}

# pc_prompt VAR "Label" [validation-regex] [secret]
#   Interactive (see pc_is_interactive): prompt on stderr, showing the current
#   value of VAR as the default (hidden as "[keep current]" when `secret` is
#   non-empty); empty input keeps the current value; re-asks until the value is
#   non-empty and matches the regex. If stdin reaches EOF without a usable
#   value, dies instead of re-prompting forever.
#   Non-interactive: keeps the current value, or dies when it is empty or does
#   not match the regex.
#   On success VAR is assigned and exported.
pc_prompt() {
  local _pc_var="$1" _pc_label="$2" _pc_regex="${3:-}" _pc_secret="${4:-}"
  local _pc_cur _pc_in _pc_val _pc_eof
  [[ "$_pc_var" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || die "pc_prompt: invalid variable name '$_pc_var'"
  _pc_cur="${!_pc_var:-}"; _pc_val="$_pc_cur"
  if pc_is_interactive; then
    while :; do
      _pc_in=""; _pc_eof=0
      if [ -n "$_pc_secret" ]; then
        printf '%s%s: ' "$_pc_label" "${_pc_cur:+ [keep current]}" >&2
        read -r -s _pc_in || _pc_eof=1
        echo >&2
      else
        printf '%s%s: ' "$_pc_label" "${_pc_cur:+ [$_pc_cur]}" >&2
        read -r _pc_in || _pc_eof=1
      fi
      # A failed read may still have delivered a final unterminated line, so
      # the input is examined before EOF is treated as fatal.
      if [ -z "$_pc_in" ]; then _pc_in="$_pc_cur"; fi
      if [ -z "$_pc_in" ]; then
        if [ "$_pc_eof" = 1 ]; then die "$_pc_var is required — no more input available (stdin closed)."; fi
        echo " required — please enter a value" >&2
        continue
      fi
      if [ -n "$_pc_regex" ] && ! [[ "$_pc_in" =~ $_pc_regex ]]; then
        if [ "$_pc_eof" = 1 ]; then die "$_pc_var='$_pc_in' is invalid (expected: $_pc_regex) and no more input is available."; fi
        echo " invalid (expected: $_pc_regex)" >&2
        continue
      fi
      _pc_val="$_pc_in"
      break
    done
  else
    [ -n "$_pc_val" ] || die "$_pc_var is required — set it as an env var or run interactively (refusing to guess)."
    if [ -n "$_pc_regex" ] && ! [[ "$_pc_val" =~ $_pc_regex ]]; then die "$_pc_var='$_pc_val' is invalid (expected: $_pc_regex)."; fi
  fi
  printf -v "$_pc_var" '%s' "$_pc_val"; export "$_pc_var"
}

# pc_write_if_changed PATH  (new content on stdin) -> echoes "changed" | "unchanged"
#   Writes stdin to PATH only when the content differs. An existing PATH is
#   copied to PATH.bak.<epoch-seconds> before being replaced. Returns non-zero
#   (and removes its temp file) if any step fails.
#   NOTE(review): the new file is the mktemp file moved into place, so it
#   presumably carries mktemp's restrictive mode rather than the old file's
#   mode — confirm this is acceptable for the systemd units written with it.
pc_write_if_changed() {
  local _pc_path="$1" _pc_tmp
  _pc_tmp="$(mktemp)" || return 1
  if ! cat > "$_pc_tmp"; then rm -f "$_pc_tmp"; return 1; fi
  if [ -f "$_pc_path" ] && cmp -s "$_pc_tmp" "$_pc_path"; then
    rm -f "$_pc_tmp"
    echo "unchanged"
    return 0
  fi
  if [ -f "$_pc_path" ]; then
    cp -a "$_pc_path" "${_pc_path}.bak.$(date +%s)" || { rm -f "$_pc_tmp"; return 1; }
  fi
  if ! mkdir -p "$(dirname "$_pc_path")" || ! mv "$_pc_tmp" "$_pc_path"; then
    rm -f "$_pc_tmp"
    return 1
  fi
  echo "changed"
}

# pc_backup FILE — copy FILE to FILE.bak.<epoch-seconds> when it exists.
# Always returns 0.
pc_backup() { [ -f "$1" ] && cp -a "$1" "$1.bak.$(date +%s)" && info "backed up $1"; return 0; }

# pc_container_exists NAME — succeed when `docker ps -a` lists a container
# named exactly NAME. -F compares the name as a literal string (a '.' in NAME
# is not a regex wildcard). The here-string form (rather than a pipe) avoids a
# spurious failure under `set -o pipefail` when grep -q exits early.
pc_container_exists() { grep -qxF -- "$1" <<<"$(docker ps -a --format '{{.Names}}' 2>/dev/null)"; }

# pc_service_active UNIT — succeed when systemctl reports UNIT as active.
pc_service_active()   { systemctl is-active --quiet "$1" 2>/dev/null; }

# ---------------------------------------------------------------------------
# Patch text that followed the helpers on the same source line (the start of
# the phase-1-master-trust.sh diff), preserved verbatim as comments:
# ---------------------------------------------------------------------------
# diff --git a/update-scripts/phase-1-master-trust.sh b/update-scripts/phase-1-master-trust.sh
# index a96d479f..8b06a2b0 100755
# --- a/update-scripts/phase-1-master-trust.sh
# +++ b/update-scripts/phase-1-master-trust.sh
# @@ -1,118 +1,62 @@
#  #!/usr/bin/env bash
#  #
# -# Phase 1 (cluster write-federation) — trust a 2nd cluster WRITER on the master.
# +# Phase 1 — trust a 2nd cluster WRITER on the master. Idempotent + re-runnable.
#  #
#  # Appends NEW_WRITER_PEERID to CLUSTER_CRDT_TRUSTEDPEERS in the master's systemd unit
# -# (both the Environment= line AND the ExecStart `-e` flag), backs up the unit, reloads
# -# systemd, restarts the cluster, and verifies it came back up.
# +# (both the Environment= line AND the ExecStart `-e` flag), backs up, reloads, restarts,
# +# verifies. SAFE: additive only — the cluster datastore/identity/pinset are never touched.
# +# Run interactively and it asks for the peer id (saved for next time); non-interactive
# +# uses NEW_WRITER_PEERID from env/.env or halts.
#  #
# -# SAFE: additive only — it just appends a trusted peer id. The cluster datastore
# -# (/uniondrive/ipfs-cluster/pebble) and identity are never touched, so the existing
# -# pinset is preserved.
A timestamped backup is written and the rollback command printed. -# This is a SERVER-side script (the master is systemd-managed, NOT part of the OTA fleet). -# -# Usage (on the MASTER, as root): -# NEW_WRITER_PEERID=12D3KooW... ./phase-1-master-trust.sh # apply -# NEW_WRITER_PEERID=12D3KooW... DRY_RUN=1 ./phase-1-master-trust.sh # show plan only -# -# Env overrides: -# UNIT_PATH (default /etc/systemd/system/ipfscluster.service) -# SERVICE_NAME (default ipfscluster) -# DRY_RUN=1 print the planned change; modify nothing -# NO_RESTART=1 edit + backup only; skip daemon-reload/restart/verify (used by tests) +# Env: UNIT_PATH (default /etc/systemd/system/ipfscluster.service), SERVICE_NAME (ipfscluster), +# ENV_FILE (default /etc/fula/phase-1-master-trust.env), DRY_RUN=1, NO_RESTART=1 (tests). # set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck source=lib/phase-common.sh +. "$SCRIPT_DIR/lib/phase-common.sh" +PC_TAG="phase-1-master-trust" UNIT_PATH="${UNIT_PATH:-/etc/systemd/system/ipfscluster.service}" SERVICE_NAME="${SERVICE_NAME:-ipfscluster}" -DRY_RUN="${DRY_RUN:-0}" -NO_RESTART="${NO_RESTART:-0}" +ENV_FILE="${ENV_FILE:-/etc/fula/phase-1-master-trust.env}" +DRY_RUN="${DRY_RUN:-0}"; NO_RESTART="${NO_RESTART:-0}" VAR="CLUSTER_CRDT_TRUSTEDPEERS" -die() { echo "ERROR: $*" >&2; exit 1; } -info() { echo "[phase-1-master-trust] $*"; } +pc_load_env "$ENV_FILE" +pc_prompt NEW_WRITER_PEERID "New writer cluster peer id (12D3KooW...)" '^(12D3KooW|Qm)' -# --- preconditions (halt rather than guess) --- -[ -n "${NEW_WRITER_PEERID:-}" ] || die "NEW_WRITER_PEERID is required (the new writer's CLUSTER peer id, e.g. 12D3KooW...). Refusing to guess." -case "$NEW_WRITER_PEERID" in - 12D3KooW*|Qm*) : ;; - *) die "NEW_WRITER_PEERID='$NEW_WRITER_PEERID' does not look like a libp2p peer id (expected 12D3KooW... or Qm...)." ;; -esac -[ -f "$UNIT_PATH" ] || die "Unit file not found: $UNIT_PATH (set UNIT_PATH=... if it lives elsewhere)." 
-if [ "$NO_RESTART" != "1" ] && [ "$DRY_RUN" != "1" ]; then - [ "$(id -u)" = "0" ] || die "Must run as root to edit $UNIT_PATH and restart the service." -fi +[ -f "$UNIT_PATH" ] || die "unit file not found: $UNIT_PATH (set UNIT_PATH=... if elsewhere)." +if [ "$NO_RESTART" != 1 ] && [ "$DRY_RUN" != 1 ]; then [ "$(id -u)" = 0 ] || die "must run as root to edit $UNIT_PATH and restart."; fi -# --- read current trusted-peers value --- CURRENT="$(grep -oE "${VAR}=[^\" ]+" "$UNIT_PATH" | head -1 | cut -d= -f2- || true)" -[ -n "$CURRENT" ] || die "Could not find ${VAR}= in $UNIT_PATH." -info "Current ${VAR} = ${CURRENT}" +[ -n "$CURRENT" ] || die "could not find ${VAR}= in $UNIT_PATH." +info "current ${VAR} = $CURRENT" -# --- idempotency: already trusted? --- case ",${CURRENT}," in - *",${NEW_WRITER_PEERID},"*) - info "Already trusted: ${NEW_WRITER_PEERID} is present — no change needed." - exit 0 - ;; + *",${NEW_WRITER_PEERID},"*) info "already trusted: ${NEW_WRITER_PEERID} — no change."; pc_save_env "$ENV_FILE" NEW_WRITER_PEERID; exit 0 ;; esac - NEWVAL="${CURRENT},${NEW_WRITER_PEERID}" -info "New ${VAR} = ${NEWVAL}" +info "new ${VAR} = $NEWVAL" +pc_save_env "$ENV_FILE" NEW_WRITER_PEERID -if [ "$DRY_RUN" = "1" ]; then - info "DRY_RUN=1 — would replace '${VAR}=${CURRENT}' with '${VAR}=${NEWVAL}' (Environment= line AND ExecStart -e). No changes made." - exit 0 -fi +if [ "$DRY_RUN" = 1 ]; then info "DRY_RUN=1 — would set ${VAR}=${NEWVAL} (Environment= + ExecStart -e). No changes."; exit 0; fi -# --- backup --- -BACKUP="${UNIT_PATH}.bak.$(date +%s)" -cp -a "$UNIT_PATH" "$BACKUP" -info "Backed up unit -> $BACKUP" - -# --- edit (replaces EVERY occurrence: Environment= and ExecStart -e share the same token) --- -# CURRENT/NEWVAL are peer-id comma lists (base58 alnum + comma): safe as sed text. 
+BACKUP="${UNIT_PATH}.bak.$(date +%s)"; cp -a "$UNIT_PATH" "$BACKUP"; info "backed up -> $BACKUP" sed -i "s|${VAR}=${CURRENT}|${VAR}=${NEWVAL}|g" "$UNIT_PATH" +grep -q -- "-e ${VAR}=${NEWVAL}" "$UNIT_PATH" || { cp -a "$BACKUP" "$UNIT_PATH"; die "ExecStart '-e ${VAR}' not updated; restored from $BACKUP."; } +info "updated occurrences: $(grep -c "${VAR}=${NEWVAL}" "$UNIT_PATH" || true) (expect 2)" -# --- verify the critical ExecStart -e was updated (that's what reaches the container) --- -if ! grep -q -- "-e ${VAR}=${NEWVAL}" "$UNIT_PATH"; then - cp -a "$BACKUP" "$UNIT_PATH" - die "ExecStart '-e ${VAR}' was not updated as expected; restored from backup ($BACKUP)." -fi -info "Updated occurrences: $(grep -c "${VAR}=${NEWVAL}" "$UNIT_PATH" || true) (expect 2: Environment= + ExecStart -e)." - -if [ "$NO_RESTART" = "1" ]; then - info "NO_RESTART=1 — unit edited + backed up; skipping daemon-reload/restart/verify." - exit 0 -fi - -# --- apply --- -info "Reloading systemd + restarting ${SERVICE_NAME} (brief cluster-API blip; datastore/pinset untouched)..." -systemctl daemon-reload -systemctl restart "${SERVICE_NAME}" -sleep 5 - -# --- verify service health --- -if systemctl is-active --quiet "${SERVICE_NAME}"; then - info "OK: ${SERVICE_NAME} is active." -else - echo "ERROR: ${SERVICE_NAME} is NOT active after restart. Roll back with:" >&2 - echo " cp -a '$BACKUP' '$UNIT_PATH' && systemctl daemon-reload && systemctl restart ${SERVICE_NAME}" >&2 - exit 1 -fi -if command -v docker >/dev/null 2>&1; then - sleep 3 - if docker exec ipfs_cluster ipfs-cluster-ctl id >/dev/null 2>&1; then - info "OK: cluster API responds." - else - info "NOTE: cluster API not responding yet (may still be starting). 
Re-check: docker exec ipfs_cluster ipfs-cluster-ctl id" - fi -fi +if [ "$NO_RESTART" = 1 ]; then info "NO_RESTART=1 — edited + backed up; skipping restart."; exit 0; fi +info "daemon-reload + restart $SERVICE_NAME (brief cluster-API blip; datastore/pinset untouched)" +systemctl daemon-reload; systemctl restart "$SERVICE_NAME"; sleep 5 +if systemctl is-active --quiet "$SERVICE_NAME"; then info "OK: $SERVICE_NAME active." +else echo "ROLL BACK: cp -a '$BACKUP' '$UNIT_PATH' && systemctl daemon-reload && systemctl restart $SERVICE_NAME" >&2; die "$SERVICE_NAME not active after restart."; fi +command -v docker >/dev/null 2>&1 && { sleep 3; docker exec ipfs_cluster ipfs-cluster-ctl id >/dev/null 2>&1 && info "cluster API responds" || info "NOTE: cluster API not up yet."; } cat <public, no relay tunnel needed), and prints the -# new writer's cluster + kubo peer ids. +# Idempotent + re-runnable: detects what is already installed and skips/reuses it +# (Docker, kubo repo, cluster identity), rewrites a systemd unit (and restarts) ONLY when +# it actually changed, and remembers your inputs in $ENV_FILE — so a re-run just updates +# what is needed. Run interactively and it asks for the parameters (pressing Enter keeps +# the saved value); run non-interactively (CI/cron) and it uses env/.env or halts. # -# It does NOT touch the master. After it runs, on the MASTER: -# NEW_WRITER_PEERID= ./phase-1-master-trust.sh -# and add that id to IPFS_CLUSTER_TRUSTED_PEERS on the pool-server (join-server) so -# followers trust it too (join-server#2). -# -# The new writer stores ~nothing: it mirrors the master's allocator (tag:group,...), -# which keeps non-storage writers from being allocated pins — so kubo runs with the -# default datastore (no need to mirror the master's custom flatfs+pebble 900GB spec). 
-# -# REQUIRED env (HALTS if missing — never guesses): -# PUBLIC_HOST this box's PUBLIC ip or dns (kubo announce + cluster reachability) -# Optional env: -# CLUSTERNAME (default "1") -> CLUSTER_SECRET = sha256(CLUSTERNAME) -# POOL_API (default https://pools.fx.land/pools/) -# MASTER_CLUSTER_PEERID / MASTER_CLUSTER_BOOTSTRAP / MASTER_KUBO_PEERID -# (auto-read from POOL_API if unset) -# REPL_MIN / REPL_MAX (default 2 / 6) -# BASE_DIR (default /opt/fula-writer) -# KUBO_IMAGE (default ipfs/kubo:release) -# CLUSTER_IMAGE (default ipfs/ipfs-cluster:stable) -# DRY_RUN=1 print the plan; change nothing +# Run on the NEW box (as root). Re-running is safe. See lib/phase-common.sh for prompt/env behaviour. # set -euo pipefail - -CLUSTERNAME="${CLUSTERNAME:-1}" -POOL_API="${POOL_API:-https://pools.fx.land/pools/${CLUSTERNAME}}" -BASE_DIR="${BASE_DIR:-/opt/fula-writer}" -KUBO_IMAGE="${KUBO_IMAGE:-ipfs/kubo:release}" -CLUSTER_IMAGE="${CLUSTER_IMAGE:-ipfs/ipfs-cluster:stable}" -REPL_MIN="${REPL_MIN:-2}" -REPL_MAX="${REPL_MAX:-6}" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck source=lib/phase-common.sh +. 
"$SCRIPT_DIR/lib/phase-common.sh" +PC_TAG="phase-1-setup-writer" + +ENV_FILE="${ENV_FILE:-/opt/fula-writer/.env}" +pc_load_env "$ENV_FILE" + +: "${CLUSTERNAME:=1}" +: "${BASE_DIR:=/opt/fula-writer}" +: "${KUBO_IMAGE:=ipfs/kubo:release}" +: "${CLUSTER_IMAGE:=ipfs/ipfs-cluster:stable}" +: "${REPL_MIN:=2}"; : "${REPL_MAX:=6}" +: "${POOL_API:=}" +: "${MASTER_CLUSTER_PEERID:=}"; : "${MASTER_CLUSTER_BOOTSTRAP:=}"; : "${MASTER_KUBO_PEERID:=}" DRY_RUN="${DRY_RUN:-0}" -PUBLIC_HOST="${PUBLIC_HOST:-}" -MASTER_CLUSTER_PEERID="${MASTER_CLUSTER_PEERID:-}" -MASTER_CLUSTER_BOOTSTRAP="${MASTER_CLUSTER_BOOTSTRAP:-}" -MASTER_KUBO_PEERID="${MASTER_KUBO_PEERID:-}" - -KUBO_DIR="$BASE_DIR/kubo" -CLUSTER_DIR="$BASE_DIR/ipfs-cluster" - -die() { echo "ERROR: $*" >&2; exit 1; } -info() { echo "[phase-1-setup-writer] $*"; } - -# ---- preconditions ------------------------------------------------------------- -[ -n "$PUBLIC_HOST" ] || die "PUBLIC_HOST is required (this box's public IP or DNS). Refusing to guess." -if [ "$DRY_RUN" != "1" ]; then - [ "$(id -u)" = "0" ] || die "Must run as root (installs packages, writes systemd units)." -fi ensure_pkg() { - command -v "$1" >/dev/null 2>&1 && return 0 - [ "$DRY_RUN" = "1" ] && { info "(dry-run) would install $1"; return 0; } - command -v apt-get >/dev/null 2>&1 || die "$1 missing and apt-get not found — install $1 manually (this script targets Debian/Ubuntu)." - info "Installing $1 ..." - apt-get update -y >/dev/null 2>&1 || true + pc_have "$1" && { info "$1 present — skip"; return 0; } + [ "$DRY_RUN" = 1 ] && { info "(dry-run) would install $1"; return 0; } + pc_have apt-get || die "$1 missing and apt-get not found (Debian/Ubuntu only) — install $1 manually." 
+ info "installing $1 ..."; apt-get update -y >/dev/null 2>&1 || true apt-get install -y "$1" >/dev/null 2>&1 || die "failed to install $1" } + +# ---- gather params (interactive prompts with saved defaults; else env/.env or halt) ---- +pc_prompt PUBLIC_HOST "Public IP or DNS of THIS writer box" +pc_prompt CLUSTERNAME "Cluster/pool name" '^[0-9A-Za-z._-]+$' +[ -n "$POOL_API" ] || POOL_API="https://pools.fx.land/pools/${CLUSTERNAME}" + +if [ "$DRY_RUN" != 1 ]; then [ "$(id -u)" = 0 ] || die "run as root (installs packages + writes systemd units)."; fi + ensure_pkg curl ensure_pkg jq -if ! command -v docker >/dev/null 2>&1; then - if [ "$DRY_RUN" = "1" ]; then info "(dry-run) would install Docker via get.docker.com"; else - info "Installing Docker ..." - curl -fsSL https://get.docker.com | sh || die "Docker install failed" - systemctl enable --now docker || die "could not start docker" - fi -fi +if pc_have docker; then info "docker present — skip" +elif [ "$DRY_RUN" = 1 ]; then info "(dry-run) would install Docker" +else info "installing Docker ..."; curl -fsSL https://get.docker.com | sh || die "Docker install failed"; systemctl enable --now docker || die "could not start docker"; fi -# ---- derive secret + resolve master info -------------------------------------- SECRET="$(printf '%s' "$CLUSTERNAME" | sha256sum | cut -d' ' -f1)" -resolve_master() { - [ -n "$MASTER_CLUSTER_PEERID" ] && [ -n "$MASTER_CLUSTER_BOOTSTRAP" ] && return 0 - info "Reading master identity from $POOL_API ..." - local resp; resp="$(curl -s --max-time 20 "$POOL_API" || true)" - echo "$resp" | jq -e . >/dev/null 2>&1 || die "could not fetch/parse $POOL_API (set MASTER_CLUSTER_PEERID + MASTER_CLUSTER_BOOTSTRAP manually)." 
- [ -n "$MASTER_CLUSTER_PEERID" ] || MASTER_CLUSTER_PEERID="$(echo "$resp" | jq -r '."ipfs-cluster-peerid" // empty')" - [ -n "$MASTER_KUBO_PEERID" ] || MASTER_KUBO_PEERID="$(echo "$resp" | jq -r '."kubo-peerid" // empty')" - [ -n "$MASTER_CLUSTER_BOOTSTRAP" ] || MASTER_CLUSTER_BOOTSTRAP="$(echo "$resp" | jq -r '(.ipfs_cluster.addresses // [])[] | select(test("/tcp/"))' | head -1)" - [ -n "$MASTER_CLUSTER_BOOTSTRAP" ] || MASTER_CLUSTER_BOOTSTRAP="$(echo "$resp" | jq -r '(.ipfs_cluster.addresses // [])[0] // empty')" -} -resolve_master -[ -n "$MASTER_CLUSTER_PEERID" ] || die "could not resolve MASTER_CLUSTER_PEERID." -[ -n "$MASTER_CLUSTER_BOOTSTRAP" ] || die "could not resolve MASTER_CLUSTER_BOOTSTRAP (master cluster multiaddr)." +# resolve master identity from the pool endpoint unless already provided/saved +if [ -z "$MASTER_CLUSTER_PEERID" ] || [ -z "$MASTER_CLUSTER_BOOTSTRAP" ]; then + if [ "$DRY_RUN" = 1 ] && ! pc_have curl; then info "(dry-run) would read $POOL_API" + else + info "reading master identity from $POOL_API ..." + resp="$(curl -s --max-time 20 "$POOL_API" 2>/dev/null || true)" + if printf '%s' "$resp" | jq -e . 
>/dev/null 2>&1; then + [ -n "$MASTER_CLUSTER_PEERID" ] || MASTER_CLUSTER_PEERID="$(printf '%s' "$resp" | jq -r '."ipfs-cluster-peerid" // empty')" + [ -n "$MASTER_KUBO_PEERID" ] || MASTER_KUBO_PEERID="$(printf '%s' "$resp" | jq -r '."kubo-peerid" // empty')" + [ -n "$MASTER_CLUSTER_BOOTSTRAP" ] || MASTER_CLUSTER_BOOTSTRAP="$(printf '%s' "$resp" | jq -r '(.ipfs_cluster.addresses // [])[] | select(test("/tcp/"))' | head -1)" + [ -n "$MASTER_CLUSTER_BOOTSTRAP" ] || MASTER_CLUSTER_BOOTSTRAP="$(printf '%s' "$resp" | jq -r '(.ipfs_cluster.addresses // [])[0] // empty')" + fi + fi +fi +pc_prompt MASTER_CLUSTER_PEERID "Master cluster peer id" '^(12D3KooW|Qm)' +pc_prompt MASTER_CLUSTER_BOOTSTRAP "Master cluster bootstrap multiaddr" '^/' + +if [[ "$PUBLIC_HOST" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then PROTO=ip4; else PROTO=dns4; fi -# announce protocol: /ip4 for an IPv4 literal, else /dns4 -if printf '%s' "$PUBLIC_HOST" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'; then PROTO=ip4; else PROTO=dns4; fi +pc_save_env "$ENV_FILE" PUBLIC_HOST CLUSTERNAME POOL_API BASE_DIR KUBO_IMAGE CLUSTER_IMAGE REPL_MIN REPL_MAX MASTER_CLUSTER_PEERID MASTER_CLUSTER_BOOTSTRAP MASTER_KUBO_PEERID cat < ${SECRET:0:12}...) - PUBLIC_HOST = $PUBLIC_HOST (announce as /$PROTO/$PUBLIC_HOST) - master cluster peer = $MASTER_CLUSTER_PEERID - master bootstrap addr = $MASTER_CLUSTER_BOOTSTRAP - master kubo peer = ${MASTER_KUBO_PEERID:-} - base dir = $BASE_DIR (kubo: $KUBO_DIR, cluster: $CLUSTER_DIR) - replication = $REPL_MIN..$REPL_MAX ; FOLLOWERMODE=false (writer) + PUBLIC_HOST=$PUBLIC_HOST (announce /$PROTO) CLUSTERNAME=$CLUSTERNAME secret=${SECRET:0:12}... 
+ master peer=$MASTER_CLUSTER_PEERID + master bootstrap=$MASTER_CLUSTER_BOOTSTRAP + base=$BASE_DIR repl=$REPL_MIN..$REPL_MAX FOLLOWERMODE=false (writer) env=$ENV_FILE EOF -[ "$DRY_RUN" = "1" ] && { info "DRY_RUN=1 — no changes made."; exit 0; } +[ "$DRY_RUN" = 1 ] && { info "DRY_RUN=1 — params saved; no system changes made."; exit 0; } +KUBO_DIR="$BASE_DIR/kubo"; CLUSTER_DIR="$BASE_DIR/ipfs-cluster" mkdir -p "$KUBO_DIR" "$CLUSTER_DIR" -# ---- kubo: init (default datastore, server profile) + announce ----------------- +# ---- kubo (idempotent init + config) ---- kubo_oneshot() { docker run --rm -e IPFS_PATH=/data/ipfs -v "$KUBO_DIR":/data/ipfs "$KUBO_IMAGE" "$@"; } -if [ ! -f "$KUBO_DIR/config" ]; then - info "Initializing kubo repo (server profile) ..." - kubo_oneshot init --profile=server >/dev/null -fi +if [ -f "$KUBO_DIR/config" ]; then info "kubo repo exists — skip init"; else info "init kubo repo (server profile)"; kubo_oneshot init --profile=server >/dev/null; fi kubo_oneshot config --json Addresses.Announce "[\"/$PROTO/$PUBLIC_HOST/tcp/4001\",\"/$PROTO/$PUBLIC_HOST/udp/4001/quic-v1\"]" >/dev/null kubo_oneshot config Routing.Type dhtserver >/dev/null kubo_oneshot config --json Routing.AcceleratedDHTClient true >/dev/null -NEW_KUBO_PEERID="$(kubo_oneshot config Identity.PeerID)" -[ -n "$NEW_KUBO_PEERID" ] || die "could not read new kubo peer id." +NEW_KUBO_PEERID="$(kubo_oneshot config Identity.PeerID)"; [ -n "$NEW_KUBO_PEERID" ] || die "could not read new kubo peer id." -cat > /etc/systemd/system/ipfs.service </dev/null 2>&1 && break; [ "$i" = 30 ] && die "kubo not healthy on :5001"; sleep 3; done +info "kubo healthy ($NEW_KUBO_PEERID)" -info "Starting kubo ..." 
-systemctl daemon-reload -systemctl enable --now ipfs.service -for i in $(seq 1 30); do - curl -s -X POST http://127.0.0.1:5001/api/v0/id >/dev/null 2>&1 && break - [ "$i" = 30 ] && die "kubo did not become healthy on :5001" - sleep 3 -done -info "kubo healthy (peer $NEW_KUBO_PEERID)" - -# ---- ipfs-cluster: init (read identity) + systemd unit + join ----------------- +# ---- ipfs-cluster (idempotent init + join) ---- cl_oneshot() { docker run --rm -e IPFS_CLUSTER_PATH=/data/ipfs-cluster -e CLUSTER_SECRET="$SECRET" -v "$CLUSTER_DIR":/data/ipfs-cluster --entrypoint ipfs-cluster-service "$CLUSTER_IMAGE" "$@"; } -if [ ! -f "$CLUSTER_DIR/identity.json" ]; then - info "Initializing ipfs-cluster ..." - cl_oneshot init >/dev/null 2>&1 || cl_oneshot init >/dev/null -fi -NEW_CLUSTER_PEERID="$(jq -r '.id' "$CLUSTER_DIR/identity.json")" -[ -n "$NEW_CLUSTER_PEERID" ] && [ "$NEW_CLUSTER_PEERID" != "null" ] || die "could not read new cluster peer id." -# persistent connectivity to the master -echo "$MASTER_CLUSTER_BOOTSTRAP" > "$CLUSTER_DIR/peerstore" - +if [ -f "$CLUSTER_DIR/identity.json" ]; then info "cluster identity exists — skip init"; else info "init ipfs-cluster"; cl_oneshot init >/dev/null 2>&1 || cl_oneshot init >/dev/null; fi +NEW_CLUSTER_PEERID="$(jq -r '.id' "$CLUSTER_DIR/identity.json")"; { [ -n "$NEW_CLUSTER_PEERID" ] && [ "$NEW_CLUSTER_PEERID" != null ]; } || die "could not read new cluster peer id." 
+printf '%s\n' "$MASTER_CLUSTER_BOOTSTRAP" > "$CLUSTER_DIR/peerstore" TRUSTED="$MASTER_CLUSTER_PEERID,$NEW_CLUSTER_PEERID" -cat > /etc/systemd/system/ipfscluster.service </dev/null 2>&1 && info "cluster API up" || info "NOTE: cluster API not responding yet; check: docker logs ipfs_cluster" +docker exec ipfs_cluster ipfs-cluster-ctl id >/dev/null 2>&1 && info "cluster API up" || info "NOTE: cluster API not up yet — check: docker logs ipfs_cluster" -cat < Date: Thu, 11 Jun 2026 18:54:47 -0400 Subject: [PATCH 4/4] fix(cluster): kubo image entrypoint + repo-lock fixes; add real-daemon e2e suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit phase-1-setup-writer.sh fixes found by live e2e on a clean Ubuntu 24.04 box: - bypass the kubo image auto-init entrypoint (--entrypoint ipfs) — plain `docker run ... init` double-inits and fails on a fresh repo - route `ipfs config` through the running daemon on re-runs (repo lock), one-shot otherwise (kubo_cfg helper); read peer id lock-free via jq tests/e2e/phase-1: isolated-cluster e2e (sim master w/ shifted ports + trust preservation, REAL setup-writer + master-trust runs, updated+old followers, drills D0-D4: failover write, mixed-fleet, reconvergence, idempotent re-runs). Result on test box: 14/14 pass. 
Co-Authored-By: Claude Fable 5 --- tests/e2e/phase-1/10-master.sh | 119 +++++++++++++++++++++++++ tests/e2e/phase-1/20-followers.sh | 94 +++++++++++++++++++ tests/e2e/phase-1/30-drills.sh | 108 ++++++++++++++++++++++ update-scripts/phase-1-setup-writer.sh | 19 ++-- 4 files changed, 335 insertions(+), 5 deletions(-) create mode 100644 tests/e2e/phase-1/10-master.sh create mode 100644 tests/e2e/phase-1/20-followers.sh create mode 100644 tests/e2e/phase-1/30-drills.sh diff --git a/tests/e2e/phase-1/10-master.sh b/tests/e2e/phase-1/10-master.sh new file mode 100644 index 00000000..8761275f --- /dev/null +++ b/tests/e2e/phase-1/10-master.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# +# fxe2e Phase-1 e2e — role: SIMULATED MASTER (test server only, NEVER production). +# +# Provisions an isolated test cluster master that mirrors the production master's +# SHAPE (systemd unit -> docker run, env-driven, CLUSTER_CRDT_TRUSTEDPEERS on both the +# Environment= line and the ExecStart -e flag) but with prefixed names + shifted ports +# so the REAL phase-1-setup-writer.sh can run with its defaults on the same box. +# Idempotent: re-run safe. Cluster name carries a random-but-fixed suffix so the +# derived secret is not guessable on a public test box. +# +set -euo pipefail +HERE="$(cd "$(dirname "$0")" && pwd)" +. 
"$HERE/../../../update-scripts/lib/phase-common.sh" +PC_TAG="fxe2e-master" + +CLUSTERNAME="${FXE2E_CLUSTERNAME:-fxe2e-vt9q4z}" +SECRET="$(printf '%s' "$CLUSTERNAME" | sha256sum | cut -d' ' -f1)" +BASE=/opt/fxe2e/master; KUBO_DIR="$BASE/kubo"; CL_DIR="$BASE/cluster" +KUBO_IMAGE="${KUBO_IMAGE:-ipfs/kubo:release}" +CLUSTER_IMAGE="${CLUSTER_IMAGE:-ipfs/ipfs-cluster:stable}" +[ "$(id -u)" = 0 ] || die "run as root" +pc_have docker || die "docker required (run phase-1-setup-writer.sh first or install docker)" +mkdir -p "$KUBO_DIR" "$CL_DIR" + +# ---- kubo (shifted ports: swarm 14001, API 127.0.0.1:15001, gw 127.0.0.1:18080) ---- +# --entrypoint ipfs bypasses the image's auto-init entrypoint (double-init bug); config is +# applied only on FRESH init (one-shot `ipfs config` needs the repo lock — re-runs with the +# daemon up would fail; port changes require stopping the unit + wiping is fine: TEST ONLY). +k() { docker run --rm --entrypoint ipfs -e IPFS_PATH=/data/ipfs -v "$KUBO_DIR":/data/ipfs "$KUBO_IMAGE" "$@"; } +if [ -f "$KUBO_DIR/config" ]; then info "master kubo repo exists — skip init+config" +else + info "init master kubo" + k init --profile=server >/dev/null + k config Addresses.API /ip4/127.0.0.1/tcp/15001 >/dev/null + k config Addresses.Gateway /ip4/127.0.0.1/tcp/18080 >/dev/null + k config --json Addresses.Swarm '["/ip4/0.0.0.0/tcp/14001"]' >/dev/null +fi +MASTER_KUBO_PEERID="$(jq -r '.Identity.PeerID // empty' "$KUBO_DIR/config")"; [ -n "$MASTER_KUBO_PEERID" ] || die "no master kubo peer id" + +ku_ch="$(cat </dev/null 2>&1 && break; [ "$i" = 30 ] && die "master kubo not healthy on :15001"; sleep 3; done +info "master kubo healthy ($MASTER_KUBO_PEERID)" + +# ---- cluster (shifted ports: swarm 19096, REST 127.0.0.1:19094, proxy 127.0.0.1:19095) ---- +cl() { docker run --rm -e IPFS_CLUSTER_PATH=/data/ipfs-cluster -e CLUSTER_SECRET="$SECRET" -v "$CL_DIR":/data/ipfs-cluster --entrypoint ipfs-cluster-service "$CLUSTER_IMAGE" "$@"; } +if [ -f "$CL_DIR/identity.json" 
]; then info "master cluster identity exists — skip init"; else info "init master cluster"; cl init >/dev/null 2>&1 || cl init >/dev/null; fi +MASTER_CLUSTER_PEERID="$(jq -r '.id' "$CL_DIR/identity.json")" +{ [ -n "$MASTER_CLUSTER_PEERID" ] && [ "$MASTER_CLUSTER_PEERID" != null ]; } || die "no master cluster peer id" + +# Trust line mirrors prod shape: present on BOTH Environment= and ExecStart -e so +# phase-1-master-trust.sh (UNIT_PATH/SERVICE_NAME overrides) edits it exactly like prod. +# Re-runs PRESERVE the current trusted list (master-trust may have appended writers — +# regenerating from the template must never revert that). +TRUST_LINE="$MASTER_CLUSTER_PEERID" +if [ -f /etc/systemd/system/fxe2e-master-ipfscluster.service ]; then + cur_trust="$(grep -oE 'CLUSTER_CRDT_TRUSTEDPEERS=[^" ]+' /etc/systemd/system/fxe2e-master-ipfscluster.service | head -1 | cut -d= -f2- || true)" + [ -n "$cur_trust" ] && TRUST_LINE="$cur_trust" +fi +cl_ch="$(cat </dev/null 2>&1 && break; [ "$i" = 30 ] && die "master cluster API not healthy on :19094"; sleep 3; done +info "master cluster healthy" + +PUB_IP="${PUBLIC_HOST:-$(hostname -I | awk '{print $1}')}" +cat </dev/null + docker run --rm --entrypoint ipfs -e IPFS_PATH=/data/ipfs -v "$kdir":/data/ipfs "$KUBO_IMAGE" config Addresses.API "/ip4/127.0.0.1/tcp/$kapi" >/dev/null + docker run --rm --entrypoint ipfs -e IPFS_PATH=/data/ipfs -v "$kdir":/data/ipfs "$KUBO_IMAGE" config Addresses.Gateway "/ip4/127.0.0.1/tcp/$gw" >/dev/null + docker run --rm --entrypoint ipfs -e IPFS_PATH=/data/ipfs -v "$kdir":/data/ipfs "$KUBO_IMAGE" config --json Addresses.Swarm "[\"/ip4/0.0.0.0/tcp/$kswarm\"]" >/dev/null + fi + + if [ ! 
-f "$cdir/identity.json" ]; then + info "[$name] init cluster" + docker run --rm -e IPFS_CLUSTER_PATH=/data/ipfs-cluster -e CLUSTER_SECRET="$SECRET" -v "$cdir":/data/ipfs-cluster --entrypoint ipfs-cluster-service "$CLUSTER_IMAGE" init >/dev/null 2>&1 || true + fi + printf '/ip4/127.0.0.1/tcp/19096/p2p/%s\n' "$MASTER_ID" > "$cdir/peerstore" + case ",$trusted," in *",$WRITER_ID,"*) printf '/ip4/127.0.0.1/tcp/9096/p2p/%s\n' "$WRITER_ID" >> "$cdir/peerstore";; esac + + docker rm -f "fxe2e_${name}_ipfs" "fxe2e_${name}_cluster" >/dev/null 2>&1 || true + docker run -d --restart unless-stopped --name "fxe2e_${name}_ipfs" --network host -e IPFS_PATH=/data/ipfs -v "$kdir":/data/ipfs "$KUBO_IMAGE" >/dev/null + for i in $(seq 1 30); do curl -s -X POST "http://127.0.0.1:$kapi/api/v0/id" >/dev/null 2>&1 && break; [ "$i" = 30 ] && die "[$name] kubo not healthy on :$kapi"; sleep 3; done + + docker run -d --restart unless-stopped --name "fxe2e_${name}_cluster" --network host \ + -e IPFS_CLUSTER_PATH=/data/ipfs-cluster \ + -e CLUSTER_SECRET="$SECRET" \ + -e CLUSTER_CLUSTERNAME="$CLUSTERNAME" \ + -e CLUSTER_FOLLOWERMODE=true \ + -e CLUSTER_CRDT_TRUSTEDPEERS="$trusted" \ + -e CLUSTER_LISTENMULTIADDRESS="/ip4/0.0.0.0/tcp/$clswarm" \ + -e CLUSTER_RESTAPI_HTTPLISTENMULTIADDRESS="/ip4/127.0.0.1/tcp/$clrest" \ + -e CLUSTER_IPFSPROXY_LISTENMULTIADDRESS="/ip4/127.0.0.1/tcp/$proxy" \ + -e CLUSTER_PINSVCAPI_HTTPLISTENMULTIADDRESS="/ip4/127.0.0.1/tcp/$pinsvc" \ + -e CLUSTER_IPFSHTTP_NODEMULTIADDRESS="/ip4/127.0.0.1/tcp/$kapi" \ + -e CLUSTER_REPLICATIONFACTORMIN=2 -e CLUSTER_REPLICATIONFACTORMAX=4 \ + -e CLUSTER_DISABLEREPINNING=false \ + -e CLUSTER_PEERNAME="$peername" \ + -e CLUSTER_MONITORPINGINTERVAL=15s \ + -v "$cdir":/data/ipfs-cluster \ + "$CLUSTER_IMAGE" daemon --upgrade --bootstrap "/ip4/127.0.0.1/tcp/19096/p2p/$MASTER_ID" >/dev/null + for i in $(seq 1 30); do docker exec "fxe2e_${name}_cluster" ipfs-cluster-ctl --host "/ip4/127.0.0.1/tcp/$clrest" id >/dev/null 2>&1 && break; [ "$i" = 
30 ] && die "[$name] cluster API not healthy on :$clrest"; sleep 3; done + info "[$name] healthy (trusts: $trusted)" + + # Deterministic bitswap on one box: connect this follower's kubo to master + writer kubo. + docker exec "fxe2e_${name}_ipfs" ipfs swarm connect "/ip4/127.0.0.1/tcp/14001/p2p/$MASTER_KUBO_ID" >/dev/null 2>&1 || true + [ -n "$WRITER_KUBO_ID" ] && docker exec "fxe2e_${name}_ipfs" ipfs swarm connect "/ip4/127.0.0.1/tcp/4001/p2p/$WRITER_KUBO_ID" >/dev/null 2>&1 || true +} + +mk_follower fA "$MASTER_ID,$WRITER_ID" 24001 25001 29096 29094 fxe2e-follower-new +mk_follower fB "$MASTER_ID" 34001 35001 39096 39094 fxe2e-follower-old + +cat < both followers pin it (old + new) +# D2 master DOWN : pin via WRITER -> updated follower pins it; OLD follower keeps +# serving existing pins and does NOT see the writer pin (mixed fleet) +# D3 master BACK : CRDT reconverges — master learns the writer-era pin; nothing lost +# D4 idempotency : re-run setup-writer + master-trust -> no changes, peerset stable +# +# Production-grade: every assert polls with a deadline; any FAIL exits 1. +# +set -uo pipefail +HERE="$(cd "$(dirname "$0")" && pwd)" + +PASS=0; FAIL=0 +ok() { echo "ok - $1"; PASS=$((PASS+1)); } +bad() { echo "FAIL - $1"; FAIL=$((FAIL+1)); } + +mctl() { docker exec fxe2e_m_cluster ipfs-cluster-ctl --host /ip4/127.0.0.1/tcp/19094 "$@"; } +wctl() { docker exec ipfs_cluster ipfs-cluster-ctl "$@"; } +actl() { docker exec fxe2e_fA_cluster ipfs-cluster-ctl --host /ip4/127.0.0.1/tcp/29094 "$@"; } +bctl() { docker exec fxe2e_fB_cluster ipfs-cluster-ctl --host /ip4/127.0.0.1/tcp/39094 "$@"; } + +A_ID="$(jq -r '.id' /opt/fxe2e/fA/cluster/identity.json)" +B_ID="$(jq -r '.id' /opt/fxe2e/fB/cluster/identity.json)" +M_ID="$(jq -r '.id' /opt/fxe2e/master/cluster/identity.json)" +# shellcheck disable=SC1091 +W_ID="$(. 
/opt/fula-writer/.env; printf '%s' "$NEW_CLUSTER_PEERID")" + +# poll JSON status until .peer_map[peerid].status == "pinned" (deadline secs) +pin_state() { "$1" --enc=json status "$2" 2>/dev/null | jq -r --arg p "$3" '.peer_map[$p].status // "absent"' 2>/dev/null || echo absent; } +wait_pinned() { # $1=ctlfn $2=cid $3=peerid $4=deadline $5=label + local t=0 + while [ "$t" -lt "$4" ]; do + [ "$(pin_state "$1" "$2" "$3")" = "pinned" ] && { ok "$5"; return 0; } + sleep 5; t=$((t+5)) + done + bad "$5 (timeout ${4}s)"; "$1" status "$2" 2>/dev/null | sed 's/^/ /' | head -8; return 1 +} + +echo "== D0 topology ==" +for i in $(seq 1 24); do + n="$(mctl peers ls 2>/dev/null | grep -c '^12D3KooW' || true)" + [ "${n:-0}" -ge 4 ] && break; sleep 5 +done +n="$(mctl peers ls 2>/dev/null | grep -c '^12D3KooW' || true)" +[ "${n:-0}" -ge 4 ] && ok "D0 master sees >=4 cluster peers ($n)" || bad "D0 master sees $n peers (want >=4)" + +echo "== D1 baseline: pin via MASTER reaches old+new followers ==" +CID1="$(echo "fxe2e-baseline-$(date +%s)" | docker exec -i fxe2e_m_ipfs ipfs add -q)" +[ -n "$CID1" ] && ok "D1 content added on master kubo ($CID1)" || bad "D1 could not add content" +mctl pin add "$CID1" >/dev/null 2>&1 || bad "D1 master pin add failed" +wait_pinned actl "$CID1" "$A_ID" 180 "D1 follower A (updated) pinned baseline CID" +wait_pinned bctl "$CID1" "$B_ID" 180 "D1 follower B (old) pinned baseline CID" + +PRE_DOWN_MASTER_PINS="$(mctl status --filter pinned 2>/dev/null | grep -c '^[A-Za-z0-9]' || true)" + +echo "== D2 master DOWN: writer keeps the network writable; old follower unaffected ==" +systemctl stop fxe2e-master-ipfscluster.service +sleep 3 +CID2="$(echo "fxe2e-writer-era-$(date +%s)" | docker exec -i ipfs_host ipfs add -q)" +wctl pin add "$CID2" >/dev/null 2>&1 && ok "D2 pin add via WRITER succeeded with master down" || bad "D2 writer pin add failed" +wait_pinned actl "$CID2" "$A_ID" 240 "D2 follower A (updated) pinned writer-era CID with master DOWN" +if [ 
"$(pin_state bctl "$CID2" "$B_ID")" = "pinned" ]; then + bad "D2 follower B (old) unexpectedly pinned a writer-issued CID (should not trust writer)" +else + ok "D2 follower B (old) does NOT see writer-issued pin (expected mixed-fleet behavior)" +fi +[ "$(pin_state bctl "$CID1" "$B_ID")" = "pinned" ] \ + && ok "D2 follower B (old) still serves its existing pin during master outage" \ + || bad "D2 follower B (old) lost its existing pin" + +echo "== D3 master BACK: CRDT reconverges, nothing lost ==" +systemctl start fxe2e-master-ipfscluster.service +for i in $(seq 1 30); do docker exec fxe2e_m_cluster ipfs-cluster-ctl --host /ip4/127.0.0.1/tcp/19094 id >/dev/null 2>&1 && break; sleep 5; done +t=0; got="" +while [ "$t" -lt 300 ]; do + if mctl status "$CID2" 2>/dev/null | grep -qi 'PINNED'; then got=1; break; fi + sleep 10; t=$((t+10)) +done +[ -n "$got" ] && ok "D3 master converged to writer-era pin after restart" || bad "D3 master never learned writer-era pin (300s)" +POST_UP_MASTER_PINS="$(mctl status --filter pinned 2>/dev/null | grep -c '^[A-Za-z0-9]' || true)" +[ "${POST_UP_MASTER_PINS:-0}" -ge "${PRE_DOWN_MASTER_PINS:-0}" ] \ + && ok "D3 pinset never shrank (pre=$PRE_DOWN_MASTER_PINS post=$POST_UP_MASTER_PINS)" \ + || bad "D3 pinset shrank (pre=$PRE_DOWN_MASTER_PINS post=$POST_UP_MASTER_PINS)" +wait_pinned bctl "$CID1" "$B_ID" 90 "D3 follower B (old) still healthy after master bounce" + +echo "== D4 idempotency: re-runs are no-ops ==" +if (cd "$HERE/../../../update-scripts" && bash phase-1-setup-writer.sh >/tmp/fxe2e-rerun-writer.log 2>&1); then + grep -qiE 'unchanged|skip' /tmp/fxe2e-rerun-writer.log && ok "D4 setup-writer re-run: no-op paths taken" || ok "D4 setup-writer re-run exited 0" +else + bad "D4 setup-writer re-run failed (see /tmp/fxe2e-rerun-writer.log)" +fi +if UNIT_PATH=/etc/systemd/system/fxe2e-master-ipfscluster.service SERVICE_NAME=fxe2e-master-ipfscluster \ + ENV_FILE=/opt/fxe2e/master-trust.env NEW_WRITER_PEERID="$W_ID" \ + bash 
"$HERE/../../../update-scripts/phase-1-master-trust.sh" >/tmp/fxe2e-rerun-trust.log 2>&1; then + grep -qi 'already trusted' /tmp/fxe2e-rerun-trust.log && ok "D4 master-trust re-run: already-trusted no-op" || ok "D4 master-trust re-run exited 0" +else + bad "D4 master-trust re-run failed (see /tmp/fxe2e-rerun-trust.log)" +fi +n2="$(mctl peers ls 2>/dev/null | grep -c '^12D3KooW' || true)" +[ "${n2:-0}" -ge 4 ] && ok "D4 peerset stable after re-runs ($n2)" || bad "D4 peerset shrank ($n2)" + +echo +echo "RESULT: pass=$PASS fail=$FAIL" +[ "$FAIL" = 0 ] || exit 1 diff --git a/update-scripts/phase-1-setup-writer.sh b/update-scripts/phase-1-setup-writer.sh index 4131da7a..6c1ea6de 100755 --- a/update-scripts/phase-1-setup-writer.sh +++ b/update-scripts/phase-1-setup-writer.sh @@ -85,12 +85,21 @@ KUBO_DIR="$BASE_DIR/kubo"; CLUSTER_DIR="$BASE_DIR/ipfs-cluster" mkdir -p "$KUBO_DIR" "$CLUSTER_DIR" # ---- kubo (idempotent init + config) ---- -kubo_oneshot() { docker run --rm -e IPFS_PATH=/data/ipfs -v "$KUBO_DIR":/data/ipfs "$KUBO_IMAGE" "$@"; } +# --entrypoint ipfs: the official kubo image's entrypoint auto-inits an empty repo before +# running the CMD, so a plain `docker run ... init` double-inits and fails ("configuration +# file already exists"). Bypassing the entrypoint makes init/config single, explicit ops. +kubo_oneshot() { docker run --rm --entrypoint ipfs -e IPFS_PATH=/data/ipfs -v "$KUBO_DIR":/data/ipfs "$KUBO_IMAGE" "$@"; } +# kubo_cfg: on RE-RUNS the ipfs_host daemon holds the repo lock, so a one-shot `ipfs config` +# would fail. Route through the running daemon (RPC, no lock) when up, one-shot otherwise. +# Note: config set via a running daemon takes effect on its next restart (restart +# ipfs.service manually if you changed PUBLIC_HOST on a re-run). 
+kubo_cfg() { if [ -n "$(docker ps -q -f name='^ipfs_host$')" ]; then docker exec ipfs_host ipfs "$@"; else kubo_oneshot "$@"; fi; } if [ -f "$KUBO_DIR/config" ]; then info "kubo repo exists — skip init"; else info "init kubo repo (server profile)"; kubo_oneshot init --profile=server >/dev/null; fi -kubo_oneshot config --json Addresses.Announce "[\"/$PROTO/$PUBLIC_HOST/tcp/4001\",\"/$PROTO/$PUBLIC_HOST/udp/4001/quic-v1\"]" >/dev/null -kubo_oneshot config Routing.Type dhtserver >/dev/null -kubo_oneshot config --json Routing.AcceleratedDHTClient true >/dev/null -NEW_KUBO_PEERID="$(kubo_oneshot config Identity.PeerID)"; [ -n "$NEW_KUBO_PEERID" ] || die "could not read new kubo peer id." +kubo_cfg config --json Addresses.Announce "[\"/$PROTO/$PUBLIC_HOST/tcp/4001\",\"/$PROTO/$PUBLIC_HOST/udp/4001/quic-v1\"]" >/dev/null +kubo_cfg config Routing.Type dhtserver >/dev/null +kubo_cfg config --json Routing.AcceleratedDHTClient true >/dev/null +# read identity from the repo file directly — lock-free, daemon-state-independent +NEW_KUBO_PEERID="$(jq -r '.Identity.PeerID // empty' "$KUBO_DIR/config")"; [ -n "$NEW_KUBO_PEERID" ] || die "could not read new kubo peer id." kubo_ch="$(cat <