diff --git a/.github/actions/sync-fork-checkpoints/action.yml b/.github/actions/sync-fork-checkpoints/action.yml
new file mode 100644
index 000000000..0c84f81fa
--- /dev/null
+++ b/.github/actions/sync-fork-checkpoints/action.yml
@@ -0,0 +1,208 @@
+# Composite action: after a fork PR is merged, copy the Entire checkpoint
+# commits referenced by the merged commits' Entire-Checkpoint trailers from
+# the fork's entire/checkpoints/v1 branch onto the same branch upstream.
+# Reads github.event.pull_request, so it must run on a pull request event.
+name: 'Sync Fork Checkpoints'
+description: 'Import Entire session checkpoint data from a fork after a PR is merged'
+
+inputs:
+  token:
+    description: 'GitHub token with contents:write permission. Defaults to GITHUB_TOKEN.'
+    required: false
+    default: ${{ github.token }}
+
+outputs:
+  imported_count:
+    description: 'Number of checkpoint commits cherry-picked'
+    value: ${{ steps.sync.outputs.imported_count }}
+  synced:
+    description: 'Whether any checkpoints were synced (true/false)'
+    value: ${{ steps.sync.outputs.synced }}
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Sync fork checkpoints
+      id: sync
+      shell: bash
+      # Event data reaches the script via env vars, not inline expressions,
+      # so fork-controlled values are never spliced into the shell source.
+      env:
+        GH_TOKEN: ${{ inputs.token }}
+        REPO: ${{ github.repository }}
+        FORK_URL: ${{ github.event.pull_request.head.repo.clone_url }}
+        FORK_FULL_NAME: ${{ github.event.pull_request.head.repo.full_name }}
+        IS_FORK: ${{ github.event.pull_request.head.repo.fork }}
+        MERGE_SHA: ${{ github.event.pull_request.merge_commit_sha }}
+        BASE_SHA: ${{ github.event.pull_request.base.sha }}
+        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        PR_NUMBER: ${{ github.event.pull_request.number }}
+      run: |
+        set -euo pipefail
+
+        BRANCH="entire/checkpoints/v1"
+        ORIGIN_URL="https://x-access-token:${GH_TOKEN}@github.com/${REPO}.git"
+
+        # Publish defaults first so every early exit still sets both outputs.
+        echo "synced=false" >> "$GITHUB_OUTPUT"
+        echo "imported_count=0" >> "$GITHUB_OUTPUT"
+
+        # --- Guard: only run for fork PRs ---
+        if [ "$IS_FORK" != "true" ]; then
+          echo "PR is not from a fork. Skipping."
+          exit 0
+        fi
+
+        # --- Set up a minimal repo (no full checkout, only checkpoints branch) ---
+        WORKDIR=$(mktemp -d)
+        # Delete the scratch repo on exit: its .git/config stores ORIGIN_URL,
+        # which embeds the token, and must not linger on a reused runner.
+        trap 'rm -rf "$WORKDIR"' EXIT
+        cd "$WORKDIR"
+        git init
+        git remote add origin "$ORIGIN_URL"
+
+        # Fetch only the merge range commits (for reading trailers)
+        git fetch origin "$MERGE_SHA" --depth=100 2>/dev/null || true
+        git fetch origin "$BASE_SHA" --depth=100 2>/dev/null || true
+
+        # --- Step 1: Find checkpoint IDs in merged commits ---
+        echo "Looking for Entire-Checkpoint trailers in ${BASE_SHA:0:7}..${MERGE_SHA:0:7}"
+
+        CHECKPOINT_IDS=$(git log --format='%(trailers:key=Entire-Checkpoint,valueonly)' \
+          "${BASE_SHA}..${MERGE_SHA}" 2>/dev/null \
+          | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
+          | grep -v '^$' | sort -u || true)
+
+        # Fallback: check original PR commits (handles squash merges that drop trailers)
+        if [ -z "$CHECKPOINT_IDS" ]; then
+          echo "No trailers in merge range. Checking original PR commits..."
+          git fetch "$FORK_URL" "$HEAD_SHA" --depth=50 2>/dev/null || true
+          CHECKPOINT_IDS=$(git log --format='%(trailers:key=Entire-Checkpoint,valueonly)' \
+            "${BASE_SHA}..${HEAD_SHA}" 2>/dev/null \
+            | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
+            | grep -v '^$' | sort -u || true)
+        fi
+
+        if [ -z "$CHECKPOINT_IDS" ]; then
+          echo "No Entire-Checkpoint trailers found. Nothing to sync."
+          exit 0
+        fi
+
+        echo "Found checkpoint IDs:"
+        echo "$CHECKPOINT_IDS"
+        echo ""
+
+        # --- Step 2: Fetch fork's checkpoints branch ---
+        echo "Fetching $BRANCH from fork ($FORK_FULL_NAME)..."
+        if ! git fetch "$FORK_URL" "$BRANCH" 2>/dev/null; then
+          echo "Fork has no $BRANCH branch. Nothing to sync."
+          exit 0
+        fi
+        FORK_REF=$(git rev-parse FETCH_HEAD)
+        echo "Fork's $BRANCH is at ${FORK_REF:0:7}"
+
+        # --- Step 3: Validate checkpoint IDs ---
+        # Only 12-char lowercase hex IDs pass, so the values are safe to join
+        # into the --grep regex in Step 4.
+        VALID_IDS=()
+        for ID in $CHECKPOINT_IDS; do
+          if [[ "$ID" =~ ^[0-9a-f]{12}$ ]]; then
+            VALID_IDS+=("$ID")
+          else
+            echo " Skipping invalid checkpoint ID: $ID"
+          fi
+        done
+
+        if [ ${#VALID_IDS[@]} -eq 0 ]; then
+          echo "No valid checkpoint IDs found. Nothing to sync."
+          exit 0
+        fi
+
+        # --- Step 4: Find matching commits on fork's checkpoints branch ---
+        GREP_PATTERN=$(printf '%s\n' "${VALID_IDS[@]}" | paste -sd '|' -)
+
+        # Fetch upstream's checkpoints branch to determine fork-only commits
+        git fetch origin "$BRANCH" 2>/dev/null || true
+
+        if git rev-parse --verify "refs/remotes/origin/$BRANCH" >/dev/null 2>&1; then
+          FORK_RANGE="origin/${BRANCH}..${FORK_REF}"
+        else
+          FORK_RANGE="${FORK_REF}"
+        fi
+
+        # Find commits referencing our checkpoint IDs, in chronological order
+        COMMITS=$(git log --reverse --format='%H' --extended-regexp \
+          --grep="(${GREP_PATTERN})" "$FORK_RANGE" 2>/dev/null || true)
+
+        if [ -z "$COMMITS" ]; then
+          echo "No matching commits found on fork's $BRANCH. Nothing to sync."
+          exit 0
+        fi
+
+        COMMIT_COUNT=$(echo "$COMMITS" | wc -l | tr -d ' ')
+        echo "Found $COMMIT_COUNT commit(s) to cherry-pick"
+
+        # --- Step 5: Set up git identity ---
+        git config user.name "github-actions[bot]"
+        git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+        # --- Step 6: Check out or create the local checkpoints branch ---
+        if git rev-parse --verify "refs/remotes/origin/$BRANCH" >/dev/null 2>&1; then
+          echo "Checking out existing $BRANCH..."
+          git checkout -B "$BRANCH" "origin/$BRANCH"
+        else
+          echo "Creating new orphan branch $BRANCH..."
+          git checkout --orphan "$BRANCH"
+          git rm -rf . 2>/dev/null || true
+          git commit --allow-empty -m "Initialize $BRANCH"
+        fi
+
+        # --- Step 7: Cherry-pick each commit (preserves messages, trailers, authorship) ---
+        # A commit that fails to apply is aborted and skipped, not fatal.
+        IMPORTED=0
+        while IFS= read -r COMMIT; do
+          [ -z "$COMMIT" ] && continue
+          SUBJECT=$(git log -1 --format='%s' "$COMMIT")
+          echo " Cherry-picking: $SUBJECT (${COMMIT:0:7})"
+          if git cherry-pick "$COMMIT" --no-edit; then
+            IMPORTED=$((IMPORTED + 1))
+          else
+            git cherry-pick --abort 2>/dev/null || true
+            echo " Warning: failed to cherry-pick ${COMMIT:0:7}, skipping"
+          fi
+        done <<< "$COMMITS"
+
+        if [ "$IMPORTED" -eq 0 ]; then
+          echo "No commits were successfully cherry-picked. Nothing to sync."
+          exit 0
+        fi
+
+        # --- Step 8: Push only the checkpoints branch (with retry for concurrent merges) ---
+        MAX_RETRIES=3
+        for ATTEMPT in $(seq 1 $MAX_RETRIES); do
+          if git push origin "$BRANCH"; then
+            echo ""
+            echo "Successfully cherry-picked ${IMPORTED} commit(s) from PR #${PR_NUMBER}"
+            echo "synced=true" >> "$GITHUB_OUTPUT"
+            echo "imported_count=${IMPORTED}" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          if [ "$ATTEMPT" -lt "$MAX_RETRIES" ]; then
+            echo "Push failed (attempt $ATTEMPT/$MAX_RETRIES). Rebasing on remote..."
+            git fetch origin "$BRANCH"
+            git rebase "origin/$BRANCH" || {
+              echo "Rebase failed, trying merge..."
+              git rebase --abort 2>/dev/null || true
+              git merge "origin/$BRANCH" --no-edit || {
+                git merge --abort 2>/dev/null || true
+                echo "::error::Could not rebase or merge onto origin/$BRANCH"
+                exit 1
+              }
+            }
+          fi
+        done
+
+        echo "::error::Failed to push checkpoints after $MAX_RETRIES attempts"
+        exit 1
diff --git a/.github/workflows/sync-fork-checkpoints.yml b/.github/workflows/sync-fork-checkpoints.yml
new file mode 100644
index 000000000..78760758d
--- /dev/null
+++ b/.github/workflows/sync-fork-checkpoints.yml
@@ -0,0 +1,32 @@
+# Sync Entire session checkpoints from fork PRs.
+#
+# When a PR from a fork is merged, this workflow imports the checkpoint data
+# (session transcripts, prompts, context) from the fork's entire/checkpoints/v1
+# branch into the upstream repo's entire/checkpoints/v1 branch.
+#
+# This enables the full Entire session history to be preserved even when
+# contributors work from forks without push access to upstream.
+#
+# How it works:
+# 1. Finds Entire-Checkpoint trailers in the merged commits
+# 2. Fetches the fork's entire/checkpoints/v1 branch
+# 3. Cherry-picks the fork commits whose messages match those checkpoint IDs
+# 4. Pushes the updated checkpoints branch to upstream
+
+name: Sync Fork Checkpoints
+
+on:
+  pull_request_target:
+    types: [closed]
+
+permissions:
+  contents: write  # the action pushes entire/checkpoints/v1 with this token
+
+jobs:
+  sync-checkpoints:
+    if: >-
+      github.event.pull_request.merged == true &&
+      github.event.pull_request.head.repo.fork == true
+    runs-on: ubuntu-latest
+    steps:
+      - uses: entireio/cli/.github/actions/sync-fork-checkpoints@main