From d669b565be4d9ac8f7551776b6253c79bebcd919 Mon Sep 17 00:00:00 2001 From: corey Date: Fri, 13 Feb 2026 11:18:29 +0800 Subject: [PATCH 1/5] support mpt node --- morph-node/.env_mpt | 7 +++++++ morph-node/Makefile | 19 +++++++++++++++++ morph-node/docker-compose.yml | 6 +++--- morph-node/entrypoint-geth-mpt.sh | 35 +++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 morph-node/.env_mpt create mode 100644 morph-node/entrypoint-geth-mpt.sh diff --git a/morph-node/.env_mpt b/morph-node/.env_mpt new file mode 100644 index 0000000..1defda6 --- /dev/null +++ b/morph-node/.env_mpt @@ -0,0 +1,7 @@ +# MPT specific overrides (loaded after base env to override values) +GETH_ENTRYPOINT_FILE=./entrypoint-geth-mpt.sh +MPT_FORK_TIME=2000000000000 + +# MPT snapshot names +HOODI_MPT_SNAPSHOT_NAME=snapshot-20260211-1 +MAINNET_MPT_SNAPSHOT_NAME=snapshot-20260211-1 diff --git a/morph-node/Makefile b/morph-node/Makefile index b38471c..2aef9ad 100644 --- a/morph-node/Makefile +++ b/morph-node/Makefile @@ -12,6 +12,8 @@ JWT_SECRET_FILE_HOLESKY := $(JWT_SECRET_FILE) JWT_SECRET_FILE_HOODI := $(JWT_SECRET_FILE) +include .env_mpt + generate-jwt: @[ -f $(JWT_SECRET_FILE_MAINNET) ] || (echo "Generating $(JWT_SECRET_FILE_MAINNET)..." 
&& openssl rand -hex 32 > $(JWT_SECRET_FILE_MAINNET) && echo "$(JWT_SECRET_FILE_MAINNET) created.") @@ -31,6 +33,12 @@ run-holesky-node: generate-jwt-holesky run-hoodi-node: generate-jwt-hoodi docker-compose --env-file .env_hoodi up node & +run-hoodi-mpt-node: generate-jwt-hoodi + docker-compose --env-file .env_hoodi --env-file .env_mpt up node & + +run-mainnet-mpt-node: generate-jwt + docker-compose --env-file .env --env-file .env_mpt up node & + stop-node: docker stop morph-node morph-geth @@ -47,6 +55,12 @@ run-holesky-validator: generate-jwt-holesky run-hoodi-validator: generate-jwt-hoodi docker-compose --env-file .env_hoodi up validator & +run-hoodi-mpt-validator: generate-jwt-hoodi + docker-compose --env-file .env_hoodi --env-file .env_mpt up validator & + +run-mainnet-mpt-validator: generate-jwt + docker-compose --env-file .env --env-file .env_mpt up validator & + stop-validator: docker stop validator-node morph-geth @@ -93,6 +107,11 @@ download-and-decompress-hoodi-snapshot: download-and-decompress-mainnet-snapshot: $(call download-and-decompress,$(MAINNET_SNAPSHOT_NAME),https://snapshot.morphl2.io/mainnet) +download-and-decompress-hoodi-mpt-snapshot: + $(call download-and-decompress,$(HOODI_MPT_SNAPSHOT_NAME),https://snapshot.morphl2.io/hoodi) + +download-and-decompress-mainnet-mpt-snapshot: + $(call download-and-decompress,$(MAINNET_MPT_SNAPSHOT_NAME),https://snapshot.morphl2.io/mainnet) diff --git a/morph-node/docker-compose.yml b/morph-node/docker-compose.yml index 72027f8..3225d7f 100644 --- a/morph-node/docker-compose.yml +++ b/morph-node/docker-compose.yml @@ -3,7 +3,7 @@ version: '3.8' services: geth: container_name: morph-geth - image: ghcr.io/morph-l2/go-ethereum:2.1.1 + image: ghcr.io/morph-l2/go-ethereum:2.1.2 restart: unless-stopped ports: - "8545:8545" @@ -26,7 +26,7 @@ services: depends_on: geth: condition: service_started - image: ghcr.io/morph-l2/node:0.4.10 + image: ghcr.io/morph-l2/node:0.4.11 restart: unless-stopped ports: - "26656" @@ 
-53,7 +53,7 @@ services: depends_on: geth: condition: service_started - image: ghcr.io/morph-l2/node:0.4.10 + image: ghcr.io/morph-l2/node:0.4.11 ports: - "26660" environment: diff --git a/morph-node/entrypoint-geth-mpt.sh b/morph-node/entrypoint-geth-mpt.sh new file mode 100644 index 0000000..ecaf768 --- /dev/null +++ b/morph-node/entrypoint-geth-mpt.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +if [ ! -f /jwt-secret.txt ]; then + echo "Error: jwt-secret.txt not found. Please create it before starting the service." + exit 1 +fi + +MORPH_FLAG=${MORPH_FLAG:-"morph"} + +COMMAND="geth \ +--$MORPH_FLAG \ +--morph-mpt \ +--datadir="./db" \ +--verbosity=3 \ +--http \ +--http.corsdomain="*" \ +--http.vhosts="*" \ +--http.addr=0.0.0.0 \ +--http.port=8545 \ +--http.api=web3,debug,eth,txpool,net,morph,engine,admin \ +--ws \ +--ws.addr=0.0.0.0 \ +--ws.port=8546 \ +--ws.origins="*" \ +--ws.api=web3,debug,eth,txpool,net,morph,engine,admin \ +--authrpc.addr=0.0.0.0 \ +--authrpc.port=8551 \ +--authrpc.vhosts="*" \ +--authrpc.jwtsecret="./jwt-secret.txt" \ +--gcmode=archive \ +--log.filename=./db/geth.log \ +--metrics \ +--metrics.addr=0.0.0.0" + +eval $COMMAND From 38ffc33722cb5177cadf363ee9e85dfaca32f7a9 Mon Sep 17 00:00:00 2001 From: corey Date: Wed, 11 Mar 2026 18:20:19 +0800 Subject: [PATCH 2/5] feat: add snapshot automation scripts Made-with: Cursor --- ops/snapshot/README.md | 151 +++++++++++++++ ops/snapshot/README.zh.md | 151 +++++++++++++++ ops/snapshot/metrics_server.py | 63 +++++++ ops/snapshot/snapshot.env.example | 54 ++++++ ops/snapshot/snapshot_make.py | 202 ++++++++++++++++++++ ops/snapshot/update_metadata.py | 298 ++++++++++++++++++++++++++++++ ops/snapshot/update_readme.py | 148 +++++++++++++++ 7 files changed, 1067 insertions(+) create mode 100644 ops/snapshot/README.md create mode 100644 ops/snapshot/README.zh.md create mode 100644 ops/snapshot/metrics_server.py create mode 100644 ops/snapshot/snapshot.env.example create mode 100644 ops/snapshot/snapshot_make.py create mode
100644 ops/snapshot/update_metadata.py create mode 100644 ops/snapshot/update_readme.py diff --git a/ops/snapshot/README.md b/ops/snapshot/README.md new file mode 100644 index 0000000..32737a2 --- /dev/null +++ b/ops/snapshot/README.md @@ -0,0 +1,151 @@ +# Snapshot Automation + +> 中文版请见 [README.zh.md](./README.zh.md) + +Automatically creates a node snapshot every two weeks and syncs the relevant parameters to README.md for users to download. + +## Background + +Manually creating snapshots is error-prone and tedious. This solution automates the entire process using a server-side cron job and the GitHub REST API — no GitHub Actions or git CLI required. + +## Directory Structure + +``` +run-morph-node/ +├── README.md # snapshot table is updated here +└── ops/snapshot/ + ├── README.md # this document + ├── README.zh.md # Chinese version + ├── snapshot_make.py # entry point: stop → snapshot → upload → restart → update README + ├── update_metadata.py # fetches indexer API data and orchestrates the full update flow + ├── update_readme.py # pure table-update logic (imported by update_metadata.py) + └── metrics_server.py # persistent HTTP server exposing metrics on :6060/metrics +``` + +## Workflow + +``` +Server cron job (1st and 15th of each month) + │ + ▼ + ops/snapshot/snapshot_make.py + [1] stop morph-node, morph-geth + [2] create snapshot (tar geth + node data) + [3] upload to S3 + [4] restart morph-geth → wait for RPC → collect base_height + [5] restart morph-node + [6] call update_metadata.py + │ BASE_HEIGHT, SNAPSHOT_NAME + ▼ + python3 update_metadata.py + ┌─────────────────────────────────────────────────────┐ + │ 1. call internal explorer-indexer API: │ + │ GET /v1/batch/l1_msg_start_height/ │ + │ GET /v1/batch/derivation_start_height/│ + │ 2. fetch README.md content via GitHub API │ + │ 3. insert new snapshot row at top of table │ + │ 4. create branch + push updated file via GitHub API │ + │ 5. 
open PR via GitHub API │ + └─────────────────────────────────────────────────────┘ +``` + +## Triggers + +| Method | Description | +|---|---| +| Scheduled | Server cron job on the 1st and 15th of each month | +| Manual | SSH into the server and run `snapshot_make.py` directly | + +## Multi-environment Support + +| Environment | Indexer API (internal) | +|---|---| +| mainnet | `explorer-indexer.morphl2.io` | +| hoodi | `explorer-indexer-hoodi.morphl2.io` | +| holesky | `explorer-indexer-holesky.morphl2.io` | + +Each environment has its own node server with its own cron job. S3 paths and README table sections are automatically scoped by environment. + +## Deployment + +### 1. Clone the Repository on the Node Server + +```bash +git clone https://github.com/morph-l2/run-morph-node.git /data/run-morph-node +``` + +### 2. Create the Environment File + +Copy the template into the same directory and fill in the values: + +```bash +cd /data/run-morph-node/ops/snapshot +cp snapshot.env.example snapshot.env +# edit snapshot.env and fill in GH_TOKEN, S3_BUCKET, ENVIRONMENT, etc. +``` + +For multiple environments or snapshot types, use separate files: + +```bash +cp snapshot.env.example snapshot-hoodi.env +cp snapshot.env.example snapshot-mainnet-mpt.env +``` + +All available variables are documented in [`snapshot.env.example`](./snapshot.env.example). These files must **not** be committed to git (add `*.env` to `.gitignore`). + +Also recommended: enable **"Automatically delete head branches"** under repo Settings → General. Branches will be deleted automatically after a PR is merged. + +### 3.
Configure the Cron Job + +Add one entry per environment / snapshot type: + +```bash +crontab -e +``` + +```cron +REPO=/data/run-morph-node/ops/snapshot + +# mainnet standard snapshot (uses default snapshot.env) +0 2 1,15 * * python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet.log 2>&1 + +# mainnet mpt-snapshot +0 3 1,15 * * ENV_FILE=$REPO/snapshot-mainnet-mpt.env python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet-mpt.log 2>&1 + +# hoodi +0 2 1,15 * * ENV_FILE=$REPO/snapshot-hoodi.env python3 $REPO/snapshot_make.py >> /var/log/snapshot-hoodi.log 2>&1 +``` + +### 4. Start the Metrics Server + +Run `metrics_server.py` as a persistent pm2 process so it survives server reboots: + +```bash +pm2 startup # register pm2 itself as a system startup service (run once) +pm2 start python3 --name morph-snapshot-metrics -- /data/run-morph-node/ops/snapshot/metrics_server.py +pm2 save +``` + +Once running, the metrics endpoint is available at `http://:6060/metrics`. + +Exposed metrics: + +| Metric | Type | Description | +|---|---|---| +| `morph_snapshot_readme_update_status` | gauge | 1 = success, 0 = failure | +| `morph_snapshot_readme_update_timestamp_seconds` | gauge | Unix timestamp of the last run | + +Labels: `environment` (mainnet / hoodi / holesky), `snapshot` (snapshot name) + +> Default metrics file path: `/tmp/morph_snapshot_metrics.prom` +> Override via the `METRICS_FILE` environment variable — applies to both `update_readme.py` and `metrics_server.py`. + +## Key Design Decisions + +- **`base_height` is collected after geth restarts**: querying the RPC after the snapshot is created and geth is started alone gives the actual block state of the snapshot, which is more accurate than querying before the stop. `morph-node` is started only after the height is confirmed. +- **Fallback recovery on failure**: if the snapshot or upload fails, a fallback step in `snapshot_make.py` attempts to restart both processes to avoid prolonged service interruption. 
+- **No GitHub Actions or git CLI required**: `update_metadata.py` uses the GitHub REST API directly — the server only needs Python 3. The `GH_TOKEN` is the only credential needed. +- **New entries are inserted at the top of the table**: the latest snapshot always appears in the first row for quick access. +- **Changes are merged via PR, not direct push**: a new branch is created and a PR is opened, preserving review opportunity and preventing automated scripts from writing directly to the main branch. + + diff --git a/ops/snapshot/README.zh.md b/ops/snapshot/README.zh.md new file mode 100644 index 0000000..604a938 --- /dev/null +++ b/ops/snapshot/README.zh.md @@ -0,0 +1,151 @@ +# Snapshot 自动化 + +> English version: [README.md](./README.md) + +每两周自动制作一次节点 snapshot,并将相关参数同步到 README.md 供用户下载使用。 + +## 背景 + +手动制作 snapshot 流程繁琐且容易遗漏,本方案通过服务器 cron 定时任务和 GitHub REST API 将全流程自动化,无需 GitHub Actions 或 git CLI。 + +## 目录结构 + +``` +run-morph-node/ +├── README.md # snapshot 表格在此更新 +└── ops/snapshot/ + ├── README.md # 英文文档 + ├── README.zh.md # 本文档 + ├── snapshot.env.example # 环境变量模板(每个环境复制一份填写) + ├── snapshot_make.py # 入口:停服 → 制作 → 上传 → 重启 → 更新 README + ├── update_metadata.py # 查询 indexer API 并编排完整更新流程 + ├── update_readme.py # 纯表格更新逻辑(由 update_metadata.py 调用) + └── metrics_server.py # 常驻 HTTP server,在 :6060/metrics 暴露 metrics +``` + +## 完整流程 + +``` +服务器 cron 定时任务(每月 1 日 / 15 日) + │ + ▼ + ops/snapshot/snapshot_make.py + [1] 停止 morph-node、morph-geth + [2] 制作快照(tar geth + node 数据) + [3] 上传至 S3 + [4] 重启 morph-geth → 等待 RPC 就绪 → 采集 base_height + [5] 重启 morph-node + [6] 调用 update_metadata.py + │ BASE_HEIGHT, SNAPSHOT_NAME + ▼ + python3 update_metadata.py + ┌─────────────────────────────────────────────────────┐ + │ 1. 调用内网 explorer-indexer API: │ + │ GET /v1/batch/l1_msg_start_height/ │ + │ GET /v1/batch/derivation_start_height/│ + │ 2. 通过 GitHub API 获取 README.md 当前内容 │ + │ 3. 在内存中插入新快照记录到表格顶部 │ + │ 4. 通过 GitHub API 建新分支并推送更新后的文件 │ + │ 5. 
通过 GitHub API 开启 PR │ + └─────────────────────────────────────────────────────┘ +``` + +## 触发方式 + +| 方式 | 说明 | +|---|---| +| 定时 | 服务器 cron,每月 1 日和 15 日自动执行 | +| 手动 | SSH 登录服务器,直接执行 `snapshot_make.py` | + +## 多环境支持 + +| 环境 | Indexer API(内网) | +|---|---| +| mainnet | `explorer-indexer.morphl2.io` | +| hoodi | `explorer-indexer-hoodi.morphl2.io` | +| holesky | `explorer-indexer-holesky.morphl2.io` | + +每个环境 / 快照类型有独立的 env 文件,通过 `ENV_FILE` 环境变量指定。S3 路径和 README 表格自动按环境区分。 + +## 部署步骤 + +### 1. 在节点服务器上克隆仓库 + +```bash +git clone https://github.com/morph-l2/run-morph-node.git /data/run-morph-node +``` + +### 2. 创建环境变量文件 + +在脚本同级目录复制模板并填写对应值: + +```bash +cd /data/run-morph-node/ops/snapshot +cp snapshot.env.example snapshot.env +# 编辑 snapshot.env,填写 GH_TOKEN、S3_BUCKET、ENVIRONMENT 等 +``` + +多个环境或快照类型各自使用独立的 env 文件: + +```bash +cp snapshot.env.example snapshot-hoodi.env +cp snapshot.env.example snapshot-mainnet-mpt.env +``` + +所有可配置变量及其说明见 [`snapshot.env.example`](./snapshot.env.example)。这些文件**不可提交到 git**(在 `.gitignore` 中添加 `*.env`)。 + +同时建议在 repo Settings → General 中开启 **"Automatically delete head branches"**,PR merge 后分支自动删除,无需手动维护。 + +### 3. 配置 cron job + +每个环境 / 快照类型各添加一条 cron 记录: + +```bash +crontab -e +``` + +```cron +REPO=/data/run-morph-node/ops/snapshot + +# mainnet 标准 snapshot(使用默认的 snapshot.env) +0 2 1,15 * * python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet.log 2>&1 + +# mainnet mpt-snapshot(env 文件中设置 SNAPSHOT_PREFIX=mpt-snapshot) +0 3 1,15 * * ENV_FILE=$REPO/snapshot-mainnet-mpt.env python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet-mpt.log 2>&1 + +# hoodi +0 2 1,15 * * ENV_FILE=$REPO/snapshot-hoodi.env python3 $REPO/snapshot_make.py >> /var/log/snapshot-hoodi.log 2>&1 +``` + +### 4. 
启动 metrics server + +在节点服务器上用 pm2 托管 `metrics_server.py`,使其随机器重启自动恢复: + +```bash +pm2 startup # 将 pm2 自身注册为系统开机服务(仅需执行一次) +pm2 start python3 --name morph-snapshot-metrics -- /data/run-morph-node/ops/snapshot/metrics_server.py +pm2 save +``` + +启动后采集侧即可通过 `http://:6060/metrics` 拉取指标。 + +暴露的 metrics: + +| Metric | 类型 | 说明 | +|---|---|---| +| `morph_snapshot_readme_update_status` | gauge | 1 = 成功,0 = 失败 | +| `morph_snapshot_readme_update_timestamp_seconds` | gauge | 最后一次执行的 Unix 时间戳 | + +Labels:`environment`(mainnet / hoodi / holesky)、`snapshot`(快照名称) + +> 默认 metrics 文件路径:`/tmp/morph_snapshot_metrics.prom` +> 如需修改,通过环境变量 `METRICS_FILE` 统一传入(对 `update_readme.py` 和 `metrics_server.py` 同时生效)。 + +## 关键设计决策 + +- **base_height 在 geth 重启后采集**:snapshot 制作完成、geth 单独启动后再查询 RPC,读取的是 snapshot 实际对应的区块状态,比停止前采集更准确。morph-node 在确认高度后再启动。 +- **失败时兜底恢复**:`snapshot_make.py` 在异常时尝试拉起两个进程,避免服务持续中断。 +- **不依赖 GitHub Actions 和 git CLI**:`update_metadata.py` 直接调用 GitHub REST API,服务器只需要 Python 3,`GH_TOKEN` 是唯一需要的凭证。 +- **新记录插入表格顶部**:最新 snapshot 始终出现在表格第一行,便于用户快速找到。 +- **通过 PR 而非直接 push 合并变更**:创建新分支并开启 PR,保留 review 机会,避免自动化脚本直接写入 main 分支。 +- **每个环境 / 类型独立 env 文件**:通过 `ENV_FILE` 环境变量指定,各配置互不干扰,同一台机器可以跑多种 snapshot 类型。 diff --git a/ops/snapshot/metrics_server.py b/ops/snapshot/metrics_server.py new file mode 100644 index 0000000..5e6ef27 --- /dev/null +++ b/ops/snapshot/metrics_server.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +""" +Lightweight Prometheus metrics HTTP server for morph snapshot automation. + +Reads a .prom file written by update_readme.py and serves it on :6060/metrics. +Intended to run as a persistent process (e.g. managed by pm2). 
+ +Environment variables: + METRICS_FILE - path to the .prom file (default: /tmp/morph_snapshot_metrics.prom) + METRICS_PORT - port to listen on (default: 6060) +""" + +import http.server +import os +import socket + +METRICS_FILE = os.environ.get("METRICS_FILE", "/tmp/morph_snapshot_metrics.prom") +PORT = int(os.environ.get("METRICS_PORT", "6060")) + +EMPTY_METRICS = ( + "# HELP morph_snapshot_readme_update_status 1 if last README update succeeded, 0 if failed\n" + "# TYPE morph_snapshot_readme_update_status gauge\n" + "# (no data yet — update_readme.py has not run)\n" +) + + +class MetricsHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + if self.path != "/metrics": + self.send_response(404) + self.end_headers() + return + + try: + with open(METRICS_FILE, "r") as f: + content = f.read() + self.send_response(200) + except OSError: + content = EMPTY_METRICS + self.send_response(200) + + body = content.encode("utf-8") + self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, fmt, *args): + # Suppress per-request access logs to keep output clean + pass + + +if __name__ == "__main__": + server = http.server.HTTPServer(("0.0.0.0", PORT), MetricsHandler) + host = socket.gethostname() + print(f"morph-snapshot metrics server listening on http://{host}:{PORT}/metrics") + print(f"Reading metrics from: {METRICS_FILE}") + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nShutting down.") + server.server_close() + diff --git a/ops/snapshot/snapshot.env.example b/ops/snapshot/snapshot.env.example new file mode 100644 index 0000000..e416509 --- /dev/null +++ b/ops/snapshot/snapshot.env.example @@ -0,0 +1,54 @@ +# ───────────────────────────────────────────────────────────────────────────── +# Morph Snapshot Environment Configuration +# +# Copy this file and fill in the values (keep it in the same
directory as the scripts): +# cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot.env +# +# For multiple environments / snapshot types, use separate files: +# cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot-hoodi.env +# ENV_FILE=ops/snapshot/snapshot-hoodi.env python3 ops/snapshot/snapshot_make.py +# +# snapshot.env (the default) is loaded automatically without ENV_FILE. +# ───────────────────────────────────────────────────────────────────────────── + +# ── Required ────────────────────────────────────────────────────────────────── + +# Target environment: mainnet | hoodi | holesky +ENVIRONMENT=mainnet + +# S3 bucket to upload snapshots to +S3_BUCKET=my-morph-snapshots + +# GitHub Fine-grained PAT with Contents:write and Pull requests:write +GH_TOKEN=ghp_xxxxxxxxxxxx + +# GitHub repository in owner/repo format +GITHUB_REPOSITORY=morph-l2/run-morph-node + +# ── Snapshot type ───────────────────────────────────────────────────────────── + +# Prefix for the snapshot name: snapshot | mpt-snapshot | full-snapshot +# Affects snapshot name (e.g. snapshot-20260309-1), S3 key, and branch name. +# Each type running on the same day will get a unique name and branch. 
+SNAPSHOT_PREFIX=snapshot + +# ── Paths ───────────────────────────────────────────────────────────────────── + +# Root directory of chain data for this environment +MORPH_HOME=/data/mainnet + +# Temporary work directory used during snapshot compression (cleared after use) +SNAPSHOT_WORK_DIR=/data/snapshot_work + +# Output path of the compressed snapshot file +SNAPSHOT_FILE=/data/snapshot.tar.gz + +# ── Service ─────────────────────────────────────────────────────────────────── + +# Geth JSON-RPC endpoint used to collect base_height after restart +GETH_RPC=http://127.0.0.1:8545 + +# ── README ──────────────────────────────────────────────────────────────────── + +# Path to README.md within the GitHub repository (relative to repo root) +README_PATH=README.md diff --git a/ops/snapshot/snapshot_make.py b/ops/snapshot/snapshot_make.py new file mode 100644 index 0000000..7071a75 --- /dev/null +++ b/ops/snapshot/snapshot_make.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +""" +ops/snapshot/snapshot_make.py + +Runs on the node server via cron (1st and 15th of each month). + +Responsibilities: + 1. Stop morph-geth and morph-node + 2. Create and compress a snapshot of chain data + 3. Upload the snapshot to S3 + 4. Restart morph-geth, wait for RPC, collect base_height + 5. Restart morph-node + 6. Call update_metadata.py to open a PR updating the README snapshot table + +Setup: + 1. Clone the repo to /data/run-morph-node on the node server + 2. Copy ops/snapshot/snapshot.env.example to ops/snapshot/snapshot.env and fill in values + For multiple environments/types, use separate files and pass via ENV_FILE: + cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot-mainnet.env + cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot-hoodi.env + + 3. 
Add to crontab (one entry per environment / snapshot type): + + REPO=/data/run-morph-node/ops/snapshot + + # mainnet standard snapshot (uses default snapshot.env) + 0 2 1,15 * * python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet.log 2>&1 + + # mainnet mpt-snapshot + 0 3 1,15 * * ENV_FILE=$REPO/snapshot-mainnet-mpt.env \ + python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet-mpt.log 2>&1 + + # hoodi + 0 2 1,15 * * ENV_FILE=$REPO/snapshot-hoodi.env \ + python3 $REPO/snapshot_make.py >> /var/log/snapshot-hoodi.log 2>&1 +""" + +import json +import os +import shutil +import subprocess +import sys +import time +import urllib.request +from datetime import datetime, timezone +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.resolve())) + +SCRIPT_DIR = Path(__file__).parent.resolve() +REPO_DIR = SCRIPT_DIR.parent.parent + +# ── Env file loader ──────────────────────────────────────────────────────────── + +def load_env_file(path: str) -> None: + """Parse KEY=value lines (with or without 'export' prefix) into os.environ.""" + try: + with open(path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + if line.startswith("export "): + line = line[len("export "):] + if "=" in line: + key, _, value = line.partition("=") + value = value.strip().strip('"').strip("'") + os.environ.setdefault(key.strip(), value) + except FileNotFoundError: + print(f"WARNING: {path} not found, relying on existing environment variables") + +# ── Shell helpers ────────────────────────────────────────────────────────────── + +def run(args: list, check: bool = True) -> None: + print(f" $ {' '.join(str(a) for a in args)}") + subprocess.run(args, check=check) + +# ── Geth RPC ─────────────────────────────────────────────────────────────────── + +def get_block_height(rpc_url: str = "http://localhost:8545", + retries: int = 30, interval: int = 5) -> int: + payload = json.dumps({ + "jsonrpc": "2.0", "method": 
"eth_blockNumber", "params": [], "id": 1 + }).encode() + for i in range(1, retries + 1): + try: + req = urllib.request.Request( + rpc_url, data=payload, + headers={"Content-Type": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=5) as resp: + result = json.loads(resp.read())["result"] + if result: + return int(result, 16) + except Exception: + pass + print(f" attempt {i}: geth not ready yet, retrying in {interval}s...") + time.sleep(interval) + raise RuntimeError("geth RPC did not become available in time") + +# ── Main ─────────────────────────────────────────────────────────────────────── + +def main() -> None: + env_file = os.environ.get("ENV_FILE", str(SCRIPT_DIR / "snapshot.env")) + load_env_file(env_file) + + environment = os.environ.get("ENVIRONMENT", "mainnet") + morph_home = os.environ.get("MORPH_HOME", f"/data/{environment}") + s3_bucket = os.environ.get("S3_BUCKET", "") + if not s3_bucket: + print("ERROR: S3_BUCKET is required", file=sys.stderr) + sys.exit(1) + + geth_data_dir = os.path.join(morph_home, "geth-data") + node_data_dir = os.path.join(morph_home, "node-data") + work_dir = os.environ.get("SNAPSHOT_WORK_DIR", "/data/snapshot_work") + snapshot_file = os.environ.get("SNAPSHOT_FILE", "/data/snapshot.tar.gz") + + # SNAPSHOT_PREFIX allows different snapshot types to coexist: + # e.g. 
"snapshot", "mpt-snapshot", "full-snapshot" + snapshot_prefix = os.environ.get("SNAPSHOT_PREFIX", "snapshot") + date = datetime.now(timezone.utc).strftime("%Y%m%d") + snapshot_name = f"{snapshot_prefix}-{date}-1" + + os.environ["SNAPSHOT_NAME"] = snapshot_name + os.environ["ENVIRONMENT"] = environment + + print(f"=== Morph Snapshot: {snapshot_name} ({environment}) ===") + print(f"Started at: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}") + + gh_token = os.environ.get("GH_TOKEN", "") + gh_repo = os.environ.get("GITHUB_REPOSITORY", "") + + services_stopped = False + try: + # ── Step 0: Resolve snapshot_name before any destructive operation ──── + # Check GitHub now so that snapshot_name, S3 key, and branch all match. + if gh_token and gh_repo: + from update_metadata import resolve_snapshot_name + snapshot_name = resolve_snapshot_name(gh_repo, environment, snapshot_name, gh_token) + os.environ["SNAPSHOT_NAME"] = snapshot_name + print(f"Resolved snapshot name: {snapshot_name}") + + # ── Step 1: Stop services ───────────────────────────────────────────── + print("\n[1/6] Stopping services...") + run(["pm2", "stop", "morph-node"]) + run(["pm2", "stop", "morph-geth"]) + services_stopped = True + time.sleep(10) + print("✅ Services stopped") + + # ── Step 2: Create snapshot ─────────────────────────────────────────── + print("\n[2/6] Creating snapshot...") + if os.path.exists(work_dir): + shutil.rmtree(work_dir) + os.makedirs(work_dir) + shutil.copytree(os.path.join(geth_data_dir, "geth"), os.path.join(work_dir, "geth")) + shutil.copytree(os.path.join(node_data_dir, "data"), os.path.join(work_dir, "data")) + + print(f"Compressing to {snapshot_file}...") + run(["tar", "-czf", snapshot_file, "-C", work_dir, "."]) + shutil.rmtree(work_dir) + size = subprocess.check_output(["du", "-sh", snapshot_file]).decode().split()[0] + print(f"✅ Snapshot created: {size}") + + # ── Step 3: Upload to S3 ────────────────────────────────────────────── + print("\n[3/6] 
Uploading to S3...") + s3_key = f"{environment}/{snapshot_name}.tar.gz" + run(["aws", "s3", "cp", snapshot_file, f"s3://{s3_bucket}/{s3_key}", "--no-progress"]) + print(f"✅ Uploaded: s3://{s3_bucket}/{s3_key}") + + # ── Step 4: Start geth, collect base_height ─────────────────────────── + print("\n[4/6] Starting morph-geth and collecting base_height...") + run(["pm2", "start", "morph-geth"]) + print("Waiting for geth RPC to be ready...") + base_height = get_block_height() + os.environ["BASE_HEIGHT"] = str(base_height) + print(f"✅ Geth base height: {base_height}") + + # ── Step 5: Start morph-node ────────────────────────────────────────── + print("\n[5/6] Starting morph-node...") + run(["pm2", "start", "morph-node"]) + print("✅ morph-node started") + + # ── Step 6: Update README via GitHub API ────────────────────────────── + print("\n[6/6] Updating README snapshot table...") + run([sys.executable, str(REPO_DIR / "ops" / "snapshot" / "update_metadata.py")]) + + except Exception as e: + print(f"\nERROR: {e}", file=sys.stderr) + if services_stopped: + print("Recovering services...") + run(["pm2", "start", "morph-geth"], check=False) + run(["pm2", "start", "morph-node"], check=False) + print("Services recovered.") + sys.exit(1) + + print(f"\n=== Done at {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')} ===") + + +if __name__ == "__main__": + main() diff --git a/ops/snapshot/update_metadata.py b/ops/snapshot/update_metadata.py new file mode 100644 index 0000000..49d4584 --- /dev/null +++ b/ops/snapshot/update_metadata.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python3 +""" +Fetch snapshot metadata from the indexer API and update README.md via GitHub API. + +Given BASE_HEIGHT and SNAPSHOT_NAME, this script: + 1. Queries the internal explorer-indexer API for l1_msg_start_height + and derivation_start_height. + 2. Fetches README.md content from GitHub, applies the table update in memory. + 3. 
Creates a new branch, pushes the updated file, and opens a PR — + all via GitHub REST API (no git or gh CLI required). + +Environment variables: + ENVIRONMENT - mainnet | hoodi | holesky + SNAPSHOT_NAME - e.g. snapshot-20260225-1 + BASE_HEIGHT - L2 geth block height + GH_TOKEN - GitHub personal access token (repo scope) + GITHUB_REPOSITORY - owner/repo, e.g. morphl2/run-morph-node + README_PATH - path to README.md inside the repo (default: README.md) + L1_MSG_HEIGHT - (optional) skip indexer API, use this value directly + DERIV_HEIGHT - (optional) skip indexer API, use this value directly + DRY_RUN - set to "1" to skip README update and PR creation + +Usage: + # Full run (on Self-hosted Runner, hits internal indexer API): + ENVIRONMENT=mainnet SNAPSHOT_NAME=snapshot-20260225-1 BASE_HEIGHT=20169165 \\ + GH_TOKEN=ghp_xxx GITHUB_REPOSITORY=morphl2/run-morph-node \\ + python3 ops/snapshot/update_metadata.py + + # Local test with mock values — no git/gh CLI needed: + ENVIRONMENT=mainnet SNAPSHOT_NAME=snapshot-test-1 BASE_HEIGHT=20169165 \\ + L1_MSG_HEIGHT=24280251 DERIV_HEIGHT=24294756 \\ + GH_TOKEN=ghp_xxx GITHUB_REPOSITORY=morphl2/run-morph-node \\ + python3 ops/snapshot/update_metadata.py + + # Dry run — only fetches/prints metadata, touches nothing: + ENVIRONMENT=mainnet SNAPSHOT_NAME=snapshot-test-1 BASE_HEIGHT=20169165 \\ + L1_MSG_HEIGHT=24280251 DERIV_HEIGHT=24294756 DRY_RUN=1 \\ + python3 ops/snapshot/update_metadata.py +""" + +import base64 +import json +import os +import re +import sys +import urllib.error +import urllib.request + +# ── Constants ───────────────────────────────────────────────────────────────── + +INDEXER_HOSTS = { + "mainnet": "explorer-indexer.morphl2.io", + "hoodi": "explorer-indexer-hoodi.morphl2.io", + "holesky": "explorer-indexer-holesky.morphl2.io", +} + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, SCRIPT_DIR) + +# ── HTTP helpers ────────────────────────────────────────────────────────────── + +def 
_http_request(req: urllib.request.Request, url: str) -> dict: + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + body = e.read().decode(errors="replace") + raise RuntimeError(f"HTTP {e.code} {e.reason} — URL: {url}\nResponse: {body}") from None + except urllib.error.URLError as e: + raise RuntimeError(f"Network error — URL: {url}\n{e.reason}") from None + + +def http_get(url: str, token: str = "") -> dict: + headers = {"Accept": "application/json"} + if token: + headers["Authorization"] = f"Bearer {token}" + return _http_request(urllib.request.Request(url, headers=headers), url) + + +def http_get_or_none(url: str, token: str = "") -> dict | None: + """Like http_get but returns None on 404 instead of raising.""" + headers = {"Accept": "application/json"} + if token: + headers["Authorization"] = f"Bearer {token}" + req = urllib.request.Request(url, headers=headers) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + if e.code == 404: + return None + body = e.read().decode(errors="replace") + raise RuntimeError(f"HTTP {e.code} {e.reason} — URL: {url}\nResponse: {body}") from None + except urllib.error.URLError as e: + raise RuntimeError(f"Network error — URL: {url}\n{e.reason}") from None + + +def http_post(url: str, payload: dict, token: str) -> dict: + data = json.dumps(payload).encode() + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"} + return _http_request(urllib.request.Request(url, data=data, headers=headers, method="POST"), url) + + +def http_put(url: str, payload: dict, token: str) -> dict: + data = json.dumps(payload).encode() + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"} + return _http_request(urllib.request.Request(url, data=data, headers=headers, method="PUT"), url) + +# ── Indexer API 
─────────────────────────────────────────────────────────────── + +def fetch_metadata(environment: str, base_height: str) -> tuple[str, str]: + """Return (l1_msg_start_height, derivation_start_height) as strings.""" + host = INDEXER_HOSTS[environment] + + def get(path): + url = f"https://{host}{path}" + print(f" GET {url}") + return http_get(url) + + l1_data = get(f"/v1/batch/l1_msg_start_height/{base_height}") + deriv_data = get(f"/v1/batch/derivation_start_height/{base_height}") + + if "l1_msg_start_height" not in l1_data: + raise RuntimeError(f"Unexpected indexer response for l1_msg_start_height: {l1_data}") + if "derivation_start_height" not in deriv_data: + raise RuntimeError(f"Unexpected indexer response for derivation_start_height: {deriv_data}") + + return str(l1_data["l1_msg_start_height"]), str(deriv_data["derivation_start_height"]) + +# ── GitHub API ──────────────────────────────────────────────────────────────── + +GITHUB_API = "https://api.github.com" + + +def gh_get_file(repo: str, path: str, token: str, ref: str = "main") -> tuple[str, str]: + """Fetch file content. Returns (decoded_content, blob_sha).""" + url = f"{GITHUB_API}/repos/{repo}/contents/{path}?ref={ref}" + data = http_get(url, token) + content = base64.b64decode(data["content"]).decode("utf-8") + return content, data["sha"] + + +def gh_get_main_sha(repo: str, token: str) -> str: + """Return the current commit SHA of the main branch.""" + url = f"{GITHUB_API}/repos/{repo}/git/ref/heads/main" + data = http_get(url, token) + return data["object"]["sha"] + + +def gh_branch_exists(repo: str, branch: str, token: str) -> bool: + url = f"{GITHUB_API}/repos/{repo}/git/ref/heads/{branch}" + return http_get_or_none(url, token) is not None + + +def resolve_snapshot_name(repo: str, environment: str, + snapshot_name: str, token: str) -> str: + """Return a snapshot_name whose branch does not yet exist on GitHub. 
+ + Increments the trailing -N suffix until a free branch is found, so that + snapshot_name, S3 key, README row, and branch name all stay in sync. + + e.g. snapshot-20260309-1 → snapshot-20260309-2 if the -1 branch exists. + """ + base_name = re.sub(r"-\d+$", "", snapshot_name) + counter = 1 + candidate = f"{base_name}-{counter}" + while gh_branch_exists(repo, f"snapshot/{environment}-{candidate}", token): + counter += 1 + candidate = f"{base_name}-{counter}" + if candidate != snapshot_name: + print(f" Branch for {snapshot_name} already exists → using {candidate}") + return candidate + + +def gh_create_branch(repo: str, branch: str, sha: str, token: str) -> None: + """Create branch. snapshot_name must already be resolved via resolve_snapshot_name.""" + url = f"{GITHUB_API}/repos/{repo}/git/refs" + http_post(url, {"ref": f"refs/heads/{branch}", "sha": sha}, token) + print(f" Created branch: {branch}") + + +def gh_update_file(repo: str, path: str, content: str, + blob_sha: str, branch: str, message: str, token: str) -> None: + url = f"{GITHUB_API}/repos/{repo}/contents/{path}" + http_put(url, { + "message": message, + "content": base64.b64encode(content.encode("utf-8")).decode(), + "sha": blob_sha, + "branch": branch, + }, token) + print(f" Pushed {path} to branch: {branch}") + + +def gh_create_pr(repo: str, branch: str, title: str, body: str, token: str) -> str: + url = f"{GITHUB_API}/repos/{repo}/pulls" + data = http_post(url, { + "title": title, + "body": body, + "head": branch, + "base": "main", + }, token) + return data["html_url"] + +# ── README update (in-memory) ───────────────────────────────────────────────── + +def build_new_row(environment: str, snapshot_name: str, + deriv_height: str, l1_msg_height: str, base_height: str) -> str: + cdn_base = "https://snapshot.morphl2.io" + url = f"{cdn_base}/{environment}/{snapshot_name}.tar.gz" + return f"| [{snapshot_name}]({url}) | {deriv_height} | {l1_msg_height} | {base_height} |" + + +def 
apply_readme_update(content: str, environment: str, snapshot_name: str, + deriv_height: str, l1_msg_height: str, base_height: str) -> str: + """Import insert_row_content from update_readme.py and apply it.""" + from update_readme import insert_row_content, SECTION_MARKERS # noqa: E402 + + section_marker = SECTION_MARKERS[environment] + new_row = build_new_row(environment, snapshot_name, deriv_height, l1_msg_height, base_height) + return insert_row_content(content, section_marker, new_row) + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main() -> None: + dry_run = os.environ.get("DRY_RUN", "0") == "1" + + # Validate required env vars + required = ["ENVIRONMENT", "SNAPSHOT_NAME", "BASE_HEIGHT"] + if not dry_run: + required += ["GH_TOKEN", "GITHUB_REPOSITORY"] + + missing = [v for v in required if not os.environ.get(v)] + if missing: + print(f"ERROR: Missing required env vars: {', '.join(missing)}", file=sys.stderr) + sys.exit(1) + + environment = os.environ["ENVIRONMENT"] + snapshot_name = os.environ["SNAPSHOT_NAME"] + base_height = os.environ["BASE_HEIGHT"] + token = os.environ.get("GH_TOKEN", "") + repo = os.environ.get("GITHUB_REPOSITORY", "") + readme_path = os.environ.get("README_PATH", "README.md") + + if environment not in INDEXER_HOSTS: + print(f"ERROR: Unknown environment: {environment!r}. 
Must be: {' | '.join(INDEXER_HOSTS)}", + file=sys.stderr) + sys.exit(1) + + # ── Step 1: metadata ────────────────────────────────────────────────────── + l1_msg_height = os.environ.get("L1_MSG_HEIGHT", "") + deriv_height = os.environ.get("DERIV_HEIGHT", "") + + if l1_msg_height and deriv_height: + print(f"\n[1/3] Using provided metadata (API call skipped):") + else: + print(f"\n[1/3] Fetching metadata from indexer (base_height={base_height}) ...") + l1_msg_height, deriv_height = fetch_metadata(environment, base_height) + + print(f" l1_msg_start_height = {l1_msg_height}") + print(f" derivation_start_height = {deriv_height}") + + if dry_run: + print("\n[DRY RUN] Skipping README update and PR creation.") + print(f" Would insert: env={environment} snapshot={snapshot_name}") + print(f" base={base_height} l1_msg={l1_msg_height} deriv={deriv_height}") + return + + # ── Step 2: update README in memory, push via GitHub API ───────────────── + print(f"\n[2/3] Updating README via GitHub API ...") + current_content, blob_sha = gh_get_file(repo, readme_path, token) + updated_content = apply_readme_update( + current_content, environment, snapshot_name, deriv_height, l1_msg_height, base_height + ) + + branch = f"snapshot/{environment}-{snapshot_name}" + commit_msg = f"snapshot: add {snapshot_name} ({environment})" + main_sha = gh_get_main_sha(repo, token) + + gh_create_branch(repo, branch, main_sha, token) + gh_update_file(repo, readme_path, updated_content, blob_sha, branch, commit_msg, token) + + # ── Step 3: open PR ─────────────────────────────────────────────────────── + print(f"\n[3/3] Creating PR ...") + pr_body = ( + f"Auto-generated by snapshot workflow.\n\n" + f"- Environment: `{environment}`\n" + f"- Snapshot: `{snapshot_name}`\n" + f"- L2 Base Height: `{base_height}`\n" + f"- L1 Msg Start Height: `{l1_msg_height}`\n" + f"- Derivation Start Height: `{deriv_height}`" + ) + pr_url = gh_create_pr(repo, branch, commit_msg, pr_body, token) + + print(f"\n✅ Done. 
PR opened: {pr_url}") + + +if __name__ == "__main__": + main() diff --git a/ops/snapshot/update_readme.py b/ops/snapshot/update_readme.py new file mode 100644 index 0000000..048705a --- /dev/null +++ b/ops/snapshot/update_readme.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +""" +Update the snapshot table in README.md. + +Inserts a new row at the TOP of the target environment's snapshot table. + +Environment variables: + ENVIRONMENT - mainnet | hoodi | holesky + SNAPSHOT_NAME - e.g. snapshot-20260225-1 + BASE_HEIGHT - L2 geth block height (L2 Base Height) + L1_MSG_HEIGHT - l1_msg_start_height from indexer API + DERIV_HEIGHT - derivation_start_height from indexer API + METRICS_FILE - (optional) path to write Prometheus metrics + default: /tmp/morph_snapshot_metrics.prom + metrics_server.py reads this file and serves it on :6060/metrics + +Usage: + python3 ops/snapshot/update_readme.py +""" + +import os +import re +import sys +import time + +# ── Constants ───────────────────────────────────────────────────────────────── + +CDN_BASE = "https://snapshot.morphl2.io" + +SECTION_MARKERS = { + "mainnet": "**For mainnet**", + "hoodi": "**For hoodi testnet**", + "holesky": "**For holesky testnet(legacy)**", +} + +METRICS_FILE = os.environ.get("METRICS_FILE", "/tmp/morph_snapshot_metrics.prom") + +# ── Metrics ─────────────────────────────────────────────────────────────────── + +def write_metric(status: int, environment: str, snapshot_name: str) -> None: + """Write Prometheus metrics to METRICS_FILE. 
status: 1=success, 0=failure.""" + ts = int(time.time()) + labels = f'environment="{environment}",snapshot="{snapshot_name}"' + content = ( + "# HELP morph_snapshot_readme_update_status 1 if last README update succeeded, 0 if failed\n" + "# TYPE morph_snapshot_readme_update_status gauge\n" + f"morph_snapshot_readme_update_status{{{labels}}} {status}\n" + "# HELP morph_snapshot_readme_update_timestamp_seconds Unix timestamp of last run\n" + "# TYPE morph_snapshot_readme_update_timestamp_seconds gauge\n" + f"morph_snapshot_readme_update_timestamp_seconds{{{labels}}} {ts}\n" + ) + os.makedirs(os.path.dirname(os.path.abspath(METRICS_FILE)), exist_ok=True) + with open(METRICS_FILE, "w") as f: + f.write(content) + +# ── README update ───────────────────────────────────────────────────────────── + +def insert_row_content(content: str, section_marker: str, new_row: str) -> str: + """ + In-memory version: takes the README content as a string, inserts new_row + after the table separator in the target section, returns updated content. 
+    """
+    lines = content.splitlines(keepends=True)
+    in_section = False
+    inserted = False
+    result = []
+
+    for line in lines:
+        result.append(line)
+
+        if section_marker in line:
+            in_section = True
+
+        if in_section and not inserted and re.match(r"^\|[\s:|-]+\|", line):
+            result.append(new_row + "\n")
+            inserted = True
+            in_section = False
+
+    if not inserted:
+        raise RuntimeError(
+            f"Could not find table separator for section: {section_marker!r}"
+        )
+
+    return "".join(result)
+
+
+def insert_row(readme_path: str, section_marker: str, new_row: str) -> None:
+    """File-based wrapper around insert_row_content."""
+    with open(readme_path, "r") as f:
+        content = f.read()
+
+    updated = insert_row_content(content, section_marker, new_row)
+
+    with open(readme_path, "w") as f:
+        f.write(updated)
+
+# ── Main ────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <readme_path>", file=sys.stderr)
+        sys.exit(1)
+
+    readme_path = sys.argv[1]
+
+    # Validate required env vars
+    missing = [v for v in ("ENVIRONMENT", "SNAPSHOT_NAME", "BASE_HEIGHT", "L1_MSG_HEIGHT", "DERIV_HEIGHT")
+               if not os.environ.get(v)]
+    if missing:
+        print(f"ERROR: Missing required env vars: {', '.join(missing)}", file=sys.stderr)
+        write_metric(0, os.environ.get("ENVIRONMENT", "unknown"),
+                     os.environ.get("SNAPSHOT_NAME", "unknown"))
+        sys.exit(1)
+
+    environment = os.environ["ENVIRONMENT"]
+    snapshot_name = os.environ["SNAPSHOT_NAME"]
+    base_height = os.environ["BASE_HEIGHT"]
+    l1_msg_height = os.environ["L1_MSG_HEIGHT"]
+    deriv_height = os.environ["DERIV_HEIGHT"]
+
+    # Validate environment
+    if environment not in SECTION_MARKERS:
+        print(f"ERROR: Unknown environment: {environment!r}. 
Must be: {' | '.join(SECTION_MARKERS)}", + file=sys.stderr) + write_metric(0, environment, snapshot_name) + sys.exit(1) + + section_marker = SECTION_MARKERS[environment] + url = f"{CDN_BASE}/{environment}/{snapshot_name}.tar.gz" + new_row = f"| [{snapshot_name}]({url}) | {deriv_height} | {l1_msg_height} | {base_height} |" + + try: + insert_row(readme_path, section_marker, new_row) + except Exception as e: + print(f"ERROR: {e}", file=sys.stderr) + write_metric(0, environment, snapshot_name) + sys.exit(1) + + print(f"✅ Inserted new row into [{environment}] table:") + print(f" {new_row}") + + write_metric(1, environment, snapshot_name) + print(f"📊 Metrics written to: {METRICS_FILE}") + + +if __name__ == "__main__": + main() + From e5f7bdce58e64a64c7a41108f375216a76c20397 Mon Sep 17 00:00:00 2001 From: corey Date: Wed, 11 Mar 2026 23:32:34 +0800 Subject: [PATCH 3/5] docs: add MPT node setup instructions to README - Add MPT snapshot download commands - Add MPT node run commands for mainnet and hoodi - Add empty MPT snapshot tables (to be populated by automated process) Co-Authored-By: Claude Opus 4.6 --- ops/snapshot/README.md | 24 +++++++------- ops/snapshot/README.zh.md | 24 +++++++------- ops/snapshot/metrics_server.py | 1 + ops/snapshot/snapshot.env.example | 54 ++++++++++++++++++------------- ops/snapshot/snapshot_make.py | 46 +++++++++++++------------- ops/snapshot/update_metadata.py | 20 +++++++----- ops/snapshot/update_readme.py | 1 + 7 files changed, 94 insertions(+), 76 deletions(-) diff --git a/ops/snapshot/README.md b/ops/snapshot/README.md index 32737a2..9c831aa 100644 --- a/ops/snapshot/README.md +++ b/ops/snapshot/README.md @@ -95,25 +95,27 @@ All available variables are documented in [`snapshot.env.example`](./snapshot.en Also recommended: enable **"Automatically delete head branches"** under repo Settings → General. Branches will be deleted automatically after a PR is merged. -### 3. Configure the Cron Job +### 3. 
Configure the Scheduled Job (PM2) -Add one entry per environment / snapshot type: +Copy the ecosystem template and edit `ENV_FILE` and `script` path for your environment: ```bash -crontab -e +cp /data/run-morph-node/ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js +# edit ecosystem.config.js ``` -```cron -REPO=/data/run-morph-node/ops/snapshot +Start and persist: -# mainnet standard snapshot (uses default snapshot.env) -0 2 1,15 * * python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet.log 2>&1 +```bash +pm2 start /data/morph-hoodi/ecosystem.config.js +pm2 save +``` -# mainnet mpt-snapshot -0 3 1,15 * * ENV_FILE=$REPO/snapshot-mainnet-mpt.env python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet-mpt.log 2>&1 +To trigger manually for testing: -# hoodi -0 2 1,15 * * ENV_FILE=$REPO/snapshot-hoodi.env python3 $REPO/snapshot_make.py >> /var/log/snapshot-hoodi.log 2>&1 +```bash +pm2 restart snapshot-hoodi +pm2 logs snapshot-hoodi ``` ### 4. Start the Metrics Server diff --git a/ops/snapshot/README.zh.md b/ops/snapshot/README.zh.md index 604a938..018492b 100644 --- a/ops/snapshot/README.zh.md +++ b/ops/snapshot/README.zh.md @@ -96,25 +96,27 @@ cp snapshot.env.example snapshot-mainnet-mpt.env 同时建议在 repo Settings → General 中开启 **"Automatically delete head branches"**,PR merge 后分支自动删除,无需手动维护。 -### 3. 配置 cron job +### 3. 
配置定时任务(PM2) -每个环境 / 快照类型各添加一条 cron 记录: +复制 ecosystem 模板,修改 `ENV_FILE` 和 `script` 路径后启动: ```bash -crontab -e +cp /data/run-morph-node/ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js +# 编辑 ecosystem.config.js ``` -```cron -REPO=/data/run-morph-node/ops/snapshot +启动并持久化: -# mainnet 标准 snapshot(使用默认的 snapshot.env) -0 2 1,15 * * python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet.log 2>&1 +```bash +pm2 start /data/morph-hoodi/ecosystem.config.js +pm2 save +``` -# mainnet mpt-snapshot(env 文件中设置 SNAPSHOT_PREFIX=mpt-snapshot) -0 3 1,15 * * ENV_FILE=$REPO/snapshot-mainnet-mpt.env python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet-mpt.log 2>&1 +手动触发测试: -# hoodi -0 2 1,15 * * ENV_FILE=$REPO/snapshot-hoodi.env python3 $REPO/snapshot_make.py >> /var/log/snapshot-hoodi.log 2>&1 +```bash +pm2 restart snapshot-hoodi +pm2 logs snapshot-hoodi ``` ### 4. 启动 metrics server diff --git a/ops/snapshot/metrics_server.py b/ops/snapshot/metrics_server.py index 5e6ef27..8c6e183 100644 --- a/ops/snapshot/metrics_server.py +++ b/ops/snapshot/metrics_server.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from __future__ import annotations """ Lightweight Prometheus metrics HTTP server for morph snapshot automation. 
diff --git a/ops/snapshot/snapshot.env.example b/ops/snapshot/snapshot.env.example index e416509..0d6f6b9 100644 --- a/ops/snapshot/snapshot.env.example +++ b/ops/snapshot/snapshot.env.example @@ -1,26 +1,24 @@ # ───────────────────────────────────────────────────────────────────────────── # Morph Snapshot Environment Configuration # -# Copy this file and fill in the values (keep it in the same directory as the scripts): -# cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot.env +# Copy this file for each environment and fill in the values: +# cp ops/snapshot/snapshot.env.example /data/morph-hoodi/snapshot.env +# cp ops/snapshot/snapshot.env.example /data/morph-mainnet/snapshot.env # -# For multiple environments / snapshot types, use separate files: -# cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot-hoodi.env -# ENV_FILE=ops/snapshot/snapshot-hoodi.env python3 ops/snapshot/snapshot_make.py -# -# snapshot.env (the default) is loaded automatically without ENV_FILE. +# Then set ENV_FILE when starting the pm2 process (see ecosystem.config.js). # ───────────────────────────────────────────────────────────────────────────── # ── Required ────────────────────────────────────────────────────────────────── # Target environment: mainnet | hoodi | holesky -ENVIRONMENT=mainnet +ENVIRONMENT=hoodi -# S3 bucket to upload snapshots to -S3_BUCKET=my-morph-snapshots +# S3 bucket name only — no s3:// prefix, no path suffix +# e.g. morph-0582-morph-technical-department-snapshot +S3_BUCKET= # GitHub Fine-grained PAT with Contents:write and Pull requests:write -GH_TOKEN=ghp_xxxxxxxxxxxx +GH_TOKEN= # GitHub repository in owner/repo format GITHUB_REPOSITORY=morph-l2/run-morph-node @@ -29,26 +27,36 @@ GITHUB_REPOSITORY=morph-l2/run-morph-node # Prefix for the snapshot name: snapshot | mpt-snapshot | full-snapshot # Affects snapshot name (e.g. snapshot-20260309-1), S3 key, and branch name. -# Each type running on the same day will get a unique name and branch. 
+# Each prefix running on the same day gets a unique name and branch. SNAPSHOT_PREFIX=snapshot # ── Paths ───────────────────────────────────────────────────────────────────── # Root directory of chain data for this environment -MORPH_HOME=/data/mainnet +# e.g. /data/morph-hoodi or /data/morph-mainnet +MORPH_HOME=/data/morph-hoodi -# Temporary work directory used during snapshot compression (cleared after use) -SNAPSHOT_WORK_DIR=/data/snapshot_work +# Directory packed as geth/ in the snapshot — defaults to $MORPH_HOME/geth-data +# e.g. GETH_DB_DIR=/data/morph-hoodi/geth-data +GETH_DB_DIR= -# Output path of the compressed snapshot file -SNAPSHOT_FILE=/data/snapshot.tar.gz +# Directory packed as data/ in the snapshot — defaults to $MORPH_HOME/node-data/data +# e.g. NODE_DB_DIR=/data/morph-hoodi/node-data/data +NODE_DB_DIR= -# ── Service ─────────────────────────────────────────────────────────────────── +# Temporary working directory — all temp files go here, deleted after upload. +# WARNING: must NOT be MORPH_HOME or any data directory (it gets deleted and recreated). +# Layout: staging/ for copytree, snapshot.tar.gz for compressed output. +# e.g. SNAPSHOT_WORK_DIR=/data/morph-hoodi/snapshot_work +SNAPSHOT_WORK_DIR= -# Geth JSON-RPC endpoint used to collect base_height after restart -GETH_RPC=http://127.0.0.1:8545 +# ── Service ─────────────────────────────────────────────────────────────────── -# ── README ──────────────────────────────────────────────────────────────────── +# Geth JSON-RPC endpoint — defaults to http://127.0.0.1:8545 +# GETH_RPC=http://127.0.0.1:8545 -# Path to README.md within the GitHub repository (relative to repo root) -README_PATH=README.md +# Indexer API base URL for fetching l1_msg_start_height and derivation_start_height. +# Defaults to the public endpoint for the given ENVIRONMENT if not set. +# Set to internal/intranet URL on production servers. +# e.g. 
EXPLORER_INDEXER_URL=https://explorer-indexer-hoodi.morphl2.io +EXPLORER_INDEXER_URL= diff --git a/ops/snapshot/snapshot_make.py b/ops/snapshot/snapshot_make.py index 7071a75..da79a1f 100644 --- a/ops/snapshot/snapshot_make.py +++ b/ops/snapshot/snapshot_make.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from __future__ import annotations """ ops/snapshot/snapshot_make.py @@ -14,25 +15,13 @@ Setup: 1. Clone the repo to /data/run-morph-node on the node server - 2. Copy ops/snapshot/snapshot.env.example to ops/snapshot/snapshot.env and fill in values - For multiple environments/types, use separate files and pass via ENV_FILE: + 2. Copy ops/snapshot/snapshot.env.example for each environment and fill in values: cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot-mainnet.env cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot-hoodi.env - 3. Add to crontab (one entry per environment / snapshot type): - - REPO=/data/run-morph-node/ops/snapshot - - # mainnet standard snapshot (uses default snapshot.env) - 0 2 1,15 * * python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet.log 2>&1 - - # mainnet mpt-snapshot - 0 3 1,15 * * ENV_FILE=$REPO/snapshot-mainnet-mpt.env \ - python3 $REPO/snapshot_make.py >> /var/log/snapshot-mainnet-mpt.log 2>&1 - - # hoodi - 0 2 1,15 * * ENV_FILE=$REPO/snapshot-hoodi.env \ - python3 $REPO/snapshot_make.py >> /var/log/snapshot-hoodi.log 2>&1 + 3. 
Copy ecosystem.config.js.example, set ENV_FILE and script path, then: + pm2 start /data/morph-hoodi/ecosystem.config.js + pm2 save """ import json @@ -111,10 +100,18 @@ def main() -> None: print("ERROR: S3_BUCKET is required", file=sys.stderr) sys.exit(1) - geth_data_dir = os.path.join(morph_home, "geth-data") - node_data_dir = os.path.join(morph_home, "node-data") - work_dir = os.environ.get("SNAPSHOT_WORK_DIR", "/data/snapshot_work") - snapshot_file = os.environ.get("SNAPSHOT_FILE", "/data/snapshot.tar.gz") + # GETH_DB_DIR / NODE_DB_DIR point directly to the directories that will be + # packed into the snapshot (as geth/ and data/ respectively). + # Use `or` so that empty-string values in the env file also fall back to defaults. + geth_db_dir = os.environ.get("GETH_DB_DIR") or os.path.join(morph_home, "geth-data") + node_db_dir = os.environ.get("NODE_DB_DIR") or os.path.join(morph_home, "node-data", "data") + + # All temp files live under SNAPSHOT_WORK_DIR: + # staging/ — copytree target, deleted after compression + # snapshot.tar.gz — compressed output, deleted after S3 upload + work_base = os.environ.get("SNAPSHOT_WORK_DIR") or "/data/snapshot_work" + work_dir = os.path.join(work_base, "staging") + snapshot_file = os.path.join(work_base, "snapshot.tar.gz") # SNAPSHOT_PREFIX allows different snapshot types to coexist: # e.g. 
"snapshot", "mpt-snapshot", "full-snapshot" @@ -154,8 +151,8 @@ def main() -> None: if os.path.exists(work_dir): shutil.rmtree(work_dir) os.makedirs(work_dir) - shutil.copytree(os.path.join(geth_data_dir, "geth"), os.path.join(work_dir, "geth")) - shutil.copytree(os.path.join(node_data_dir, "data"), os.path.join(work_dir, "data")) + shutil.copytree(geth_db_dir, os.path.join(work_dir, "geth")) + shutil.copytree(node_db_dir, os.path.join(work_dir, "data")) print(f"Compressing to {snapshot_file}...") run(["tar", "-czf", snapshot_file, "-C", work_dir, "."]) @@ -168,12 +165,15 @@ def main() -> None: s3_key = f"{environment}/{snapshot_name}.tar.gz" run(["aws", "s3", "cp", snapshot_file, f"s3://{s3_bucket}/{s3_key}", "--no-progress"]) print(f"✅ Uploaded: s3://{s3_bucket}/{s3_key}") + os.remove(snapshot_file) + print(f"✅ Removed local snapshot file: {snapshot_file}") # ── Step 4: Start geth, collect base_height ─────────────────────────── print("\n[4/6] Starting morph-geth and collecting base_height...") run(["pm2", "start", "morph-geth"]) + geth_rpc = os.environ.get("GETH_RPC") or "http://127.0.0.1:8545" print("Waiting for geth RPC to be ready...") - base_height = get_block_height() + base_height = get_block_height(geth_rpc) os.environ["BASE_HEIGHT"] = str(base_height) print(f"✅ Geth base height: {base_height}") diff --git a/ops/snapshot/update_metadata.py b/ops/snapshot/update_metadata.py index 49d4584..41f8be7 100644 --- a/ops/snapshot/update_metadata.py +++ b/ops/snapshot/update_metadata.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from __future__ import annotations """ Fetch snapshot metadata from the indexer API and update README.md via GitHub API. 
@@ -48,10 +49,10 @@

 # ── Constants ─────────────────────────────────────────────────────────────────

-INDEXER_HOSTS = {
-    "mainnet": "explorer-indexer.morphl2.io",
-    "hoodi": "explorer-indexer-hoodi.morphl2.io",
-    "holesky": "explorer-indexer-holesky.morphl2.io",
+INDEXER_BASE_URLS = {
+    "mainnet": "https://explorer-indexer.morphl2.io",
+    "hoodi": "https://explorer-indexer-hoodi.morphl2.io",
+    "holesky": "https://explorer-indexer-holesky.morphl2.io",
 }

 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -110,10 +111,13 @@ def http_put(url: str, payload: dict, token: str) -> dict:

 def fetch_metadata(environment: str, base_height: str) -> tuple[str, str]:
     """Return (l1_msg_start_height, derivation_start_height) as strings."""
-    host = INDEXER_HOSTS[environment]
+    # EXPLORER_INDEXER_URL overrides the default public URL, useful for internal/intranet access.
+    base_url = os.environ.get("EXPLORER_INDEXER_URL", INDEXER_BASE_URLS.get(environment, ""))
+    if not base_url:
+        raise RuntimeError(f"No indexer URL for environment {environment!r}. Set EXPLORER_INDEXER_URL.")

     def get(path):
-        url = f"https://{host}{path}"
+        url = f"{base_url.rstrip('/')}{path}"
         print(f"  GET {url}")
         return http_get(url)

@@ -241,8 +245,8 @@ def main() -> None:
     repo = os.environ.get("GITHUB_REPOSITORY", "")
     readme_path = os.environ.get("README_PATH", "README.md")

-    if environment not in INDEXER_HOSTS:
-        print(f"ERROR: Unknown environment: {environment!r}. Must be: {' | '.join(INDEXER_HOSTS)}",
+    if environment not in INDEXER_BASE_URLS:
+        print(f"ERROR: Unknown environment: {environment!r}. Must be: {' | '.join(INDEXER_BASE_URLS)}",
               file=sys.stderr)
         sys.exit(1)

diff --git a/ops/snapshot/update_readme.py b/ops/snapshot/update_readme.py
index 048705a..006162b 100644
--- a/ops/snapshot/update_readme.py
+++ b/ops/snapshot/update_readme.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import annotations
 """
 Update the snapshot table in README.md.
From 447693b77d4714fde013e42f5c967e7abd4d857d Mon Sep 17 00:00:00 2001 From: corey Date: Thu, 2 Apr 2026 14:26:18 +0800 Subject: [PATCH 4/5] fix --- CLAUDE.md | 95 ++++++++++++ ops/snapshot/README.md | 133 +++++++++++----- ops/snapshot/README.zh.md | 132 +++++++++++----- ops/snapshot/ecosystem.config.js.example | 39 +++++ ops/snapshot/generate_sha256.py | 189 +++++++++++++++++++++++ ops/snapshot/list_snapshots.py | 69 +++++++++ ops/snapshot/metrics_server.py | 31 +++- ops/snapshot/snapshot.env.example | 19 ++- ops/snapshot/snapshot_make.py | 174 ++++++++++++++++++--- ops/snapshot/update_metadata.py | 149 ++++++++++-------- 10 files changed, 856 insertions(+), 174 deletions(-) create mode 100644 CLAUDE.md create mode 100644 ops/snapshot/ecosystem.config.js.example create mode 100644 ops/snapshot/generate_sha256.py create mode 100644 ops/snapshot/list_snapshots.py diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2ebd449 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,95 @@ +# run-morph-node + +Morph L2 node operator toolkit. Provides Docker-based setup for running Morph nodes (standard and MPT), plus automated snapshot creation and distribution infrastructure. 
+ +## Project Structure + +``` +morph-node/ # Docker Compose setup for running a Morph node + Makefile # All node operations (run/stop/download-snapshot) + docker-compose.yml # Service definitions: geth + node/validator + .env # Mainnet config + .env_holesky # Holesky testnet config + .env_hoodi # Hoodi testnet config + .env_mpt # MPT-specific overrides (loaded on top of env) + entrypoint-geth.sh # Standard geth startup script + entrypoint-geth-mpt.sh # MPT geth startup script (--morph-mpt flag) + +ops/snapshot/ # Snapshot automation scripts (runs on server via cron/pm2) + snapshot_make.py # Entry point: stop → snapshot → S3 upload → restart → update README + update_metadata.py # Fetches indexer API data, creates branch + PR via GitHub API + update_readme.py # In-memory README table insertion logic + Prometheus metrics + metrics_server.py # HTTP server exposing snapshot metrics on :6060/metrics + snapshot.env.example # Configuration reference + +mainnet/hoodi/holesky/ # Chain genesis and config files (static, do not modify) +``` + +## Environments + +| Environment | Makefile prefix | Snapshot CDN | +|-------------|-----------------|--------------| +| Mainnet | (no prefix) | snapshot.morphl2.io/mainnet | +| Hoodi testnet | `-hoodi` | snapshot.morphl2.io/hoodi | +| Holesky testnet (legacy) | `-holesky` | snapshot.morphl2.io/holesky | + +MPT variants use an additional `--env-file .env_mpt` overlay. + +## Common Operations + +```bash +# Run a node +cd morph-node +make run-hoodi-node # hoodi standard +make run-mainnet-mpt-node # mainnet MPT + +# Download snapshot +make download-and-decompress-hoodi-snapshot +make download-and-decompress-mainnet-mpt-snapshot + +# Stop +make stop-node +make stop-validator +``` + +## Snapshot Automation + +Runs on a server via pm2, triggered on configured days of the month. + +**Full flow:** +1. Stop morph-node + morph-geth (pm2) +2. tar geth-data + node-data → upload to S3 +3. Restart geth → wait for RPC → collect `base_height` +4. 
Restart morph-node +5. Query indexer API for `l1_msg_start_height` and `derivation_start_height` +6. Push updated README row + open PR via GitHub API (no git CLI needed) + +**Configuration:** Copy `ops/snapshot/snapshot.env.example` to the server data directory, fill in `S3_BUCKET`, `GH_TOKEN`, `MORPH_HOME`, `GITHUB_REPOSITORY`. + +**Dry run (safe, no writes):** +```bash +DRY_RUN=1 ENVIRONMENT=mainnet SNAPSHOT_NAME=test-1 BASE_HEIGHT=123 \ + L1_MSG_HEIGHT=456 DERIV_HEIGHT=789 python3 ops/snapshot/update_metadata.py +``` + +## Code Conventions + +- **Python**: stdlib only (no third-party deps), Python 3.9+. Use `urllib.request` for HTTP, not `requests`. +- **Shell scripts**: POSIX sh (`#!/bin/sh`), not bash. Use `set -e` for error handling. +- **Makefile**: Use `define`/`call` macros for repeated patterns. Always check for required tools before running. +- **Error handling**: Scripts must recover services if stopped (see `try/finally` pattern in `snapshot_make.py`). +- **Environment config**: Never hardcode paths or credentials. Always read from env vars with sensible defaults. + +## Security + +- **Never commit** `.env`, `snapshot.env`, or any file containing `GH_TOKEN`, `S3_BUCKET`, or AWS credentials. +- `GH_TOKEN` must be a Fine-grained PAT with only `Contents: Read/Write` and `Pull requests: Read/Write`. +- Snapshot automation opens PRs — it never merges directly to main. +- `jwt-secret.txt` is generated locally and never committed. + +## Git Conventions + +- Branch naming: `feat/`, `fix/`, `docs/` prefixes +- Snapshot automation branches: `snapshot/{environment}-{snapshot-name}` +- PRs require at least 1 approval before merging to main +- Commits are GPG-signed (configured in `~/.gitconfig`) diff --git a/ops/snapshot/README.md b/ops/snapshot/README.md index 9c831aa..926bf8e 100644 --- a/ops/snapshot/README.md +++ b/ops/snapshot/README.md @@ -12,14 +12,17 @@ Manually creating snapshots is error-prone and tedious. 
This solution automates ``` run-morph-node/ -├── README.md # snapshot table is updated here +├── README.md # snapshot table is updated here └── ops/snapshot/ - ├── README.md # this document - ├── README.zh.md # Chinese version - ├── snapshot_make.py # entry point: stop → snapshot → upload → restart → update README - ├── update_metadata.py # fetches indexer API data and orchestrates the full update flow - ├── update_readme.py # pure table-update logic (imported by update_metadata.py) - └── metrics_server.py # persistent HTTP server exposing metrics on :6060/metrics + ├── README.md # this document + ├── README.zh.md # Chinese version + ├── snapshot.env.example # environment variable template (copy one per environment) + ├── ecosystem.config.js.example # PM2 process config template + ├── snapshot_make.py # entry point: stop → snapshot → upload → restart → update README + ├── update_metadata.py # fetches indexer API data and orchestrates the full update flow + ├── update_readme.py # pure table-update logic (imported by update_metadata.py) + ├── metrics_server.py # persistent HTTP server exposing metrics on :6060/metrics + └── list_snapshots.py # utility to list uploaded snapshots in S3 ``` ## Workflow @@ -30,8 +33,10 @@ Server cron job (1st and 15th of each month) ▼ ops/snapshot/snapshot_make.py [1] stop morph-node, morph-geth - [2] create snapshot (tar geth + node data) - [3] upload to S3 + [2] copy chain data: + - geth: chaindata only → snapshot/geth/chaindata/ + - node: blockstore.db, cs.wal, state.db, tx_index.db, evidence.db → snapshot/data/ + [3] compress → upload to S3 as {environment}/{snapshot_name}.tar.gz [4] restart morph-geth → wait for RPC → collect base_height [5] restart morph-node [6] call update_metadata.py @@ -39,7 +44,7 @@ Server cron job (1st and 15th of each month) ▼ python3 update_metadata.py ┌─────────────────────────────────────────────────────┐ - │ 1. call internal explorer-indexer API: │ + │ 1. 
call explorer-indexer API: │ │ GET /v1/batch/l1_msg_start_height/ │ │ GET /v1/batch/derivation_start_height/│ │ 2. fetch README.md content via GitHub API │ @@ -58,40 +63,43 @@ Server cron job (1st and 15th of each month) ## Multi-environment Support -| Environment | Indexer API (internal) | -|---|---| -| mainnet | `explorer-indexer.morphl2.io` | -| hoodi | `explorer-indexer-hoodi.morphl2.io` | -| holesky | `explorer-indexer-holesky.morphl2.io` | +| Environment | Default Indexer API | Override | +|---|---|---| +| mainnet | `https://explorer-indexer.morphl2.io` | `EXPLORER_INDEXER_URL` | +| hoodi | `https://explorer-indexer-hoodi.morphl2.io` | `EXPLORER_INDEXER_URL` | +| holesky | `https://explorer-indexer-holesky.morphl2.io` | `EXPLORER_INDEXER_URL` | + +Each environment runs its own cron job with its own env file. S3 paths and README table sections are automatically scoped by environment. -Each environment has its own node server with its own cron job. S3 paths and README table sections are automatically scoped by environment. +Set `EXPLORER_INDEXER_URL` to an internal/intranet URL if the default public endpoint is not reachable from the node server. ## Deployment -### 1. Clone the Repository on the Node Server +### 1. Copy Scripts to the Node Server + +The node server does not require git. Copy the scripts manually: ```bash -git clone https://github.com/morphl2/run-morph-node.git /data/run-morph-node +# copy all scripts to the data directory of each environment +scp ops/snapshot/*.py user@server:/data/morph-hoodi/ ``` ### 2. Create the Environment File -Copy the template into the same directory and fill in the values: +Copy the template into the environment's data directory and fill in the values: ```bash -cd /data/run-morph-node/ops/snapshot -cp snapshot.env.example snapshot.env -# edit snapshot.env and fill in GH_TOKEN, S3_BUCKET, ENVIRONMENT, etc. 
+cp ops/snapshot/snapshot.env.example /data/morph-hoodi/snapshot.env +vi /data/morph-hoodi/snapshot.env ``` -For multiple environments or snapshot types, use separate files: +All available variables are documented in [`snapshot.env.example`](./snapshot.env.example). -```bash -cp snapshot.env.example snapshot-hoodi.env -cp snapshot.env.example snapshot-mainnet-mpt.env -``` +> ⚠️ **`SNAPSHOT_WORK_DIR` must NOT be set to `MORPH_HOME` or any data directory.** +> The script deletes and recreates this directory at startup. Setting it incorrectly will cause data loss. +> Use a dedicated subdirectory, e.g. `SNAPSHOT_WORK_DIR=/data/morph-hoodi/snapshot_work`. -All available variables are documented in [`snapshot.env.example`](./snapshot.env.example). These files must **not** be committed to git (add `*.env` to `.gitignore`). +These files must **not** be committed to git (add `*.env` to `.gitignore`). Also recommended: enable **"Automatically delete head branches"** under repo Settings → General. Branches will be deleted automatically after a PR is merged. @@ -100,8 +108,8 @@ Also recommended: enable **"Automatically delete head branches"** under repo Set Copy the ecosystem template and edit `ENV_FILE` and `script` path for your environment: ```bash -cp /data/run-morph-node/ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js -# edit ecosystem.config.js +cp ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js +vi /data/morph-hoodi/ecosystem.config.js ``` Start and persist: @@ -114,8 +122,10 @@ pm2 save To trigger manually for testing: ```bash -pm2 restart snapshot-hoodi -pm2 logs snapshot-hoodi +cd /data/morph-hoodi +nohup env ENV_FILE=/data/morph-hoodi/snapshot.env python3 /data/morph-hoodi/snapshot_make.py \ + > /tmp/snapshot.log 2>&1 & +tail -f /tmp/snapshot.log ``` ### 4. 
Start the Metrics Server @@ -124,7 +134,7 @@ Run `metrics_server.py` as a persistent pm2 process so it survives server reboot ```bash pm2 startup # register pm2 itself as a system startup service (run once) -pm2 start python3 --name morph-snapshot-metrics -- /data/run-morph-node/ops/snapshot/metrics_server.py +pm2 start python3 --name morph-snapshot-metrics -- /data/morph-hoodi/metrics_server.py pm2 save ``` @@ -140,14 +150,57 @@ Exposed metrics: Labels: `environment` (mainnet / hoodi / holesky), `snapshot` (snapshot name) > Default metrics file path: `/tmp/morph_snapshot_metrics.prom` -> Override via the `METRICS_FILE` environment variable — applies to both `update_readme.py` and `metrics_server.py`. +> Override via the `METRICS_FILE` environment variable. -## Key Design Decisions +## Listing Snapshots in S3 + +```bash +# list all snapshots +python3 list_snapshots.py -- **`base_height` is collected after geth restarts**: querying the RPC after the snapshot is created and geth is started alone gives the actual block state of the snapshot, which is more accurate than querying before the stop. `morph-node` is started only after the height is confirmed. -- **Fallback recovery on failure**: if the snapshot or upload fails, a fallback step in `snapshot_make.py` attempts to restart both processes to avoid prolonged service interruption. -- **No GitHub Actions or git CLI required**: `update_metadata.py` uses the GitHub REST API directly — the server only needs Python 3. The `GH_TOKEN` is the only credential needed. -- **New entries are inserted at the top of the table**: the latest snapshot always appears in the first row for quick access. -- **Changes are merged via PR, not direct push**: a new branch is created and a PR is opened, preserving review opportunity and preventing automated scripts from writing directly to the main branch. 
+# filter by environment +python3 list_snapshots.py --env hoodi + +# specify bucket +python3 list_snapshots.py --env hoodi --bucket my-bucket-name +``` +## Manual Recovery + +If step 6 (README update) fails after a successful S3 upload, re-run `update_metadata.py` directly. +The snapshot summary is printed to the log before step 6 starts — use those values: + +```bash +cd /data/morph-hoodi +ENVIRONMENT=hoodi \ +SNAPSHOT_NAME=snapshot-20260312-1 \ +BASE_HEIGHT=3904561 \ +GH_TOKEN=ghp_xxx \ +GITHUB_REPOSITORY=morph-l2/run-morph-node \ +python3 /data/morph-hoodi/update_metadata.py +``` + +If the indexer API values are already known (visible in the log), skip the API call: + +```bash +cd /data/morph-hoodi +ENVIRONMENT=hoodi \ +SNAPSHOT_NAME=snapshot-20260312-1 \ +BASE_HEIGHT=3904561 \ +L1_MSG_HEIGHT=2388518 \ +DERIV_HEIGHT=2401543 \ +GH_TOKEN=ghp_xxx \ +GITHUB_REPOSITORY=morph-l2/run-morph-node \ +python3 /data/morph-hoodi/update_metadata.py +``` + +## Key Design Decisions +- **`base_height` is collected after geth restarts**: querying the RPC after the snapshot is created and geth is started alone gives the actual block state of the snapshot. `morph-node` is started only after the height is confirmed. +- **Only essential data is included in the snapshot**: geth `chaindata` only; node data includes `blockstore.db`, `cs.wal`, `state.db`, `tx_index.db`, `evidence.db`. Lock files, node keys, and P2P peer lists are excluded. +- **Snapshot extracts to a named directory**: the tar archive extracts to `{snapshot_name}/geth/` and `{snapshot_name}/data/`, matching the archive filename. +- **Fallback recovery on failure**: if the snapshot or upload fails, `snapshot_make.py` attempts to restart both services to avoid prolonged downtime. +- **No GitHub Actions or git CLI required**: `update_metadata.py` uses the GitHub REST API directly — the server only needs Python 3.7+. +- **New entries are inserted at the top of the table**: the latest snapshot always appears in the first row. 
+- **Changes are merged via PR, not direct push**: a new branch is created and a PR is opened, preserving review opportunity. +- **Per-environment env files**: each environment and snapshot type has its own `snapshot.env` file, specified via `ENV_FILE`. diff --git a/ops/snapshot/README.zh.md b/ops/snapshot/README.zh.md index 018492b..cc4a058 100644 --- a/ops/snapshot/README.zh.md +++ b/ops/snapshot/README.zh.md @@ -12,15 +12,17 @@ ``` run-morph-node/ -├── README.md # snapshot 表格在此更新 +├── README.md # snapshot 表格在此更新 └── ops/snapshot/ - ├── README.md # 英文文档 - ├── README.zh.md # 本文档 - ├── snapshot.env.example # 环境变量模板(每个环境复制一份填写) - ├── snapshot_make.py # 入口:停服 → 制作 → 上传 → 重启 → 更新 README - ├── update_metadata.py # 查询 indexer API 并编排完整更新流程 - ├── update_readme.py # 纯表格更新逻辑(由 update_metadata.py 调用) - └── metrics_server.py # 常驻 HTTP server,在 :6060/metrics 暴露 metrics + ├── README.md # 英文文档 + ├── README.zh.md # 本文档 + ├── snapshot.env.example # 环境变量模板(每个环境复制一份填写) + ├── ecosystem.config.js.example # PM2 进程配置模板 + ├── snapshot_make.py # 入口:停服 → 制作 → 上传 → 重启 → 更新 README + ├── update_metadata.py # 查询 indexer API 并编排完整更新流程 + ├── update_readme.py # 纯表格更新逻辑(由 update_metadata.py 调用) + ├── metrics_server.py # 常驻 HTTP server,在 :6060/metrics 暴露 metrics + └── list_snapshots.py # 查询 S3 中已上传的快照列表 ``` ## 完整流程 @@ -31,8 +33,10 @@ run-morph-node/ ▼ ops/snapshot/snapshot_make.py [1] 停止 morph-node、morph-geth - [2] 制作快照(tar geth + node 数据) - [3] 上传至 S3 + [2] 复制链数据: + - geth:仅复制 chaindata → snapshot/geth/chaindata/ + - node:blockstore.db、cs.wal、state.db、tx_index.db、evidence.db → snapshot/data/ + [3] 压缩 → 上传至 S3,路径为 {environment}/{snapshot_name}.tar.gz [4] 重启 morph-geth → 等待 RPC 就绪 → 采集 base_height [5] 重启 morph-node [6] 调用 update_metadata.py @@ -40,7 +44,7 @@ run-morph-node/ ▼ python3 update_metadata.py ┌─────────────────────────────────────────────────────┐ - │ 1. 调用内网 explorer-indexer API: │ + │ 1. 
调用 explorer-indexer API: │ │ GET /v1/batch/l1_msg_start_height/ │ │ GET /v1/batch/derivation_start_height/│ │ 2. 通过 GitHub API 获取 README.md 当前内容 │ @@ -59,40 +63,42 @@ run-morph-node/ ## 多环境支持 -| 环境 | Indexer API(内网) | -|---|---| -| mainnet | `explorer-indexer.morphl2.io` | -| hoodi | `explorer-indexer-hoodi.morphl2.io` | -| holesky | `explorer-indexer-holesky.morphl2.io` | +| 环境 | 默认 Indexer API | 覆盖方式 | +|---|---|---| +| mainnet | `https://explorer-indexer.morphl2.io` | `EXPLORER_INDEXER_URL` | +| hoodi | `https://explorer-indexer-hoodi.morphl2.io` | `EXPLORER_INDEXER_URL` | +| holesky | `https://explorer-indexer-holesky.morphl2.io` | `EXPLORER_INDEXER_URL` | + +每个环境有独立的 cron 任务和 env 文件,S3 路径和 README 表格自动按环境区分。 -每个环境 / 快照类型有独立的 env 文件,通过 `ENV_FILE` 环境变量指定。S3 路径和 README 表格自动按环境区分。 +如果节点服务器无法访问默认公网地址,通过 `EXPLORER_INDEXER_URL` 指定内网地址。 ## 部署步骤 -### 1. 在节点服务器上克隆仓库 +### 1. 将脚本复制到节点服务器 + +节点服务器不需要安装 git,直接手动复制脚本: ```bash -git clone https://github.com/morph-l2/run-morph-node.git /data/run-morph-node +scp ops/snapshot/*.py user@server:/data/morph-hoodi/ ``` ### 2. 
创建环境变量文件 -在脚本同级目录复制模板并填写对应值: +将模板复制到对应环境的数据目录并填写配置: ```bash -cd /data/run-morph-node/ops/snapshot -cp snapshot.env.example snapshot.env -# 编辑 snapshot.env,填写 GH_TOKEN、S3_BUCKET、ENVIRONMENT 等 +cp ops/snapshot/snapshot.env.example /data/morph-hoodi/snapshot.env +vi /data/morph-hoodi/snapshot.env ``` -多个环境或快照类型各自使用独立的 env 文件: +所有可配置变量及说明见 [`snapshot.env.example`](./snapshot.env.example)。 -```bash -cp snapshot.env.example snapshot-hoodi.env -cp snapshot.env.example snapshot-mainnet-mpt.env -``` +> ⚠️ **`SNAPSHOT_WORK_DIR` 绝对不能设置为 `MORPH_HOME` 或任何链数据目录。** +> 脚本在启动时会删除并重建该目录,配置错误会导致数据丢失。 +> 请使用独立的子目录,例如 `SNAPSHOT_WORK_DIR=/data/morph-hoodi/snapshot_work`。 -所有可配置变量及其说明见 [`snapshot.env.example`](./snapshot.env.example)。这些文件**不可提交到 git**(在 `.gitignore` 中添加 `*.env`)。 +这些文件**不可提交到 git**(在 `.gitignore` 中添加 `*.env`)。 同时建议在 repo Settings → General 中开启 **"Automatically delete head branches"**,PR merge 后分支自动删除,无需手动维护。 @@ -101,8 +107,8 @@ cp snapshot.env.example snapshot-mainnet-mpt.env 复制 ecosystem 模板,修改 `ENV_FILE` 和 `script` 路径后启动: ```bash -cp /data/run-morph-node/ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js -# 编辑 ecosystem.config.js +cp ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js +vi /data/morph-hoodi/ecosystem.config.js ``` 启动并持久化: @@ -115,8 +121,10 @@ pm2 save 手动触发测试: ```bash -pm2 restart snapshot-hoodi -pm2 logs snapshot-hoodi +cd /data/morph-hoodi +nohup env ENV_FILE=/data/morph-hoodi/snapshot.env python3 /data/morph-hoodi/snapshot_make.py \ + > /tmp/snapshot.log 2>&1 & +tail -f /tmp/snapshot.log ``` ### 4. 
启动 metrics server @@ -125,7 +133,7 @@ pm2 logs snapshot-hoodi ```bash pm2 startup # 将 pm2 自身注册为系统开机服务(仅需执行一次) -pm2 start python3 --name morph-snapshot-metrics -- /data/run-morph-node/ops/snapshot/metrics_server.py +pm2 start python3 --name morph-snapshot-metrics -- /data/morph-hoodi/metrics_server.py pm2 save ``` @@ -141,13 +149,57 @@ pm2 save Labels:`environment`(mainnet / hoodi / holesky)、`snapshot`(快照名称) > 默认 metrics 文件路径:`/tmp/morph_snapshot_metrics.prom` -> 如需修改,通过环境变量 `METRICS_FILE` 统一传入(对 `update_readme.py` 和 `metrics_server.py` 同时生效)。 +> 如需修改,通过环境变量 `METRICS_FILE` 传入。 + +## 查询 S3 快照列表 + +```bash +# 列出所有快照 +python3 list_snapshots.py + +# 只看某个环境 +python3 list_snapshots.py --env hoodi + +# 指定 bucket +python3 list_snapshots.py --env hoodi --bucket my-bucket-name +``` + +## 手动补跑 + +如果第 6 步(README 更新)失败,但 S3 上传已经成功,直接补跑 `update_metadata.py`。 +日志中 step 6 开始前会打印快照摘要信息,直接拿来用: + +```bash +cd /data/morph-hoodi +ENVIRONMENT=hoodi \ +SNAPSHOT_NAME=snapshot-20260312-1 \ +BASE_HEIGHT=3904561 \ +GH_TOKEN=ghp_xxx \ +GITHUB_REPOSITORY=morph-l2/run-morph-node \ +python3 /data/morph-hoodi/update_metadata.py +``` + +如果 indexer 已经查到了值(日志中可见),加上 `L1_MSG_HEIGHT` 和 `DERIV_HEIGHT` 跳过重查: + +```bash +cd /data/morph-hoodi +ENVIRONMENT=hoodi \ +SNAPSHOT_NAME=snapshot-20260312-1 \ +BASE_HEIGHT=3904561 \ +L1_MSG_HEIGHT=2388518 \ +DERIV_HEIGHT=2401543 \ +GH_TOKEN=ghp_xxx \ +GITHUB_REPOSITORY=morph-l2/run-morph-node \ +python3 /data/morph-hoodi/update_metadata.py +``` ## 关键设计决策 -- **base_height 在 geth 重启后采集**:snapshot 制作完成、geth 单独启动后再查询 RPC,读取的是 snapshot 实际对应的区块状态,比停止前采集更准确。morph-node 在确认高度后再启动。 +- **base_height 在 geth 重启后采集**:snapshot 制作完成、geth 单独启动后再查询 RPC,读取的是 snapshot 实际对应的区块状态。morph-node 在确认高度后再启动。 +- **只打包必要数据**:geth 只复制 `chaindata`;node 数据只包含 `blockstore.db`、`cs.wal`、`state.db`、`tx_index.db`、`evidence.db`。lock 文件、节点密钥、P2P 节点列表均排除在外。 +- **解压后为同名目录**:tar 包解压后得到 `{snapshot_name}/geth/` 和 `{snapshot_name}/data/`,与压缩包文件名一致。 - **失败时兜底恢复**:`snapshot_make.py` 在异常时尝试拉起两个进程,避免服务持续中断。 -- **不依赖 
GitHub Actions 和 git CLI**:`update_metadata.py` 直接调用 GitHub REST API,服务器只需要 Python 3,`GH_TOKEN` 是唯一需要的凭证。 -- **新记录插入表格顶部**:最新 snapshot 始终出现在表格第一行,便于用户快速找到。 -- **通过 PR 而非直接 push 合并变更**:创建新分支并开启 PR,保留 review 机会,避免自动化脚本直接写入 main 分支。 -- **每个环境 / 类型独立 env 文件**:通过 `ENV_FILE` 环境变量指定,各配置互不干扰,同一台机器可以跑多种 snapshot 类型。 +- **不依赖 GitHub Actions 和 git CLI**:`update_metadata.py` 直接调用 GitHub REST API,服务器只需要 Python 3.7+。 +- **新记录插入表格顶部**:最新 snapshot 始终出现在第一行,便于用户快速找到。 +- **通过 PR 而非直接 push 合并变更**:创建新分支并开启 PR,保留 review 机会。 +- **每个环境 / 类型独立 env 文件**:通过 `ENV_FILE` 指定,各配置互不干扰。 diff --git a/ops/snapshot/ecosystem.config.js.example b/ops/snapshot/ecosystem.config.js.example new file mode 100644 index 0000000..ba18eb0 --- /dev/null +++ b/ops/snapshot/ecosystem.config.js.example @@ -0,0 +1,39 @@ +// PM2 ecosystem config for snapshot automation. +// +// Setup: +// 1. Copy this file to the node server: +// cp ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js +// 2. Edit paths and ENV_FILE to match the environment. +// 3. Start: +// pm2 start /data/morph-hoodi/ecosystem.config.js +// pm2 save + +module.exports = { + apps: [ + { + name: 'snapshot-hoodi', + script: '/data/morph-hoodi/snapshot_make.py', + interpreter: 'python3', + // 13th and 28th of each month at 15:30 CST (= 07:30 UTC) + cron_restart: '30 7 13,28 * *', + autorestart: false, + // Give morph-geth enough time to flush the snapshot journal before + // being force-killed (default is 1600ms which is too short). + // This prevents "diffs=missing" errors on the next prune-state run. 
+ kill_timeout: 60000, + env: { + ENV_FILE: '/data/morph-hoodi/snapshot.env', + }, + }, + { + name: 'morph-snapshot-metrics', + script: '/data/morph-hoodi/metrics_server.py', + interpreter: 'python3', + autorestart: true, + env: { + METRICS_PORT: '6060', + ENVIRONMENT: 'hoodi', + }, + }, + ], +}; diff --git a/ops/snapshot/generate_sha256.py b/ops/snapshot/generate_sha256.py new file mode 100644 index 0000000..b5b312b --- /dev/null +++ b/ops/snapshot/generate_sha256.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +from __future__ import annotations +""" +Backfill SHA256 checksum files for snapshot archives in S3. + +Finds .tar.gz archives that are missing a corresponding .sha256 sidecar +and generates one by streaming the archive content directly from S3 +(no local disk needed). + +Usage: + python3 generate_sha256.py --bucket my-bucket + python3 generate_sha256.py --bucket my-bucket --env hoodi + python3 generate_sha256.py --bucket my-bucket --key mainnet/snapshot-20260309-1.tar.gz + python3 generate_sha256.py --bucket my-bucket --dry-run + python3 generate_sha256.py --bucket my-bucket --force +""" + +import argparse +import hashlib +import json +import os +import subprocess +import sys +import tempfile +import time + +CHUNK_SIZE = 8 * 1024 * 1024 # 8 MiB, same as snapshot_make.py + + +def list_s3_keys(bucket: str, prefix: str, suffix: str) -> list[str]: + """Return S3 keys matching the given prefix and suffix.""" + cmd = ["aws", "s3api", "list-objects-v2", + "--bucket", bucket, "--output", "json"] + if prefix: + cmd += ["--prefix", prefix] + + keys: list[str] = [] + token = None + while True: + page_cmd = cmd if token is None else cmd + ["--continuation-token", token] + result = subprocess.run(page_cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"ERROR: aws s3api failed: {result.stderr.strip()}", file=sys.stderr) + sys.exit(1) + + data = json.loads(result.stdout or "{}") + for obj in data.get("Contents", []): + if 
obj["Key"].endswith(suffix): + keys.append(obj["Key"]) + + if not data.get("IsTruncated"): + break + token = data.get("NextContinuationToken") + + return sorted(keys) + + +def human_size(size_bytes: int) -> str: + for unit in ("B", "KB", "MB", "GB", "TB"): + if size_bytes < 1024: + return f"{size_bytes:.1f} {unit}" + size_bytes /= 1024 + return f"{size_bytes:.1f} PB" + + +def get_object_size(bucket: str, key: str) -> int: + cmd = ["aws", "s3api", "head-object", "--bucket", bucket, "--key", key, + "--query", "ContentLength", "--output", "text"] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + return 0 + try: + return int(result.stdout.strip()) + except ValueError: + return 0 + + +def compute_sha256_streaming(bucket: str, key: str) -> str: + """Stream an S3 object through hashlib.sha256 without touching disk.""" + s3_uri = f"s3://{bucket}/{key}" + proc = subprocess.Popen( + ["aws", "s3", "cp", s3_uri, "-"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + ) + sha = hashlib.sha256() + total = 0 + while True: + chunk = proc.stdout.read(CHUNK_SIZE) + if not chunk: + break + sha.update(chunk) + total += len(chunk) + + proc.wait() + if proc.returncode != 0: + err = proc.stderr.read().decode().strip() + raise RuntimeError(f"aws s3 cp failed for {s3_uri}: {err}") + + return sha.hexdigest() + + +def upload_sha256(bucket: str, sha256_key: str, sha256_hex: str, + archive_basename: str) -> None: + """Write a sha256sum-compatible file and upload to S3.""" + content = f"{sha256_hex} {archive_basename}\n" + with tempfile.NamedTemporaryFile(mode="w", suffix=".sha256", delete=False) as tmp: + tmp.write(content) + tmp_path = tmp.name + + try: + s3_uri = f"s3://{bucket}/{sha256_key}" + result = subprocess.run( + ["aws", "s3", "cp", tmp_path, s3_uri], + capture_output=True, text=True, + ) + if result.returncode != 0: + raise RuntimeError(f"upload failed: {result.stderr.strip()}") + finally: + os.unlink(tmp_path) + + +def main() -> None: + 
parser = argparse.ArgumentParser( + description="Backfill SHA256 checksums for S3 snapshot archives") + parser.add_argument("--bucket", required=True, help="S3 bucket name") + parser.add_argument("--env", default="", + help="Filter by environment prefix (e.g. hoodi, mainnet)") + parser.add_argument("--key", default="", + help="Process a single S3 key instead of scanning") + parser.add_argument("--dry-run", action="store_true", + help="List archives that need checksums without processing") + parser.add_argument("--force", action="store_true", + help="Recompute even if .sha256 already exists") + args = parser.parse_args() + + prefix = f"{args.env}/" if args.env else "" + + if args.key: + if not args.key.endswith(".tar.gz"): + print(f"ERROR: --key must end with .tar.gz, got: {args.key}", + file=sys.stderr) + sys.exit(1) + targets = [args.key] + else: + print(f"Listing archives in s3://{args.bucket}/{prefix or '*'} ...") + archives = list_s3_keys(args.bucket, prefix, ".tar.gz") + existing = set(list_s3_keys(args.bucket, prefix, ".tar.gz.sha256")) + + if args.force: + targets = archives + else: + targets = [k for k in archives if k + ".sha256" not in existing] + + print(f" Total archives: {len(archives)}") + print(f" Already have .sha256: {len(archives) - len(targets)}") + print(f" Need processing: {len(targets)}") + + if not targets: + print("\nNothing to do.") + return + + if args.dry_run: + print("\n[DRY RUN] Archives that would be processed:") + for key in targets: + size = get_object_size(args.bucket, key) + print(f" {key} ({human_size(size)})") + return + + print(f"\nProcessing {len(targets)} archive(s)...\n") + for i, key in enumerate(targets, 1): + basename = os.path.basename(key) + sha256_key = key + ".sha256" + size = get_object_size(args.bucket, key) + print(f"[{i}/{len(targets)}] {key} ({human_size(size)})") + + t0 = time.time() + print(f" Streaming and computing SHA256...") + sha256_hex = compute_sha256_streaming(args.bucket, key) + elapsed = time.time() - 
t0 + print(f" SHA256: {sha256_hex} ({elapsed:.1f}s)") + + upload_sha256(args.bucket, sha256_key, sha256_hex, basename) + print(f" Uploaded: s3://{args.bucket}/{sha256_key}\n") + + print(f"Done. Processed {len(targets)} archive(s).") + + +if __name__ == "__main__": + main() diff --git a/ops/snapshot/list_snapshots.py b/ops/snapshot/list_snapshots.py new file mode 100644 index 0000000..caa9894 --- /dev/null +++ b/ops/snapshot/list_snapshots.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +from __future__ import annotations +""" +List snapshot files in S3 bucket. + +Usage: + python3 list_snapshots.py + python3 list_snapshots.py --env hoodi + python3 list_snapshots.py --env mainnet --bucket my-bucket +""" + +import argparse +import subprocess +import json +import sys + + +def list_snapshots(bucket: str, prefix: str = "") -> list[dict]: + cmd = ["aws", "s3api", "list-objects-v2", + "--bucket", bucket, + "--query", "Contents[?ends_with(Key, '.tar.gz')].[Key,Size,LastModified]", + "--output", "json"] + if prefix: + cmd += ["--prefix", prefix] + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"ERROR: {result.stderr.strip()}", file=sys.stderr) + sys.exit(1) + + items = json.loads(result.stdout or "[]") or [] + return [{"key": r[0], "size": r[1], "last_modified": r[2]} for r in items] + + +def human_size(size_bytes: int) -> str: + for unit in ["B", "KB", "MB", "GB", "TB"]: + if size_bytes < 1024: + return f"{size_bytes:.1f} {unit}" + size_bytes /= 1024 + return f"{size_bytes:.1f} PB" + + +def main() -> None: + parser = argparse.ArgumentParser(description="List snapshots in S3 bucket") + parser.add_argument("--bucket", default="morph-0582-morph-technical-department-snapshot", + help="S3 bucket name") + parser.add_argument("--env", default="", + help="Filter by environment prefix (e.g. 
hoodi, mainnet)") + args = parser.parse_args() + + prefix = f"{args.env}/" if args.env else "" + snapshots = list_snapshots(args.bucket, prefix) + + if not snapshots: + print(f"No snapshots found in s3://{args.bucket}/{prefix}") + return + + snapshots.sort(key=lambda x: x["last_modified"], reverse=True) + + print(f"\nSnapshots in s3://{args.bucket}/{prefix or '*'}") + print(f"{'Last Modified':<28} {'Size':>10} Key") + print("-" * 80) + for s in snapshots: + print(f"{s['last_modified']:<28} {human_size(s['size']):>10} {s['key']}") + print(f"\nTotal: {len(snapshots)} snapshot(s)") + + +if __name__ == "__main__": + main() diff --git a/ops/snapshot/metrics_server.py b/ops/snapshot/metrics_server.py index 8c6e183..65916db 100644 --- a/ops/snapshot/metrics_server.py +++ b/ops/snapshot/metrics_server.py @@ -14,15 +14,35 @@ import http.server import os import socket +import time METRICS_FILE = os.environ.get("METRICS_FILE", "/tmp/morph_snapshot_metrics.prom") PORT = int(os.environ.get("METRICS_PORT", "6060")) -EMPTY_METRICS = ( - "# HELP morph_snapshot_readme_update_status 1 if last README update succeeded, 0 if failed\n" - "# TYPE morph_snapshot_readme_update_status gauge\n" - "# (no data yet — update_readme.sh has not run)\n" -) + +def _default_metrics() -> str: + """Default metrics written on first startup — status=1 to avoid false alarms.""" + environment = os.environ.get("ENVIRONMENT", "unknown") + ts = int(time.time()) + return ( + "# HELP morph_snapshot_readme_update_status 1 if last README update succeeded, 0 if failed\n" + "# TYPE morph_snapshot_readme_update_status gauge\n" + f'morph_snapshot_readme_update_status{{environment="{environment}",snapshot="pending"}} 1\n' + "# HELP morph_snapshot_readme_update_timestamp_seconds Unix timestamp of last run\n" + "# TYPE morph_snapshot_readme_update_timestamp_seconds gauge\n" + f'morph_snapshot_readme_update_timestamp_seconds{{environment="{environment}",snapshot="pending"}} {ts}\n' + ) + + +def _init_metrics_file() -> 
None: + """Write default metrics if file does not exist yet.""" + if not os.path.exists(METRICS_FILE): + try: + with open(METRICS_FILE, "w") as f: + f.write(_default_metrics()) + print(f"Initialized default metrics: {METRICS_FILE}") + except OSError as e: + print(f"WARNING: could not initialize metrics file: {e}") class MetricsHandler(http.server.BaseHTTPRequestHandler): @@ -52,6 +72,7 @@ def log_message(self, fmt, *args): if __name__ == "__main__": + _init_metrics_file() server = http.server.HTTPServer(("0.0.0.0", PORT), MetricsHandler) host = socket.gethostname() print(f"morph-snapshot metrics server listening on http://{host}:{PORT}/metrics") diff --git a/ops/snapshot/snapshot.env.example b/ops/snapshot/snapshot.env.example index 0d6f6b9..7e1146e 100644 --- a/ops/snapshot/snapshot.env.example +++ b/ops/snapshot/snapshot.env.example @@ -23,6 +23,12 @@ GH_TOKEN= # GitHub repository in owner/repo format GITHUB_REPOSITORY=morph-l2/run-morph-node +# ── Schedule ────────────────────────────────────────────────────────────────── + +# Days of month to run (UTC). Script exits immediately on other days. +# Set to "any" to bypass the check (e.g. for manual testing). +SNAPSHOT_DAYS=13,28 + # ── Snapshot type ───────────────────────────────────────────────────────────── # Prefix for the snapshot name: snapshot | mpt-snapshot | full-snapshot @@ -32,9 +38,9 @@ SNAPSHOT_PREFIX=snapshot # ── Paths ───────────────────────────────────────────────────────────────────── -# Root directory of chain data for this environment +# Root directory of chain data for this environment — required, no default # e.g. /data/morph-hoodi or /data/morph-mainnet -MORPH_HOME=/data/morph-hoodi +MORPH_HOME= # Directory packed as geth/ in the snapshot — defaults to $MORPH_HOME/geth-data # e.g. GETH_DB_DIR=/data/morph-hoodi/geth-data @@ -50,6 +56,15 @@ NODE_DB_DIR= # e.g. 
SNAPSHOT_WORK_DIR=/data/morph-hoodi/snapshot_work SNAPSHOT_WORK_DIR= +# ── Prune (full node only) ──────────────────────────────────────────────────── + +# Set to true to run `geth snapshot prune-state` before copying data. +# Only for full nodes — leave unset or false for archive nodes. +# GETH_PRUNE=true + +# Path to the geth binary — defaults to `geth` (must be in PATH). +# GETH_BIN=/usr/local/bin/geth + # ── Service ─────────────────────────────────────────────────────────────────── # Geth JSON-RPC endpoint — defaults to http://127.0.0.1:8545 diff --git a/ops/snapshot/snapshot_make.py b/ops/snapshot/snapshot_make.py index da79a1f..9d3855b 100644 --- a/ops/snapshot/snapshot_make.py +++ b/ops/snapshot/snapshot_make.py @@ -24,6 +24,7 @@ pm2 save """ +import hashlib import json import os import shutil @@ -37,7 +38,6 @@ sys.path.insert(0, str(Path(__file__).parent.resolve())) SCRIPT_DIR = Path(__file__).parent.resolve() -REPO_DIR = SCRIPT_DIR.parent.parent # ── Env file loader ──────────────────────────────────────────────────────────── @@ -93,11 +93,25 @@ def main() -> None: env_file = os.environ.get("ENV_FILE", str(SCRIPT_DIR / "snapshot.env")) load_env_file(env_file) - environment = os.environ.get("ENVIRONMENT", "mainnet") - morph_home = os.environ.get("MORPH_HOME", f"/data/{environment}") - s3_bucket = os.environ.get("S3_BUCKET", "") - if not s3_bucket: - print("ERROR: S3_BUCKET is required", file=sys.stderr) + # SNAPSHOT_DAYS: comma-separated days of month to run (default: 13,28). + # On other days the script exits immediately — prevents accidental runs on pm2 start. + # Set SNAPSHOT_DAYS=any to bypass this check (e.g. for manual testing). 
+    snapshot_days_env = os.environ.get("SNAPSHOT_DAYS", "13,28")
+    if snapshot_days_env.strip().lower() != "any":
+        allowed_days = {int(d.strip()) for d in snapshot_days_env.split(",")}
+        today = datetime.now(timezone.utc).day
+        if today not in allowed_days:
+            print(f"Today is day {today}, not in SNAPSHOT_DAYS={snapshot_days_env}. Exiting.")
+            sys.exit(0)
+
+    environment = os.environ.get("ENVIRONMENT", "mainnet")
+    morph_home = os.environ.get("MORPH_HOME", "")
+    s3_bucket = os.environ.get("S3_BUCKET", "")
+
+    missing = [k for k, v in [("MORPH_HOME", morph_home), ("S3_BUCKET", s3_bucket)] if not v]
+    if missing:
+        for k in missing:
+            print(f"ERROR: {k} is required", file=sys.stderr)
         sys.exit(1)
 
     # GETH_DB_DIR / NODE_DB_DIR point directly to the directories that will be
@@ -113,6 +127,28 @@ def main() -> None:
     work_dir = os.path.join(work_base, "staging")
     snapshot_file = os.path.join(work_base, "snapshot.tar.gz")
 
+    # Safety check: SNAPSHOT_WORK_DIR must not overlap with actual data directories.
+    # The script deletes and recreates work_base at startup — if work_base IS or CONTAINS
+    # a data directory, that data will be wiped. Placing work_base *inside* MORPH_HOME
+    # (as a dedicated subdirectory) is safe as long as it doesn't overlap with geth/node data.
+    def _is_subpath(child: str, parent: str) -> bool:
+        child = os.path.realpath(child)
+        parent = os.path.realpath(parent)
+        return child == parent or child.startswith(parent.rstrip("/") + "/")
+
+    # Only block overlap with the actual data dirs, not with MORPH_HOME itself.
+ protected = {"GETH_DB_DIR": geth_db_dir, "NODE_DB_DIR": node_db_dir} + for var, path in protected.items(): + if not path: + continue + if _is_subpath(path, work_base) or _is_subpath(work_base, path): + print( + f"ERROR: SNAPSHOT_WORK_DIR={work_base!r} overlaps with {var}={path!r}.\n" + f" SNAPSHOT_WORK_DIR must be a dedicated directory outside all data paths.", + file=sys.stderr, + ) + sys.exit(1) + # SNAPSHOT_PREFIX allows different snapshot types to coexist: # e.g. "snapshot", "mpt-snapshot", "full-snapshot" snapshot_prefix = os.environ.get("SNAPSHOT_PREFIX", "snapshot") @@ -141,32 +177,102 @@ def main() -> None: # ── Step 1: Stop services ───────────────────────────────────────────── print("\n[1/6] Stopping services...") run(["pm2", "stop", "morph-node"]) + services_stopped = True # morph-node is down; exception handler must restart it + + # Stop morph-geth cleanly so geth can flush the snapshot journal to disk + # (BlockChain.Stop → snaps.Journal) before prune or copy begins. + # pm2 stop sends SIGTERM but returns immediately — geth may still be running. + # Poll the geth LOCK file: it exists as long as geth holds the datadir lock, + # and disappears only when the process has fully exited. + # + # IMPORTANT: For GETH_PRUNE=true to work, morph-geth must have + # kill_timeout: 120000 in its ecosystem.config.js so PM2 does not + # SIGKILL geth before the snapshot journal is written to disk. 
+ geth_lock = os.path.join(geth_db_dir, "geth", "LOCK") + print(" Stopping morph-geth (waiting for LOCK file to disappear, up to 120s)...") run(["pm2", "stop", "morph-geth"]) - services_stopped = True - time.sleep(10) + for i in range(120): + if not os.path.exists(geth_lock): + print(f" morph-geth exited after {i}s") + break + time.sleep(1) + else: + print(" WARNING: geth LOCK file still present after 120s — proceeding anyway") + print("✅ Services stopped") + # ── Step 1.5: Optional prune (full node only) ───────────────────────── + # Set GETH_PRUNE=true in snapshot.env to run `geth snapshot prune-state` + # before copying data. Leave unset (or false) for archive nodes. + geth_bin = os.environ.get("GETH_BIN") or "geth" + if os.environ.get("GETH_PRUNE", "").lower() in ("1", "true", "yes"): + print("\n[1.5/6] Pruning geth state (may take a while)...") + run([geth_bin, "snapshot", "prune-state", "--datadir", geth_db_dir]) + print("✅ Prune complete") + else: + print("\n[1.5/6] Skipping prune (GETH_PRUNE not set)") + # ── Step 2: Create snapshot ─────────────────────────────────────────── print("\n[2/6] Creating snapshot...") - if os.path.exists(work_dir): - shutil.rmtree(work_dir) + named_dir = os.path.join(work_base, snapshot_name) + for d in [work_dir, named_dir]: + if os.path.exists(d): + shutil.rmtree(d) os.makedirs(work_dir) - shutil.copytree(geth_db_dir, os.path.join(work_dir, "geth")) - shutil.copytree(node_db_dir, os.path.join(work_dir, "data")) - print(f"Compressing to {snapshot_file}...") - run(["tar", "-czf", snapshot_file, "-C", work_dir, "."]) - shutil.rmtree(work_dir) + # geth: only chaindata is needed for a snapshot + geth_src = os.path.join(geth_db_dir, "geth", "chaindata") + geth_dst = os.path.join(work_dir, "geth", "chaindata") + print(f" Copying geth chaindata: {geth_src} (may take a while...)") + shutil.copytree(geth_src, geth_dst) + geth_size = subprocess.check_output(["du", "-sh", geth_dst]).decode().split()[0] + print(f" ✅ geth chaindata copied: 
{geth_size}") + + # node: only the 5 essential db directories + node_dst = os.path.join(work_dir, "data") + os.makedirs(node_dst) + for db in ["blockstore.db", "cs.wal", "state.db", "tx_index.db", "evidence.db"]: + src = os.path.join(node_db_dir, db) + dst = os.path.join(node_dst, db) + print(f" Copying {db}...") + shutil.copytree(src, dst) + node_size = subprocess.check_output(["du", "-sh", node_dst]).decode().split()[0] + print(f" ✅ node data copied: {node_size}") + + # Rename staging/ to snapshot_name so the tar extracts to a named directory. + os.rename(work_dir, named_dir) + + print(f" Compressing to {snapshot_file} (may take a while...)") + run(["tar", "-czf", snapshot_file, "-C", work_base, snapshot_name]) + shutil.rmtree(named_dir) size = subprocess.check_output(["du", "-sh", snapshot_file]).decode().split()[0] print(f"✅ Snapshot created: {size}") # ── Step 3: Upload to S3 ────────────────────────────────────────────── print("\n[3/6] Uploading to S3...") - s3_key = f"{environment}/{snapshot_name}.tar.gz" - run(["aws", "s3", "cp", snapshot_file, f"s3://{s3_bucket}/{s3_key}", "--no-progress"]) + + # Compute SHA256 of the archive for integrity verification. 
+ print(f" Computing SHA256 of {snapshot_file} (may take a while...)") + sha256 = hashlib.sha256() + with open(snapshot_file, "rb") as f: + for chunk in iter(lambda: f.read(8 * 1024 * 1024), b""): + sha256.update(chunk) + sha256_hex = sha256.hexdigest() + sha256_file = snapshot_file + ".sha256" + archive_basename = os.path.basename(snapshot_file) + with open(sha256_file, "w") as f: + f.write(f"{sha256_hex} {archive_basename}\n") + print(f" SHA256: {sha256_hex}") + + s3_key = f"{environment}/{snapshot_name}.tar.gz" + s3_sha256_key = f"{environment}/{snapshot_name}.tar.gz.sha256" + run(["aws", "s3", "cp", snapshot_file, f"s3://{s3_bucket}/{s3_key}"]) + run(["aws", "s3", "cp", sha256_file, f"s3://{s3_bucket}/{s3_sha256_key}"]) print(f"✅ Uploaded: s3://{s3_bucket}/{s3_key}") + print(f"✅ Uploaded: s3://{s3_bucket}/{s3_sha256_key}") os.remove(snapshot_file) - print(f"✅ Removed local snapshot file: {snapshot_file}") + os.remove(sha256_file) + print(f"✅ Removed local snapshot and sha256 files") # ── Step 4: Start geth, collect base_height ─────────────────────────── print("\n[4/6] Starting morph-geth and collecting base_height...") @@ -184,15 +290,39 @@ def main() -> None: # ── Step 6: Update README via GitHub API ────────────────────────────── print("\n[6/6] Updating README snapshot table...") - run([sys.executable, str(REPO_DIR / "ops" / "snapshot" / "update_metadata.py")]) - - except Exception as e: - print(f"\nERROR: {e}", file=sys.stderr) + print("\n" + "─" * 60) + print(" Snapshot summary (use this to create PR manually if step 6 fails):") + print(f" ENVIRONMENT = {environment}") + print(f" SNAPSHOT_NAME = {snapshot_name}") + print(f" BASE_HEIGHT = {base_height}") + print(f" S3_KEY = s3://{s3_bucket}/{s3_key}") + print(f" SHA256 = {sha256_hex}") + print(" l1_msg_start_height and derivation_start_height will be") + print(" printed by update_metadata.py — check log if PR creation fails.") + print("─" * 60 + "\n") + run([sys.executable, str(SCRIPT_DIR / 
"update_metadata.py")]) + + if os.path.exists(work_base): + shutil.rmtree(work_base) + + from update_readme import write_metric # noqa: E402 + write_metric(1, environment, snapshot_name) + + except (Exception, KeyboardInterrupt) as e: + if isinstance(e, KeyboardInterrupt): + print("\nInterrupted (SIGINT received).", file=sys.stderr) + else: + print(f"\nERROR: {e}", file=sys.stderr) if services_stopped: print("Recovering services...") run(["pm2", "start", "morph-geth"], check=False) run(["pm2", "start", "morph-node"], check=False) print("Services recovered.") + try: + from update_readme import write_metric # noqa: E402 + write_metric(0, environment, snapshot_name) + except Exception: + pass sys.exit(1) print(f"\n=== Done at {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')} ===") diff --git a/ops/snapshot/update_metadata.py b/ops/snapshot/update_metadata.py index 41f8be7..e2c235d 100644 --- a/ops/snapshot/update_metadata.py +++ b/ops/snapshot/update_metadata.py @@ -42,7 +42,6 @@ import base64 import json import os -import re import sys import urllib.error import urllib.request @@ -72,7 +71,10 @@ def _http_request(req: urllib.request.Request, url: str) -> dict: def http_get(url: str, token: str = "") -> dict: - headers = {"Accept": "application/json"} + headers = { + "Accept": "application/json", + "User-Agent": "Mozilla/5.0 (compatible; morph-snapshot/1.0)", + } if token: headers["Authorization"] = f"Bearer {token}" return _http_request(urllib.request.Request(url, headers=headers), url) @@ -124,12 +126,22 @@ def get(path): l1_data = get(f"/v1/batch/l1_msg_start_height/{base_height}") deriv_data = get(f"/v1/batch/derivation_start_height/{base_height}") - if "l1_msg_start_height" not in l1_data: + # API may return a plain number or a dict like {"l1_msg_start_height": N} + if isinstance(l1_data, (int, float)): + l1_msg_height = str(int(l1_data)) + elif isinstance(l1_data, dict) and "l1_msg_start_height" in l1_data: + l1_msg_height = 
str(l1_data["l1_msg_start_height"]) + else: raise RuntimeError(f"Unexpected indexer response for l1_msg_start_height: {l1_data}") - if "derivation_start_height" not in deriv_data: + + if isinstance(deriv_data, (int, float)): + deriv_height = str(int(deriv_data)) + elif isinstance(deriv_data, dict) and "derivation_start_height" in deriv_data: + deriv_height = str(deriv_data["derivation_start_height"]) + else: raise RuntimeError(f"Unexpected indexer response for derivation_start_height: {deriv_data}") - return str(l1_data["l1_msg_start_height"]), str(deriv_data["derivation_start_height"]) + return l1_msg_height, deriv_height # ── GitHub API ──────────────────────────────────────────────────────────────── @@ -158,22 +170,20 @@ def gh_branch_exists(repo: str, branch: str, token: str) -> bool: def resolve_snapshot_name(repo: str, environment: str, snapshot_name: str, token: str) -> str: - """Return a snapshot_name whose branch does not yet exist on GitHub. - - Increments the trailing -N suffix until a free branch is found, so that - snapshot_name, S3 key, README row, and branch name all stay in sync. + """Verify that the snapshot branch does not already exist on GitHub. - e.g. snapshot-20260309-1 → snapshot-20260309-2 if the -1 branch exists. + Each date should have exactly one snapshot. If the branch for snapshot_name + already exists it means today's snapshot was already created — raise an error + instead of silently creating a -2 duplicate. 
""" - base_name = re.sub(r"-\d+$", "", snapshot_name) - counter = 1 - candidate = f"{base_name}-{counter}" - while gh_branch_exists(repo, f"snapshot/{environment}-{candidate}", token): - counter += 1 - candidate = f"{base_name}-{counter}" - if candidate != snapshot_name: - print(f" Branch for {snapshot_name} already exists → using {candidate}") - return candidate + branch = f"snapshot/{environment}-{snapshot_name}" + if gh_branch_exists(repo, branch, token): + raise RuntimeError( + f"Branch '{branch}' already exists on GitHub.\n" + f" Today's snapshot ({snapshot_name}) has already been created.\n" + f" If you need to re-run, delete the branch first or use a different SNAPSHOT_PREFIX." + ) + return snapshot_name def gh_create_branch(repo: str, branch: str, sha: str, token: str) -> None: @@ -226,6 +236,8 @@ def apply_readme_update(content: str, environment: str, snapshot_name: str, # ── Main ────────────────────────────────────────────────────────────────────── def main() -> None: + from update_readme import write_metric # noqa: E402 + dry_run = os.environ.get("DRY_RUN", "0") == "1" # Validate required env vars @@ -250,52 +262,59 @@ def main() -> None: file=sys.stderr) sys.exit(1) - # ── Step 1: metadata ────────────────────────────────────────────────────── - l1_msg_height = os.environ.get("L1_MSG_HEIGHT", "") - deriv_height = os.environ.get("DERIV_HEIGHT", "") - - if l1_msg_height and deriv_height: - print(f"\n[1/3] Using provided metadata (API call skipped):") - else: - print(f"\n[1/3] Fetching metadata from indexer (base_height={base_height}) ...") - l1_msg_height, deriv_height = fetch_metadata(environment, base_height) - - print(f" l1_msg_start_height = {l1_msg_height}") - print(f" derivation_start_height = {deriv_height}") - - if dry_run: - print("\n[DRY RUN] Skipping README update and PR creation.") - print(f" Would insert: env={environment} snapshot={snapshot_name}") - print(f" base={base_height} l1_msg={l1_msg_height} deriv={deriv_height}") - return - - # ── 
Step 2: update README in memory, push via GitHub API ───────────────── - print(f"\n[2/3] Updating README via GitHub API ...") - current_content, blob_sha = gh_get_file(repo, readme_path, token) - updated_content = apply_readme_update( - current_content, environment, snapshot_name, deriv_height, l1_msg_height, base_height - ) - - branch = f"snapshot/{environment}-{snapshot_name}" - commit_msg = f"snapshot: add {snapshot_name} ({environment})" - main_sha = gh_get_main_sha(repo, token) - - gh_create_branch(repo, branch, main_sha, token) - gh_update_file(repo, readme_path, updated_content, blob_sha, branch, commit_msg, token) - - # ── Step 3: open PR ─────────────────────────────────────────────────────── - print(f"\n[3/3] Creating PR ...") - pr_body = ( - f"Auto-generated by snapshot workflow.\n\n" - f"- Environment: `{environment}`\n" - f"- Snapshot: `{snapshot_name}`\n" - f"- L2 Base Height: `{base_height}`\n" - f"- L1 Msg Start Height: `{l1_msg_height}`\n" - f"- Derivation Start Height: `{deriv_height}`" - ) - pr_url = gh_create_pr(repo, branch, commit_msg, pr_body, token) - - print(f"\n✅ Done. 
PR opened: {pr_url}") + try: + # ── Step 1: metadata ────────────────────────────────────────────────── + l1_msg_height = os.environ.get("L1_MSG_HEIGHT", "") + deriv_height = os.environ.get("DERIV_HEIGHT", "") + + if l1_msg_height and deriv_height: + print(f"\n[1/3] Using provided metadata (API call skipped):") + else: + print(f"\n[1/3] Fetching metadata from indexer (base_height={base_height}) ...") + l1_msg_height, deriv_height = fetch_metadata(environment, base_height) + + print(f" l1_msg_start_height = {l1_msg_height}") + print(f" derivation_start_height = {deriv_height}") + + if dry_run: + print("\n[DRY RUN] Skipping README update and PR creation.") + print(f" Would insert: env={environment} snapshot={snapshot_name}") + print(f" base={base_height} l1_msg={l1_msg_height} deriv={deriv_height}") + return + + # ── Step 2: update README in memory, push via GitHub API ───────────── + print(f"\n[2/3] Updating README via GitHub API ...") + current_content, blob_sha = gh_get_file(repo, readme_path, token) + updated_content = apply_readme_update( + current_content, environment, snapshot_name, deriv_height, l1_msg_height, base_height + ) + + branch = f"snapshot/{environment}-{snapshot_name}" + commit_msg = f"snapshot: add {snapshot_name} ({environment})" + main_sha = gh_get_main_sha(repo, token) + + gh_create_branch(repo, branch, main_sha, token) + gh_update_file(repo, readme_path, updated_content, blob_sha, branch, commit_msg, token) + + # ── Step 3: open PR ─────────────────────────────────────────────────── + print(f"\n[3/3] Creating PR ...") + pr_body = ( + f"Auto-generated by snapshot workflow.\n\n" + f"- Environment: `{environment}`\n" + f"- Snapshot: `{snapshot_name}`\n" + f"- L2 Base Height: `{base_height}`\n" + f"- L1 Msg Start Height: `{l1_msg_height}`\n" + f"- Derivation Start Height: `{deriv_height}`" + ) + pr_url = gh_create_pr(repo, branch, commit_msg, pr_body, token) + + print(f"\n✅ Done. 
PR opened: {pr_url}") + write_metric(1, environment, snapshot_name) + + except Exception as e: + print(f"\nERROR: {e}", file=sys.stderr) + write_metric(0, environment, snapshot_name) + sys.exit(1) if __name__ == "__main__": From 8079e89cf0551fdf48c0ce2a0ddfaf749e397931 Mon Sep 17 00:00:00 2001 From: corey Date: Fri, 3 Apr 2026 15:44:59 +0800 Subject: [PATCH 5/5] feat: auto-update env files (snapshot name + heights) alongside README update_metadata.py now also updates morph-node/.env or .env_hoodi: - Snapshot name variable (MAINNET_MPT_SNAPSHOT_NAME / HOODI_MPT_SNAPSHOT_NAME) - DERIVATION_START_HEIGHT, L1_MSG_START_HEIGHT, L2_BASE_HEIGHT Both changes are pushed to the same PR branch. Also remove holesky from supported environments (no longer maintained). Co-Authored-By: Claude Opus 4.6 --- ops/snapshot/update_metadata.py | 63 ++++++++++++++++++++++++++++----- ops/snapshot/update_readme.py | 3 +- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/ops/snapshot/update_metadata.py b/ops/snapshot/update_metadata.py index e2c235d..25f1a0e 100644 --- a/ops/snapshot/update_metadata.py +++ b/ops/snapshot/update_metadata.py @@ -7,7 +7,8 @@ 1. Queries the internal explorer-indexer API for l1_msg_start_height and derivation_start_height. 2. Fetches README.md content from GitHub, applies the table update in memory. - 3. Creates a new branch, pushes the updated file, and opens a PR — + 3. Fetches morph-node/.env (or .env_hoodi) and updates snapshot name + heights. + 4. Creates a new branch, pushes both updated files, and opens a PR — all via GitHub REST API (no git or gh CLI required). 
Environment variables: @@ -42,6 +43,7 @@ import base64 import json import os +import re import sys import urllib.error import urllib.request @@ -51,7 +53,16 @@ INDEXER_BASE_URLS = { "mainnet": "https://explorer-indexer.morphl2.io", "hoodi": "https://explorer-indexer-hoodi.morphl2.io", - "holesky": "https://explorer-indexer-holesky.morphl2.io", +} + +ENV_FILE_MAP = { + "mainnet": "morph-node/.env", + "hoodi": "morph-node/.env_hoodi", +} + +SNAPSHOT_VAR_MAP = { + "mainnet": {"mpt": "MAINNET_MPT_SNAPSHOT_NAME", "zk": "MAINNET_SNAPSHOT_NAME"}, + "hoodi": {"mpt": "HOODI_MPT_SNAPSHOT_NAME", "zk": "HOODI_SNAPSHOT_NAME"}, } SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -215,6 +226,26 @@ def gh_create_pr(repo: str, branch: str, title: str, body: str, token: str) -> s }, token) return data["html_url"] +# ── Env file update (in-memory) ─────────────────────────────────────────────── + +def apply_env_update(content: str, environment: str, snapshot_name: str, + deriv_height: str, l1_msg_height: str, base_height: str) -> str: + """Update snapshot name and validator heights in a .env file.""" + is_mpt = snapshot_name.startswith("mpt-") + snapshot_var = SNAPSHOT_VAR_MAP[environment]["mpt" if is_mpt else "zk"] + + def replace(var, value): + return re.sub(rf'^({re.escape(var)}=).*$', rf'\g<1>{value}', content, flags=re.MULTILINE) + + content = replace(snapshot_var, snapshot_name) + content = replace("DERIVATION_START_HEIGHT", deriv_height) + content = replace("L1_MSG_START_HEIGHT", l1_msg_height) + if is_mpt: + content = replace("L2_BASE_HEIGHT", base_height) + + return content + + # ── README update (in-memory) ───────────────────────────────────────────────── def build_new_row(environment: str, snapshot_name: str, @@ -277,13 +308,16 @@ def main() -> None: print(f" derivation_start_height = {deriv_height}") if dry_run: - print("\n[DRY RUN] Skipping README update and PR creation.") - print(f" Would insert: env={environment} snapshot={snapshot_name}") + print("\n[DRY 
RUN] Skipping README/env update and PR creation.") + print(f" Would update: env={environment} snapshot={snapshot_name}") print(f" base={base_height} l1_msg={l1_msg_height} deriv={deriv_height}") + env_path = ENV_FILE_MAP.get(environment) + if env_path: + print(f" Would update: {env_path}") return - # ── Step 2: update README in memory, push via GitHub API ───────────── - print(f"\n[2/3] Updating README via GitHub API ...") + # ── Step 2: update README + env file, push via GitHub API ──────────── + print(f"\n[2/4] Updating README via GitHub API ...") current_content, blob_sha = gh_get_file(repo, readme_path, token) updated_content = apply_readme_update( current_content, environment, snapshot_name, deriv_height, l1_msg_height, base_height @@ -296,8 +330,21 @@ def main() -> None: gh_create_branch(repo, branch, main_sha, token) gh_update_file(repo, readme_path, updated_content, blob_sha, branch, commit_msg, token) - # ── Step 3: open PR ─────────────────────────────────────────────────── - print(f"\n[3/3] Creating PR ...") + # ── Step 3: update env file ─────────────────────────────────────────── + env_path = ENV_FILE_MAP.get(environment) + if env_path: + print(f"\n[3/4] Updating {env_path} ...") + env_content, env_blob_sha = gh_get_file(repo, env_path, token) + updated_env = apply_env_update( + env_content, environment, snapshot_name, deriv_height, l1_msg_height, base_height + ) + env_commit_msg = f"snapshot: update {env_path} for {snapshot_name} ({environment})" + gh_update_file(repo, env_path, updated_env, env_blob_sha, branch, env_commit_msg, token) + else: + print(f"\n[3/4] No env file mapping for {environment}, skipping.") + + # ── Step 4: open PR ─────────────────────────────────────────────────── + print(f"\n[4/4] Creating PR ...") pr_body = ( f"Auto-generated by snapshot workflow.\n\n" f"- Environment: `{environment}`\n" diff --git a/ops/snapshot/update_readme.py b/ops/snapshot/update_readme.py index 006162b..a1ee1c2 100644 --- a/ops/snapshot/update_readme.py 
+++ b/ops/snapshot/update_readme.py @@ -6,7 +6,7 @@ Inserts a new row at the TOP of the target environment's snapshot table. Environment variables: - ENVIRONMENT - mainnet | hoodi | holesky + ENVIRONMENT - mainnet | hoodi SNAPSHOT_NAME - e.g. snapshot-20260225-1 BASE_HEIGHT - L2 geth block height (L2 Base Height) L1_MSG_HEIGHT - l1_msg_start_height from indexer API @@ -31,7 +31,6 @@ SECTION_MARKERS = { "mainnet": "**For mainnet**", "hoodi": "**For hoodi testnet**", - "holesky": "**For holesky testnet(legacy)**", } METRICS_FILE = os.environ.get("METRICS_FILE", "/tmp/morph_snapshot_metrics.prom")