diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2ebd449 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,95 @@ +# run-morph-node + +Morph L2 node operator toolkit. Provides Docker-based setup for running Morph nodes (standard and MPT), plus automated snapshot creation and distribution infrastructure. + +## Project Structure + +``` +morph-node/ # Docker Compose setup for running a Morph node + Makefile # All node operations (run/stop/download-snapshot) + docker-compose.yml # Service definitions: geth + node/validator + .env # Mainnet config + .env_holesky # Holesky testnet config + .env_hoodi # Hoodi testnet config + .env_mpt # MPT-specific overrides (loaded on top of env) + entrypoint-geth.sh # Standard geth startup script + entrypoint-geth-mpt.sh # MPT geth startup script (--morph-mpt flag) + +ops/snapshot/ # Snapshot automation scripts (runs on server via cron/pm2) + snapshot_make.py # Entry point: stop → snapshot → S3 upload → restart → update README + update_metadata.py # Fetches indexer API data, creates branch + PR via GitHub API + update_readme.py # In-memory README table insertion logic + Prometheus metrics + metrics_server.py # HTTP server exposing snapshot metrics on :6060/metrics + snapshot.env.example # Configuration reference + +mainnet/hoodi/holesky/ # Chain genesis and config files (static, do not modify) +``` + +## Environments + +| Environment | Makefile prefix | Snapshot CDN | +|-------------|-----------------|--------------| +| Mainnet | (no prefix) | snapshot.morphl2.io/mainnet | +| Hoodi testnet | `-hoodi` | snapshot.morphl2.io/hoodi | +| Holesky testnet (legacy) | `-holesky` | snapshot.morphl2.io/holesky | + +MPT variants use an additional `--env-file .env_mpt` overlay. 
+ +## Common Operations + +```bash +# Run a node +cd morph-node +make run-hoodi-node # hoodi standard +make run-mainnet-mpt-node # mainnet MPT + +# Download snapshot +make download-and-decompress-hoodi-snapshot +make download-and-decompress-mainnet-mpt-snapshot + +# Stop +make stop-node +make stop-validator +``` + +## Snapshot Automation + +Runs on a server via pm2, triggered on configured days of the month. + +**Full flow:** +1. Stop morph-node + morph-geth (pm2) +2. tar geth-data + node-data → upload to S3 +3. Restart geth → wait for RPC → collect `base_height` +4. Restart morph-node +5. Query indexer API for `l1_msg_start_height` and `derivation_start_height` +6. Push updated README row + open PR via GitHub API (no git CLI needed) + +**Configuration:** Copy `ops/snapshot/snapshot.env.example` to the server data directory, fill in `S3_BUCKET`, `GH_TOKEN`, `MORPH_HOME`, `GITHUB_REPOSITORY`. + +**Dry run (safe, no writes):** +```bash +DRY_RUN=1 ENVIRONMENT=mainnet SNAPSHOT_NAME=test-1 BASE_HEIGHT=123 \ + L1_MSG_HEIGHT=456 DERIV_HEIGHT=789 python3 ops/snapshot/update_metadata.py +``` + +## Code Conventions + +- **Python**: stdlib only (no third-party deps), Python 3.9+. Use `urllib.request` for HTTP, not `requests`. +- **Shell scripts**: POSIX sh (`#!/bin/sh`), not bash. Use `set -e` for error handling. +- **Makefile**: Use `define`/`call` macros for repeated patterns. Always check for required tools before running. +- **Error handling**: Scripts must recover services if stopped (see `try/finally` pattern in `snapshot_make.py`). +- **Environment config**: Never hardcode paths or credentials. Always read from env vars with sensible defaults. + +## Security + +- **Never commit** `.env`, `snapshot.env`, or any file containing `GH_TOKEN`, `S3_BUCKET`, or AWS credentials. +- `GH_TOKEN` must be a Fine-grained PAT with only `Contents: Read/Write` and `Pull requests: Read/Write`. +- Snapshot automation opens PRs — it never merges directly to main. 
+- `jwt-secret.txt` is generated locally and never committed. + +## Git Conventions + +- Branch naming: `feat/`, `fix/`, `docs/` prefixes +- Snapshot automation branches: `snapshot/{environment}-{snapshot-name}` +- PRs require at least 1 approval before merging to main +- Commits are GPG-signed (configured in `~/.gitconfig`) diff --git a/morph-node/.env_mpt b/morph-node/.env_mpt new file mode 100644 index 0000000..1defda6 --- /dev/null +++ b/morph-node/.env_mpt @@ -0,0 +1,7 @@ +# MPT specific overrides (loaded after base env to override values) +GETH_ENTRYPOINT_FILE=./entrypoint-geth-mpt.sh +MPT_FORK_TIME=2000000000000 + +# MPT snapshot names +HOODI_MPT_SNAPSHOT_NAME=snapshot-20260211-1 +MAINNET_MPT_SNAPSHOT_NAME=snapshot-20260211-1 diff --git a/morph-node/Makefile b/morph-node/Makefile index b38471c..2aef9ad 100644 --- a/morph-node/Makefile +++ b/morph-node/Makefile @@ -12,6 +12,8 @@ JWT_SECRET_FILE_HOLESKY := $(JWT_SECRET_FILE) JWT_SECRET_FILE_HOODI := $(JWT_SECRET_FILE) +include .env_mpt + generate-jwt: @[ -f $(JWT_SECRET_FILE_MAINNET) ] || (echo "Generating $(JWT_SECRET_FILE_MAINNET)..." 
&& openssl rand -hex 32 > $(JWT_SECRET_FILE_MAINNET) && echo "$(JWT_SECRET_FILE_MAINNET) created.") @@ -31,6 +33,12 @@ run-holesky-node: generate-jwt-holesky run-hoodi-node: generate-jwt-hoodi docker-compose --env-file .env_hoodi up node & +run-hoodi-mpt-node: generate-jwt-hoodi + docker-compose --env-file .env_hoodi --env-file .env_mpt up node & + +run-mainnet-mpt-node: generate-jwt + docker-compose --env-file .env --env-file .env_mpt up node & + stop-node: docker stop morph-node morph-geth @@ -47,6 +55,12 @@ run-holesky-validator: generate-jwt-holesky run-hoodi-validator: generate-jwt-hoodi docker-compose --env-file .env_hoodi up validator & +run-hoodi-mpt-validator: generate-jwt-hoodi + docker-compose --env-file .env_hoodi --env-file .env_mpt up validator & + +run-mainnet-mpt-validator: generate-jwt + docker-compose --env-file .env --env-file .env_mpt up validator & + stop-validator: docker stop validator-node morph-geth @@ -93,6 +107,11 @@ download-and-decompress-hoodi-snapshot: download-and-decompress-mainnet-snapshot: $(call download-and-decompress,$(MAINNET_SNAPSHOT_NAME),https://snapshot.morphl2.io/mainnet) +download-and-decompress-hoodi-mpt-snapshot: + $(call download-and-decompress,$(HOODI_MPT_SNAPSHOT_NAME),https://snapshot.morphl2.io/hoodi) + +download-and-decompress-mainnet-mpt-snapshot: + $(call download-and-decompress,$(MAINNET_MPT_SNAPSHOT_NAME),https://snapshot.morphl2.io/mainnet) diff --git a/morph-node/docker-compose.yml b/morph-node/docker-compose.yml index 72027f8..3225d7f 100644 --- a/morph-node/docker-compose.yml +++ b/morph-node/docker-compose.yml @@ -3,7 +3,7 @@ version: '3.8' services: geth: container_name: morph-geth - image: ghcr.io/morph-l2/go-ethereum:2.1.1 + image: ghcr.io/morph-l2/go-ethereum:2.1.2 restart: unless-stopped ports: - "8545:8545" @@ -26,7 +26,7 @@ services: depends_on: geth: condition: service_started - image: ghcr.io/morph-l2/node:0.4.10 + image: ghcr.io/morph-l2/node:0.4.11 restart: unless-stopped ports: - "26656" @@ 
-53,7 +53,7 @@ services:
     depends_on:
       geth:
         condition: service_started
-    image: ghcr.io/morph-l2/node:0.4.10
+    image: ghcr.io/morph-l2/node:0.4.11
     ports:
       - "26660"
     environment:
diff --git a/morph-node/entrypoint-geth-mpt.sh b/morph-node/entrypoint-geth-mpt.sh
new file mode 100644
index 0000000..ecaf768
--- /dev/null
+++ b/morph-node/entrypoint-geth-mpt.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+if [ ! -f /jwt-secret.txt ]; then
+    echo "Error: jwt-secret.txt not found. Please create it before starting the service."
+    exit 1
+fi
+
+MORPH_FLAG=${MORPH_FLAG:-"morph"}
+
+COMMAND="geth \
+--$MORPH_FLAG \
+--morph-mpt \
+--datadir="./db" \
+--verbosity=3 \
+--http \
+--http.corsdomain="*" \
+--http.vhosts="*" \
+--http.addr=0.0.0.0 \
+--http.port=8545 \
+--http.api=web3,debug,eth,txpool,net,morph,engine,admin \
+--ws \
+--ws.addr=0.0.0.0 \
+--ws.port=8546 \
+--ws.origins="*" \
+--ws.api=web3,debug,eth,txpool,net,morph,engine,admin \
+--authrpc.addr=0.0.0.0 \
+--authrpc.port=8551 \
+--authrpc.vhosts="*" \
+--authrpc.jwtsecret="./jwt-secret.txt" \
+--gcmode=archive \
+--log.filename=./db/geth.log \
+--metrics \
+--metrics.addr=0.0.0.0"
+
+eval $COMMAND
diff --git a/ops/snapshot/README.md b/ops/snapshot/README.md
new file mode 100644
index 0000000..926bf8e
--- /dev/null
+++ b/ops/snapshot/README.md
@@ -0,0 +1,206 @@
+# Snapshot Automation
+
+> 中文版请见 [README.zh.md](./README.zh.md)
+
+Automatically creates a node snapshot every two weeks and syncs the relevant parameters to README.md for users to download.
+
+## Background
+
+Manually creating snapshots is error-prone and tedious. This solution automates the entire process using a server-side cron job and the GitHub REST API — no GitHub Actions or git CLI required.
+ +## Directory Structure + +``` +run-morph-node/ +├── README.md # snapshot table is updated here +└── ops/snapshot/ + ├── README.md # this document + ├── README.zh.md # Chinese version + ├── snapshot.env.example # environment variable template (copy one per environment) + ├── ecosystem.config.js.example # PM2 process config template + ├── snapshot_make.py # entry point: stop → snapshot → upload → restart → update README + ├── update_metadata.py # fetches indexer API data and orchestrates the full update flow + ├── update_readme.py # pure table-update logic (imported by update_metadata.py) + ├── metrics_server.py # persistent HTTP server exposing metrics on :6060/metrics + └── list_snapshots.py # utility to list uploaded snapshots in S3 +``` + +## Workflow + +``` +Server cron job (1st and 15th of each month) + │ + ▼ + ops/snapshot/snapshot_make.py + [1] stop morph-node, morph-geth + [2] copy chain data: + - geth: chaindata only → snapshot/geth/chaindata/ + - node: blockstore.db, cs.wal, state.db, tx_index.db, evidence.db → snapshot/data/ + [3] compress → upload to S3 as {environment}/{snapshot_name}.tar.gz + [4] restart morph-geth → wait for RPC → collect base_height + [5] restart morph-node + [6] call update_metadata.py + │ BASE_HEIGHT, SNAPSHOT_NAME + ▼ + python3 update_metadata.py + ┌─────────────────────────────────────────────────────┐ + │ 1. call explorer-indexer API: │ + │ GET /v1/batch/l1_msg_start_height/ │ + │ GET /v1/batch/derivation_start_height/│ + │ 2. fetch README.md content via GitHub API │ + │ 3. insert new snapshot row at top of table │ + │ 4. create branch + push updated file via GitHub API │ + │ 5. 
open PR via GitHub API │ + └─────────────────────────────────────────────────────┘ +``` + +## Triggers + +| Method | Description | +|---|---| +| Scheduled | Server cron job on the 1st and 15th of each month | +| Manual | SSH into the server and run `snapshot_make.py` directly | + +## Multi-environment Support + +| Environment | Default Indexer API | Override | +|---|---|---| +| mainnet | `https://explorer-indexer.morphl2.io` | `EXPLORER_INDEXER_URL` | +| hoodi | `https://explorer-indexer-hoodi.morphl2.io` | `EXPLORER_INDEXER_URL` | +| holesky | `https://explorer-indexer-holesky.morphl2.io` | `EXPLORER_INDEXER_URL` | + +Each environment runs its own cron job with its own env file. S3 paths and README table sections are automatically scoped by environment. + +Set `EXPLORER_INDEXER_URL` to an internal/intranet URL if the default public endpoint is not reachable from the node server. + +## Deployment + +### 1. Copy Scripts to the Node Server + +The node server does not require git. Copy the scripts manually: + +```bash +# copy all scripts to the data directory of each environment +scp ops/snapshot/*.py user@server:/data/morph-hoodi/ +``` + +### 2. Create the Environment File + +Copy the template into the environment's data directory and fill in the values: + +```bash +cp ops/snapshot/snapshot.env.example /data/morph-hoodi/snapshot.env +vi /data/morph-hoodi/snapshot.env +``` + +All available variables are documented in [`snapshot.env.example`](./snapshot.env.example). + +> ⚠️ **`SNAPSHOT_WORK_DIR` must NOT be set to `MORPH_HOME` or any data directory.** +> The script deletes and recreates this directory at startup. Setting it incorrectly will cause data loss. +> Use a dedicated subdirectory, e.g. `SNAPSHOT_WORK_DIR=/data/morph-hoodi/snapshot_work`. + +These files must **not** be committed to git (add `*.env` to `.gitignore`). + +Also recommended: enable **"Automatically delete head branches"** under repo Settings → General. 
Branches will be deleted automatically after a PR is merged. + +### 3. Configure the Scheduled Job (PM2) + +Copy the ecosystem template and edit `ENV_FILE` and `script` path for your environment: + +```bash +cp ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js +vi /data/morph-hoodi/ecosystem.config.js +``` + +Start and persist: + +```bash +pm2 start /data/morph-hoodi/ecosystem.config.js +pm2 save +``` + +To trigger manually for testing: + +```bash +cd /data/morph-hoodi +nohup env ENV_FILE=/data/morph-hoodi/snapshot.env python3 /data/morph-hoodi/snapshot_make.py \ + > /tmp/snapshot.log 2>&1 & +tail -f /tmp/snapshot.log +``` + +### 4. Start the Metrics Server + +Run `metrics_server.py` as a persistent pm2 process so it survives server reboots: + +```bash +pm2 startup # register pm2 itself as a system startup service (run once) +pm2 start python3 --name morph-snapshot-metrics -- /data/morph-hoodi/metrics_server.py +pm2 save +``` + +Once running, the metrics endpoint is available at `http://:6060/metrics`. + +Exposed metrics: + +| Metric | Type | Description | +|---|---|---| +| `morph_snapshot_readme_update_status` | gauge | 1 = success, 0 = failure | +| `morph_snapshot_readme_update_timestamp_seconds` | gauge | Unix timestamp of the last run | + +Labels: `environment` (mainnet / hoodi / holesky), `snapshot` (snapshot name) + +> Default metrics file path: `/tmp/morph_snapshot_metrics.prom` +> Override via the `METRICS_FILE` environment variable. + +## Listing Snapshots in S3 + +```bash +# list all snapshots +python3 list_snapshots.py + +# filter by environment +python3 list_snapshots.py --env hoodi + +# specify bucket +python3 list_snapshots.py --env hoodi --bucket my-bucket-name +``` + +## Manual Recovery + +If step 6 (README update) fails after a successful S3 upload, re-run `update_metadata.py` directly. 
+The snapshot summary is printed to the log before step 6 starts — use those values: + +```bash +cd /data/morph-hoodi +ENVIRONMENT=hoodi \ +SNAPSHOT_NAME=snapshot-20260312-1 \ +BASE_HEIGHT=3904561 \ +GH_TOKEN=ghp_xxx \ +GITHUB_REPOSITORY=morph-l2/run-morph-node \ +python3 /data/morph-hoodi/update_metadata.py +``` + +If the indexer API values are already known (visible in the log), skip the API call: + +```bash +cd /data/morph-hoodi +ENVIRONMENT=hoodi \ +SNAPSHOT_NAME=snapshot-20260312-1 \ +BASE_HEIGHT=3904561 \ +L1_MSG_HEIGHT=2388518 \ +DERIV_HEIGHT=2401543 \ +GH_TOKEN=ghp_xxx \ +GITHUB_REPOSITORY=morph-l2/run-morph-node \ +python3 /data/morph-hoodi/update_metadata.py +``` + +## Key Design Decisions + +- **`base_height` is collected after geth restarts**: querying the RPC after the snapshot is created and geth is started alone gives the actual block state of the snapshot. `morph-node` is started only after the height is confirmed. +- **Only essential data is included in the snapshot**: geth `chaindata` only; node data includes `blockstore.db`, `cs.wal`, `state.db`, `tx_index.db`, `evidence.db`. Lock files, node keys, and P2P peer lists are excluded. +- **Snapshot extracts to a named directory**: the tar archive extracts to `{snapshot_name}/geth/` and `{snapshot_name}/data/`, matching the archive filename. +- **Fallback recovery on failure**: if the snapshot or upload fails, `snapshot_make.py` attempts to restart both services to avoid prolonged downtime. +- **No GitHub Actions or git CLI required**: `update_metadata.py` uses the GitHub REST API directly — the server only needs Python 3.7+. +- **New entries are inserted at the top of the table**: the latest snapshot always appears in the first row. +- **Changes are merged via PR, not direct push**: a new branch is created and a PR is opened, preserving review opportunity. +- **Per-environment env files**: each environment and snapshot type has its own `snapshot.env` file, specified via `ENV_FILE`. 
diff --git a/ops/snapshot/README.zh.md b/ops/snapshot/README.zh.md new file mode 100644 index 0000000..cc4a058 --- /dev/null +++ b/ops/snapshot/README.zh.md @@ -0,0 +1,205 @@ +# Snapshot 自动化 + +> English version: [README.md](./README.md) + +每两周自动制作一次节点 snapshot,并将相关参数同步到 README.md 供用户下载使用。 + +## 背景 + +手动制作 snapshot 流程繁琐且容易遗漏,本方案通过服务器 cron 定时任务和 GitHub REST API 将全流程自动化,无需 GitHub Actions 或 git CLI。 + +## 目录结构 + +``` +run-morph-node/ +├── README.md # snapshot 表格在此更新 +└── ops/snapshot/ + ├── README.md # 英文文档 + ├── README.zh.md # 本文档 + ├── snapshot.env.example # 环境变量模板(每个环境复制一份填写) + ├── ecosystem.config.js.example # PM2 进程配置模板 + ├── snapshot_make.py # 入口:停服 → 制作 → 上传 → 重启 → 更新 README + ├── update_metadata.py # 查询 indexer API 并编排完整更新流程 + ├── update_readme.py # 纯表格更新逻辑(由 update_metadata.py 调用) + ├── metrics_server.py # 常驻 HTTP server,在 :6060/metrics 暴露 metrics + └── list_snapshots.py # 查询 S3 中已上传的快照列表 +``` + +## 完整流程 + +``` +服务器 cron 定时任务(每月 1 日 / 15 日) + │ + ▼ + ops/snapshot/snapshot_make.py + [1] 停止 morph-node、morph-geth + [2] 复制链数据: + - geth:仅复制 chaindata → snapshot/geth/chaindata/ + - node:blockstore.db、cs.wal、state.db、tx_index.db、evidence.db → snapshot/data/ + [3] 压缩 → 上传至 S3,路径为 {environment}/{snapshot_name}.tar.gz + [4] 重启 morph-geth → 等待 RPC 就绪 → 采集 base_height + [5] 重启 morph-node + [6] 调用 update_metadata.py + │ BASE_HEIGHT, SNAPSHOT_NAME + ▼ + python3 update_metadata.py + ┌─────────────────────────────────────────────────────┐ + │ 1. 调用 explorer-indexer API: │ + │ GET /v1/batch/l1_msg_start_height/ │ + │ GET /v1/batch/derivation_start_height/│ + │ 2. 通过 GitHub API 获取 README.md 当前内容 │ + │ 3. 在内存中插入新快照记录到表格顶部 │ + │ 4. 通过 GitHub API 建新分支并推送更新后的文件 │ + │ 5. 
通过 GitHub API 开启 PR │ + └─────────────────────────────────────────────────────┘ +``` + +## 触发方式 + +| 方式 | 说明 | +|---|---| +| 定时 | 服务器 cron,每月 1 日和 15 日自动执行 | +| 手动 | SSH 登录服务器,直接执行 `snapshot_make.py` | + +## 多环境支持 + +| 环境 | 默认 Indexer API | 覆盖方式 | +|---|---|---| +| mainnet | `https://explorer-indexer.morphl2.io` | `EXPLORER_INDEXER_URL` | +| hoodi | `https://explorer-indexer-hoodi.morphl2.io` | `EXPLORER_INDEXER_URL` | +| holesky | `https://explorer-indexer-holesky.morphl2.io` | `EXPLORER_INDEXER_URL` | + +每个环境有独立的 cron 任务和 env 文件,S3 路径和 README 表格自动按环境区分。 + +如果节点服务器无法访问默认公网地址,通过 `EXPLORER_INDEXER_URL` 指定内网地址。 + +## 部署步骤 + +### 1. 将脚本复制到节点服务器 + +节点服务器不需要安装 git,直接手动复制脚本: + +```bash +scp ops/snapshot/*.py user@server:/data/morph-hoodi/ +``` + +### 2. 创建环境变量文件 + +将模板复制到对应环境的数据目录并填写配置: + +```bash +cp ops/snapshot/snapshot.env.example /data/morph-hoodi/snapshot.env +vi /data/morph-hoodi/snapshot.env +``` + +所有可配置变量及说明见 [`snapshot.env.example`](./snapshot.env.example)。 + +> ⚠️ **`SNAPSHOT_WORK_DIR` 绝对不能设置为 `MORPH_HOME` 或任何链数据目录。** +> 脚本在启动时会删除并重建该目录,配置错误会导致数据丢失。 +> 请使用独立的子目录,例如 `SNAPSHOT_WORK_DIR=/data/morph-hoodi/snapshot_work`。 + +这些文件**不可提交到 git**(在 `.gitignore` 中添加 `*.env`)。 + +同时建议在 repo Settings → General 中开启 **"Automatically delete head branches"**,PR merge 后分支自动删除,无需手动维护。 + +### 3. 配置定时任务(PM2) + +复制 ecosystem 模板,修改 `ENV_FILE` 和 `script` 路径后启动: + +```bash +cp ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js +vi /data/morph-hoodi/ecosystem.config.js +``` + +启动并持久化: + +```bash +pm2 start /data/morph-hoodi/ecosystem.config.js +pm2 save +``` + +手动触发测试: + +```bash +cd /data/morph-hoodi +nohup env ENV_FILE=/data/morph-hoodi/snapshot.env python3 /data/morph-hoodi/snapshot_make.py \ + > /tmp/snapshot.log 2>&1 & +tail -f /tmp/snapshot.log +``` + +### 4. 
启动 metrics server + +在节点服务器上用 pm2 托管 `metrics_server.py`,使其随机器重启自动恢复: + +```bash +pm2 startup # 将 pm2 自身注册为系统开机服务(仅需执行一次) +pm2 start python3 --name morph-snapshot-metrics -- /data/morph-hoodi/metrics_server.py +pm2 save +``` + +启动后采集侧即可通过 `http://:6060/metrics` 拉取指标。 + +暴露的 metrics: + +| Metric | 类型 | 说明 | +|---|---|---| +| `morph_snapshot_readme_update_status` | gauge | 1 = 成功,0 = 失败 | +| `morph_snapshot_readme_update_timestamp_seconds` | gauge | 最后一次执行的 Unix 时间戳 | + +Labels:`environment`(mainnet / hoodi / holesky)、`snapshot`(快照名称) + +> 默认 metrics 文件路径:`/tmp/morph_snapshot_metrics.prom` +> 如需修改,通过环境变量 `METRICS_FILE` 传入。 + +## 查询 S3 快照列表 + +```bash +# 列出所有快照 +python3 list_snapshots.py + +# 只看某个环境 +python3 list_snapshots.py --env hoodi + +# 指定 bucket +python3 list_snapshots.py --env hoodi --bucket my-bucket-name +``` + +## 手动补跑 + +如果第 6 步(README 更新)失败,但 S3 上传已经成功,直接补跑 `update_metadata.py`。 +日志中 step 6 开始前会打印快照摘要信息,直接拿来用: + +```bash +cd /data/morph-hoodi +ENVIRONMENT=hoodi \ +SNAPSHOT_NAME=snapshot-20260312-1 \ +BASE_HEIGHT=3904561 \ +GH_TOKEN=ghp_xxx \ +GITHUB_REPOSITORY=morph-l2/run-morph-node \ +python3 /data/morph-hoodi/update_metadata.py +``` + +如果 indexer 已经查到了值(日志中可见),加上 `L1_MSG_HEIGHT` 和 `DERIV_HEIGHT` 跳过重查: + +```bash +cd /data/morph-hoodi +ENVIRONMENT=hoodi \ +SNAPSHOT_NAME=snapshot-20260312-1 \ +BASE_HEIGHT=3904561 \ +L1_MSG_HEIGHT=2388518 \ +DERIV_HEIGHT=2401543 \ +GH_TOKEN=ghp_xxx \ +GITHUB_REPOSITORY=morph-l2/run-morph-node \ +python3 /data/morph-hoodi/update_metadata.py +``` + +## 关键设计决策 + +- **base_height 在 geth 重启后采集**:snapshot 制作完成、geth 单独启动后再查询 RPC,读取的是 snapshot 实际对应的区块状态。morph-node 在确认高度后再启动。 +- **只打包必要数据**:geth 只复制 `chaindata`;node 数据只包含 `blockstore.db`、`cs.wal`、`state.db`、`tx_index.db`、`evidence.db`。lock 文件、节点密钥、P2P 节点列表均排除在外。 +- **解压后为同名目录**:tar 包解压后得到 `{snapshot_name}/geth/` 和 `{snapshot_name}/data/`,与压缩包文件名一致。 +- **失败时兜底恢复**:`snapshot_make.py` 在异常时尝试拉起两个进程,避免服务持续中断。 +- **不依赖 GitHub Actions 和 git CLI**:`update_metadata.py` 直接调用 GitHub REST 
API,服务器只需要 Python 3.7+。 +- **新记录插入表格顶部**:最新 snapshot 始终出现在第一行,便于用户快速找到。 +- **通过 PR 而非直接 push 合并变更**:创建新分支并开启 PR,保留 review 机会。 +- **每个环境 / 类型独立 env 文件**:通过 `ENV_FILE` 指定,各配置互不干扰。 diff --git a/ops/snapshot/ecosystem.config.js.example b/ops/snapshot/ecosystem.config.js.example new file mode 100644 index 0000000..ba18eb0 --- /dev/null +++ b/ops/snapshot/ecosystem.config.js.example @@ -0,0 +1,39 @@ +// PM2 ecosystem config for snapshot automation. +// +// Setup: +// 1. Copy this file to the node server: +// cp ops/snapshot/ecosystem.config.js.example /data/morph-hoodi/ecosystem.config.js +// 2. Edit paths and ENV_FILE to match the environment. +// 3. Start: +// pm2 start /data/morph-hoodi/ecosystem.config.js +// pm2 save + +module.exports = { + apps: [ + { + name: 'snapshot-hoodi', + script: '/data/morph-hoodi/snapshot_make.py', + interpreter: 'python3', + // 13th and 28th of each month at 15:30 CST (= 07:30 UTC) + cron_restart: '30 7 13,28 * *', + autorestart: false, + // Give morph-geth enough time to flush the snapshot journal before + // being force-killed (default is 1600ms which is too short). + // This prevents "diffs=missing" errors on the next prune-state run. + kill_timeout: 60000, + env: { + ENV_FILE: '/data/morph-hoodi/snapshot.env', + }, + }, + { + name: 'morph-snapshot-metrics', + script: '/data/morph-hoodi/metrics_server.py', + interpreter: 'python3', + autorestart: true, + env: { + METRICS_PORT: '6060', + ENVIRONMENT: 'hoodi', + }, + }, + ], +}; diff --git a/ops/snapshot/generate_sha256.py b/ops/snapshot/generate_sha256.py new file mode 100644 index 0000000..b5b312b --- /dev/null +++ b/ops/snapshot/generate_sha256.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +from __future__ import annotations +""" +Backfill SHA256 checksum files for snapshot archives in S3. + +Finds .tar.gz archives that are missing a corresponding .sha256 sidecar +and generates one by streaming the archive content directly from S3 +(no local disk needed). 
+ +Usage: + python3 generate_sha256.py --bucket my-bucket + python3 generate_sha256.py --bucket my-bucket --env hoodi + python3 generate_sha256.py --bucket my-bucket --key mainnet/snapshot-20260309-1.tar.gz + python3 generate_sha256.py --bucket my-bucket --dry-run + python3 generate_sha256.py --bucket my-bucket --force +""" + +import argparse +import hashlib +import json +import os +import subprocess +import sys +import tempfile +import time + +CHUNK_SIZE = 8 * 1024 * 1024 # 8 MiB, same as snapshot_make.py + + +def list_s3_keys(bucket: str, prefix: str, suffix: str) -> list[str]: + """Return S3 keys matching the given prefix and suffix.""" + cmd = ["aws", "s3api", "list-objects-v2", + "--bucket", bucket, "--output", "json"] + if prefix: + cmd += ["--prefix", prefix] + + keys: list[str] = [] + token = None + while True: + page_cmd = cmd if token is None else cmd + ["--continuation-token", token] + result = subprocess.run(page_cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"ERROR: aws s3api failed: {result.stderr.strip()}", file=sys.stderr) + sys.exit(1) + + data = json.loads(result.stdout or "{}") + for obj in data.get("Contents", []): + if obj["Key"].endswith(suffix): + keys.append(obj["Key"]) + + if not data.get("IsTruncated"): + break + token = data.get("NextContinuationToken") + + return sorted(keys) + + +def human_size(size_bytes: int) -> str: + for unit in ("B", "KB", "MB", "GB", "TB"): + if size_bytes < 1024: + return f"{size_bytes:.1f} {unit}" + size_bytes /= 1024 + return f"{size_bytes:.1f} PB" + + +def get_object_size(bucket: str, key: str) -> int: + cmd = ["aws", "s3api", "head-object", "--bucket", bucket, "--key", key, + "--query", "ContentLength", "--output", "text"] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + return 0 + try: + return int(result.stdout.strip()) + except ValueError: + return 0 + + +def compute_sha256_streaming(bucket: str, key: str) -> str: + """Stream an S3 
object through hashlib.sha256 without touching disk.""" + s3_uri = f"s3://{bucket}/{key}" + proc = subprocess.Popen( + ["aws", "s3", "cp", s3_uri, "-"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + ) + sha = hashlib.sha256() + total = 0 + while True: + chunk = proc.stdout.read(CHUNK_SIZE) + if not chunk: + break + sha.update(chunk) + total += len(chunk) + + proc.wait() + if proc.returncode != 0: + err = proc.stderr.read().decode().strip() + raise RuntimeError(f"aws s3 cp failed for {s3_uri}: {err}") + + return sha.hexdigest() + + +def upload_sha256(bucket: str, sha256_key: str, sha256_hex: str, + archive_basename: str) -> None: + """Write a sha256sum-compatible file and upload to S3.""" + content = f"{sha256_hex} {archive_basename}\n" + with tempfile.NamedTemporaryFile(mode="w", suffix=".sha256", delete=False) as tmp: + tmp.write(content) + tmp_path = tmp.name + + try: + s3_uri = f"s3://{bucket}/{sha256_key}" + result = subprocess.run( + ["aws", "s3", "cp", tmp_path, s3_uri], + capture_output=True, text=True, + ) + if result.returncode != 0: + raise RuntimeError(f"upload failed: {result.stderr.strip()}") + finally: + os.unlink(tmp_path) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Backfill SHA256 checksums for S3 snapshot archives") + parser.add_argument("--bucket", required=True, help="S3 bucket name") + parser.add_argument("--env", default="", + help="Filter by environment prefix (e.g. 
hoodi, mainnet)") + parser.add_argument("--key", default="", + help="Process a single S3 key instead of scanning") + parser.add_argument("--dry-run", action="store_true", + help="List archives that need checksums without processing") + parser.add_argument("--force", action="store_true", + help="Recompute even if .sha256 already exists") + args = parser.parse_args() + + prefix = f"{args.env}/" if args.env else "" + + if args.key: + if not args.key.endswith(".tar.gz"): + print(f"ERROR: --key must end with .tar.gz, got: {args.key}", + file=sys.stderr) + sys.exit(1) + targets = [args.key] + else: + print(f"Listing archives in s3://{args.bucket}/{prefix or '*'} ...") + archives = list_s3_keys(args.bucket, prefix, ".tar.gz") + existing = set(list_s3_keys(args.bucket, prefix, ".tar.gz.sha256")) + + if args.force: + targets = archives + else: + targets = [k for k in archives if k + ".sha256" not in existing] + + print(f" Total archives: {len(archives)}") + print(f" Already have .sha256: {len(archives) - len(targets)}") + print(f" Need processing: {len(targets)}") + + if not targets: + print("\nNothing to do.") + return + + if args.dry_run: + print("\n[DRY RUN] Archives that would be processed:") + for key in targets: + size = get_object_size(args.bucket, key) + print(f" {key} ({human_size(size)})") + return + + print(f"\nProcessing {len(targets)} archive(s)...\n") + for i, key in enumerate(targets, 1): + basename = os.path.basename(key) + sha256_key = key + ".sha256" + size = get_object_size(args.bucket, key) + print(f"[{i}/{len(targets)}] {key} ({human_size(size)})") + + t0 = time.time() + print(f" Streaming and computing SHA256...") + sha256_hex = compute_sha256_streaming(args.bucket, key) + elapsed = time.time() - t0 + print(f" SHA256: {sha256_hex} ({elapsed:.1f}s)") + + upload_sha256(args.bucket, sha256_key, sha256_hex, basename) + print(f" Uploaded: s3://{args.bucket}/{sha256_key}\n") + + print(f"Done. 
Processed {len(targets)} archive(s).") + + +if __name__ == "__main__": + main() diff --git a/ops/snapshot/list_snapshots.py b/ops/snapshot/list_snapshots.py new file mode 100644 index 0000000..caa9894 --- /dev/null +++ b/ops/snapshot/list_snapshots.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +from __future__ import annotations +""" +List snapshot files in S3 bucket. + +Usage: + python3 list_snapshots.py + python3 list_snapshots.py --env hoodi + python3 list_snapshots.py --env mainnet --bucket my-bucket +""" + +import argparse +import subprocess +import json +import sys + + +def list_snapshots(bucket: str, prefix: str = "") -> list[dict]: + cmd = ["aws", "s3api", "list-objects-v2", + "--bucket", bucket, + "--query", "Contents[?ends_with(Key, '.tar.gz')].[Key,Size,LastModified]", + "--output", "json"] + if prefix: + cmd += ["--prefix", prefix] + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"ERROR: {result.stderr.strip()}", file=sys.stderr) + sys.exit(1) + + items = json.loads(result.stdout or "[]") or [] + return [{"key": r[0], "size": r[1], "last_modified": r[2]} for r in items] + + +def human_size(size_bytes: int) -> str: + for unit in ["B", "KB", "MB", "GB", "TB"]: + if size_bytes < 1024: + return f"{size_bytes:.1f} {unit}" + size_bytes /= 1024 + return f"{size_bytes:.1f} PB" + + +def main() -> None: + parser = argparse.ArgumentParser(description="List snapshots in S3 bucket") + parser.add_argument("--bucket", default="morph-0582-morph-technical-department-snapshot", + help="S3 bucket name") + parser.add_argument("--env", default="", + help="Filter by environment prefix (e.g. 
hoodi, mainnet)") + args = parser.parse_args() + + prefix = f"{args.env}/" if args.env else "" + snapshots = list_snapshots(args.bucket, prefix) + + if not snapshots: + print(f"No snapshots found in s3://{args.bucket}/{prefix}") + return + + snapshots.sort(key=lambda x: x["last_modified"], reverse=True) + + print(f"\nSnapshots in s3://{args.bucket}/{prefix or '*'}") + print(f"{'Last Modified':<28} {'Size':>10} Key") + print("-" * 80) + for s in snapshots: + print(f"{s['last_modified']:<28} {human_size(s['size']):>10} {s['key']}") + print(f"\nTotal: {len(snapshots)} snapshot(s)") + + +if __name__ == "__main__": + main() diff --git a/ops/snapshot/metrics_server.py b/ops/snapshot/metrics_server.py new file mode 100644 index 0000000..65916db --- /dev/null +++ b/ops/snapshot/metrics_server.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +from __future__ import annotations +""" +Lightweight Prometheus metrics HTTP server for morph snapshot automation. + +Reads a .prom file written by update_readme.py and serves it on :6060/metrics. +Intended to run as a persistent process (e.g. managed by pm2). 
+
+Environment variables:
+    METRICS_FILE - path to the .prom file (default: /tmp/morph_snapshot_metrics.prom)
+    METRICS_PORT - port to listen on (default: 6060)
+"""
+
+import http.server
+import os
+import socket
+import time
+
+METRICS_FILE = os.environ.get("METRICS_FILE", "/tmp/morph_snapshot_metrics.prom")
+PORT = int(os.environ.get("METRICS_PORT", "6060"))
+
+
+def _default_metrics() -> str:
+    """Default metrics written on first startup — status=1 to avoid false alarms."""
+    environment = os.environ.get("ENVIRONMENT", "unknown")
+    ts = int(time.time())
+    return (
+        "# HELP morph_snapshot_readme_update_status 1 if last README update succeeded, 0 if failed\n"
+        "# TYPE morph_snapshot_readme_update_status gauge\n"
+        f'morph_snapshot_readme_update_status{{environment="{environment}",snapshot="pending"}} 1\n'
+        "# HELP morph_snapshot_readme_update_timestamp_seconds Unix timestamp of last run\n"
+        "# TYPE morph_snapshot_readme_update_timestamp_seconds gauge\n"
+        f'morph_snapshot_readme_update_timestamp_seconds{{environment="{environment}",snapshot="pending"}} {ts}\n'
+    )
+
+
+def _init_metrics_file() -> None:
+    """Write default metrics if file does not exist yet."""
+    if not os.path.exists(METRICS_FILE):
+        try:
+            with open(METRICS_FILE, "w") as f:
+                f.write(_default_metrics())
+            print(f"Initialized default metrics: {METRICS_FILE}")
+        except OSError as e:
+            print(f"WARNING: could not initialize metrics file: {e}")
+
+
+class MetricsHandler(http.server.BaseHTTPRequestHandler):
+    def do_GET(self):
+        if self.path != "/metrics":
+            self.send_response(404)
+            self.end_headers()
+            return
+
+        try:
+            with open(METRICS_FILE, "r") as f:
+                content = f.read()
+            self.send_response(200)
+        except OSError:
+            content = _default_metrics()
+            self.send_response(200)
+
+        body = content.encode("utf-8")
+        self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def log_message(self, fmt, *args):
+        # Suppress per-request access logs to keep output clean
+        pass
+
+
+if __name__ == "__main__":
+    _init_metrics_file()
+    server = http.server.HTTPServer(("0.0.0.0", PORT), MetricsHandler)
+    host = socket.gethostname()
+    print(f"morph-snapshot metrics server listening on http://{host}:{PORT}/metrics")
+    print(f"Reading metrics from: {METRICS_FILE}")
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        print("\nShutting down.")
+        server.server_close()
+
diff --git a/ops/snapshot/snapshot.env.example b/ops/snapshot/snapshot.env.example
new file mode 100644
index 0000000..7e1146e
--- /dev/null
+++ b/ops/snapshot/snapshot.env.example
@@ -0,0 +1,77 @@
+# ─────────────────────────────────────────────────────────────────────────────
+# Morph Snapshot Environment Configuration
+#
+# Copy this file for each environment and fill in the values:
+#     cp ops/snapshot/snapshot.env.example /data/morph-hoodi/snapshot.env
+#     cp ops/snapshot/snapshot.env.example /data/morph-mainnet/snapshot.env
+#
+# Then set ENV_FILE when starting the pm2 process (see ecosystem.config.js).
+# ─────────────────────────────────────────────────────────────────────────────
+
+# ── Required ──────────────────────────────────────────────────────────────────
+
+# Target environment: mainnet | hoodi | holesky
+ENVIRONMENT=hoodi
+
+# S3 bucket name only — no s3:// prefix, no path suffix
+# e.g. morph-0582-morph-technical-department-snapshot
+S3_BUCKET=
+
+# GitHub Fine-grained PAT with Contents:write and Pull requests:write
+GH_TOKEN=
+
+# GitHub repository in owner/repo format
+GITHUB_REPOSITORY=morph-l2/run-morph-node
+
+# ── Schedule ──────────────────────────────────────────────────────────────────
+
+# Days of month to run (UTC). Script exits immediately on other days.
+# Set to "any" to bypass the check (e.g. for manual testing).
+SNAPSHOT_DAYS=13,28 + +# ── Snapshot type ───────────────────────────────────────────────────────────── + +# Prefix for the snapshot name: snapshot | mpt-snapshot | full-snapshot +# Affects snapshot name (e.g. snapshot-20260309-1), S3 key, and branch name. +# Each prefix running on the same day gets a unique name and branch. +SNAPSHOT_PREFIX=snapshot + +# ── Paths ───────────────────────────────────────────────────────────────────── + +# Root directory of chain data for this environment — required, no default +# e.g. /data/morph-hoodi or /data/morph-mainnet +MORPH_HOME= + +# Directory packed as geth/ in the snapshot — defaults to $MORPH_HOME/geth-data +# e.g. GETH_DB_DIR=/data/morph-hoodi/geth-data +GETH_DB_DIR= + +# Directory packed as data/ in the snapshot — defaults to $MORPH_HOME/node-data/data +# e.g. NODE_DB_DIR=/data/morph-hoodi/node-data/data +NODE_DB_DIR= + +# Temporary working directory — all temp files go here, deleted after upload. +# WARNING: must NOT be MORPH_HOME or any data directory (it gets deleted and recreated). +# Layout: staging/ for copytree, snapshot.tar.gz for compressed output. +# e.g. SNAPSHOT_WORK_DIR=/data/morph-hoodi/snapshot_work +SNAPSHOT_WORK_DIR= + +# ── Prune (full node only) ──────────────────────────────────────────────────── + +# Set to true to run `geth snapshot prune-state` before copying data. +# Only for full nodes — leave unset or false for archive nodes. +# GETH_PRUNE=true + +# Path to the geth binary — defaults to `geth` (must be in PATH). +# GETH_BIN=/usr/local/bin/geth + +# ── Service ─────────────────────────────────────────────────────────────────── + +# Geth JSON-RPC endpoint — defaults to http://127.0.0.1:8545 +# GETH_RPC=http://127.0.0.1:8545 + +# Indexer API base URL for fetching l1_msg_start_height and derivation_start_height. +# Defaults to the public endpoint for the given ENVIRONMENT if not set. +# Set to internal/intranet URL on production servers. +# e.g. 
EXPLORER_INDEXER_URL=https://explorer-indexer-hoodi.morphl2.io +EXPLORER_INDEXER_URL= diff --git a/ops/snapshot/snapshot_make.py b/ops/snapshot/snapshot_make.py new file mode 100644 index 0000000..9d3855b --- /dev/null +++ b/ops/snapshot/snapshot_make.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python3 +from __future__ import annotations +""" +ops/snapshot/snapshot_make.py + +Runs on the node server via cron (1st and 15th of each month). + +Responsibilities: + 1. Stop morph-geth and morph-node + 2. Create and compress a snapshot of chain data + 3. Upload the snapshot to S3 + 4. Restart morph-geth, wait for RPC, collect base_height + 5. Restart morph-node + 6. Call update_metadata.py to open a PR updating the README snapshot table + +Setup: + 1. Clone the repo to /data/run-morph-node on the node server + 2. Copy ops/snapshot/snapshot.env.example for each environment and fill in values: + cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot-mainnet.env + cp ops/snapshot/snapshot.env.example ops/snapshot/snapshot-hoodi.env + + 3. 
import hashlib
import json
import os
import shutil
import subprocess
import sys
import time
import urllib.request
from datetime import datetime, timezone
from pathlib import Path

# Make sibling modules (update_metadata, update_readme) importable.
sys.path.insert(0, str(Path(__file__).parent.resolve()))

SCRIPT_DIR = Path(__file__).parent.resolve()

# ── Env file loader ──────────────────────────────────────────────────────────── 

def load_env_file(path: str) -> None:
    """Parse KEY=value lines (with or without 'export' prefix) into os.environ.

    Existing environment variables win (setdefault), so values exported by the
    caller override the file. Missing file is a warning, not an error.
    """
    try:
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                if line.startswith("export "):
                    line = line[len("export "):]
                if "=" in line:
                    key, _, value = line.partition("=")
                    value = value.strip().strip('"').strip("'")
                    os.environ.setdefault(key.strip(), value)
    except FileNotFoundError:
        print(f"WARNING: {path} not found, relying on existing environment variables")

# ── Shell helpers ──────────────────────────────────────────────────────────────

def run(args: list, check: bool = True) -> None:
    """Echo and run a command; raises CalledProcessError when check=True."""
    print(f"  $ {' '.join(str(a) for a in args)}")
    subprocess.run(args, check=check)

# ── Geth RPC ───────────────────────────────────────────────────────────────────

def get_block_height(rpc_url: str = "http://localhost:8545",
                     retries: int = 30, interval: int = 5) -> int:
    """Poll eth_blockNumber until geth answers; return height as int.

    Raises RuntimeError after `retries` failed attempts.
    """
    payload = json.dumps({
        "jsonrpc": "2.0", "method": "eth_blockNumber", "params": [], "id": 1
    }).encode()
    for i in range(1, retries + 1):
        try:
            req = urllib.request.Request(
                rpc_url, data=payload,
                headers={"Content-Type": "application/json"},
            )
            with urllib.request.urlopen(req, timeout=5) as resp:
                result = json.loads(resp.read())["result"]
            if result:
                return int(result, 16)
        except Exception:
            pass
        print(f"  attempt {i}: geth not ready yet, retrying in {interval}s...")
        time.sleep(interval)
    raise RuntimeError("geth RPC did not become available in time")

# ── Main ───────────────────────────────────────────────────────────────────────

def main() -> None:
    env_file = os.environ.get("ENV_FILE", str(SCRIPT_DIR / "snapshot.env"))
    load_env_file(env_file)

    # SNAPSHOT_DAYS: comma-separated days of month to run (default: 13,28).
    # On other days the script exits immediately — prevents accidental runs on pm2 start.
    # Set SNAPSHOT_DAYS=any to bypass this check (e.g. for manual testing).
    # BUG FIX: default was "13,18", contradicting this comment and snapshot.env.example.
    snapshot_days_env = os.environ.get("SNAPSHOT_DAYS", "13,28")
    if snapshot_days_env.strip().lower() != "any":
        allowed_days = {int(d.strip()) for d in snapshot_days_env.split(",")}
        today = datetime.now(timezone.utc).day
        if today not in allowed_days:
            print(f"Today is day {today}, not in SNAPSHOT_DAYS={snapshot_days_env}. Exiting.")
            sys.exit(0)

    environment = os.environ.get("ENVIRONMENT", "mainnet")
    morph_home = os.environ.get("MORPH_HOME", "")
    s3_bucket = os.environ.get("S3_BUCKET", "")

    missing = [k for k, v in [("MORPH_HOME", morph_home), ("S3_BUCKET", s3_bucket)] if not v]
    if missing:
        for k in missing:
            print(f"ERROR: {k} is required", file=sys.stderr)
        sys.exit(1)

    # GETH_DB_DIR / NODE_DB_DIR point directly to the directories that will be
    # packed into the snapshot (as geth/ and data/ respectively).
    # Use `or` so that empty-string values in the env file also fall back to defaults.
    geth_db_dir = os.environ.get("GETH_DB_DIR") or os.path.join(morph_home, "geth-data")
    node_db_dir = os.environ.get("NODE_DB_DIR") or os.path.join(morph_home, "node-data", "data")

    # All temp files live under SNAPSHOT_WORK_DIR:
    #   staging/          — copytree target, deleted after compression
    #   snapshot.tar.gz   — compressed output, deleted after S3 upload
    work_base = os.environ.get("SNAPSHOT_WORK_DIR") or "/data/snapshot_work"
    work_dir = os.path.join(work_base, "staging")
    snapshot_file = os.path.join(work_base, "snapshot.tar.gz")

    # Safety check: SNAPSHOT_WORK_DIR must not overlap with actual data directories.
    # The script deletes and recreates work_base at startup — if work_base IS or CONTAINS
    # a data directory, that data will be wiped. Placing work_base *inside* MORPH_HOME
    # (as a dedicated subdirectory) is safe as long as it doesn't overlap with geth/node data.
    def _is_subpath(child: str, parent: str) -> bool:
        child = os.path.realpath(child)
        parent = os.path.realpath(parent)
        return child == parent or child.startswith(parent.rstrip("/") + "/")

    # Only block overlap with the actual data dirs, not with MORPH_HOME itself.
    protected = {"GETH_DB_DIR": geth_db_dir, "NODE_DB_DIR": node_db_dir}
    for var, path in protected.items():
        if not path:
            continue
        if _is_subpath(path, work_base) or _is_subpath(work_base, path):
            print(
                f"ERROR: SNAPSHOT_WORK_DIR={work_base!r} overlaps with {var}={path!r}.\n"
                f"       SNAPSHOT_WORK_DIR must be a dedicated directory outside all data paths.",
                file=sys.stderr,
            )
            sys.exit(1)

    # SNAPSHOT_PREFIX allows different snapshot types to coexist:
    # e.g. "snapshot", "mpt-snapshot", "full-snapshot"
    snapshot_prefix = os.environ.get("SNAPSHOT_PREFIX", "snapshot")
    date = datetime.now(timezone.utc).strftime("%Y%m%d")
    snapshot_name = f"{snapshot_prefix}-{date}-1"

    os.environ["SNAPSHOT_NAME"] = snapshot_name
    os.environ["ENVIRONMENT"] = environment

    print(f"=== Morph Snapshot: {snapshot_name} ({environment}) ===")
    print(f"Started at: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}")

    gh_token = os.environ.get("GH_TOKEN", "")
    gh_repo = os.environ.get("GITHUB_REPOSITORY", "")

    services_stopped = False
    try:
        # ── Step 0: Resolve snapshot_name before any destructive operation ────
        # Check GitHub now so that snapshot_name, S3 key, and branch all match.
        if gh_token and gh_repo:
            from update_metadata import resolve_snapshot_name
            snapshot_name = resolve_snapshot_name(gh_repo, environment, snapshot_name, gh_token)
            os.environ["SNAPSHOT_NAME"] = snapshot_name
            print(f"Resolved snapshot name: {snapshot_name}")

        # ── Step 1: Stop services ─────────────────────────────────────────────
        print("\n[1/6] Stopping services...")
        run(["pm2", "stop", "morph-node"])
        services_stopped = True  # morph-node is down; exception handler must restart it

        # Stop morph-geth cleanly so geth can flush the snapshot journal to disk
        # before prune or copy begins. pm2 stop sends SIGTERM but returns
        # immediately — geth may still be running, so poll the geth LOCK file:
        # it disappears only when the process has fully exited.
        #
        # IMPORTANT: For GETH_PRUNE=true to work, morph-geth must have
        # kill_timeout: 120000 in its ecosystem.config.js so PM2 does not
        # SIGKILL geth before the snapshot journal is written to disk.
        geth_lock = os.path.join(geth_db_dir, "geth", "LOCK")
        print("  Stopping morph-geth (waiting for LOCK file to disappear, up to 120s)...")
        run(["pm2", "stop", "morph-geth"])
        for i in range(120):
            if not os.path.exists(geth_lock):
                print(f"  morph-geth exited after {i}s")
                break
            time.sleep(1)
        else:
            print("  WARNING: geth LOCK file still present after 120s — proceeding anyway")

        print("✅ Services stopped")

        # ── Step 1.5: Optional prune (full node only) ─────────────────────────
        # Set GETH_PRUNE=true in snapshot.env to run `geth snapshot prune-state`
        # before copying data. Leave unset (or false) for archive nodes.
        geth_bin = os.environ.get("GETH_BIN") or "geth"
        if os.environ.get("GETH_PRUNE", "").lower() in ("1", "true", "yes"):
            print("\n[1.5/6] Pruning geth state (may take a while)...")
            run([geth_bin, "snapshot", "prune-state", "--datadir", geth_db_dir])
            print("✅ Prune complete")
        else:
            print("\n[1.5/6] Skipping prune (GETH_PRUNE not set)")

        # ── Step 2: Create snapshot ───────────────────────────────────────────
        print("\n[2/6] Creating snapshot...")
        named_dir = os.path.join(work_base, snapshot_name)
        for d in [work_dir, named_dir]:
            if os.path.exists(d):
                shutil.rmtree(d)
        os.makedirs(work_dir)

        # geth: only chaindata is needed for a snapshot
        geth_src = os.path.join(geth_db_dir, "geth", "chaindata")
        geth_dst = os.path.join(work_dir, "geth", "chaindata")
        print(f"  Copying geth chaindata: {geth_src} (may take a while...)")
        shutil.copytree(geth_src, geth_dst)
        geth_size = subprocess.check_output(["du", "-sh", geth_dst]).decode().split()[0]
        print(f"  ✅ geth chaindata copied: {geth_size}")

        # node: only the 5 essential db directories
        node_dst = os.path.join(work_dir, "data")
        os.makedirs(node_dst)
        for db in ["blockstore.db", "cs.wal", "state.db", "tx_index.db", "evidence.db"]:
            src = os.path.join(node_db_dir, db)
            dst = os.path.join(node_dst, db)
            print(f"  Copying {db}...")
            shutil.copytree(src, dst)
        node_size = subprocess.check_output(["du", "-sh", node_dst]).decode().split()[0]
        print(f"  ✅ node data copied: {node_size}")

        # Rename staging/ to snapshot_name so the tar extracts to a named directory.
        os.rename(work_dir, named_dir)

        print(f"  Compressing to {snapshot_file} (may take a while...)")
        run(["tar", "-czf", snapshot_file, "-C", work_base, snapshot_name])
        shutil.rmtree(named_dir)
        size = subprocess.check_output(["du", "-sh", snapshot_file]).decode().split()[0]
        print(f"✅ Snapshot created: {size}")

        # ── Step 3: Upload to S3 ──────────────────────────────────────────────
        print("\n[3/6] Uploading to S3...")

        # Compute SHA256 of the archive for integrity verification.
        print(f"  Computing SHA256 of {snapshot_file} (may take a while...)")
        sha256 = hashlib.sha256()
        with open(snapshot_file, "rb") as f:
            for chunk in iter(lambda: f.read(8 * 1024 * 1024), b""):
                sha256.update(chunk)
        sha256_hex = sha256.hexdigest()
        sha256_file = snapshot_file + ".sha256"
        archive_basename = os.path.basename(snapshot_file)
        # Two spaces between hash and filename: the format `sha256sum -c` expects.
        with open(sha256_file, "w") as f:
            f.write(f"{sha256_hex}  {archive_basename}\n")
        print(f"  SHA256: {sha256_hex}")

        s3_key = f"{environment}/{snapshot_name}.tar.gz"
        s3_sha256_key = f"{environment}/{snapshot_name}.tar.gz.sha256"
        run(["aws", "s3", "cp", snapshot_file, f"s3://{s3_bucket}/{s3_key}"])
        run(["aws", "s3", "cp", sha256_file, f"s3://{s3_bucket}/{s3_sha256_key}"])
        print(f"✅ Uploaded: s3://{s3_bucket}/{s3_key}")
        print(f"✅ Uploaded: s3://{s3_bucket}/{s3_sha256_key}")
        os.remove(snapshot_file)
        os.remove(sha256_file)
        print("✅ Removed local snapshot and sha256 files")

        # ── Step 4: Start geth, collect base_height ───────────────────────────
        print("\n[4/6] Starting morph-geth and collecting base_height...")
        run(["pm2", "start", "morph-geth"])
        geth_rpc = os.environ.get("GETH_RPC") or "http://127.0.0.1:8545"
        print("Waiting for geth RPC to be ready...")
        base_height = get_block_height(geth_rpc)
        os.environ["BASE_HEIGHT"] = str(base_height)
        print(f"✅ Geth base height: {base_height}")

        # ── Step 5: Start morph-node ──────────────────────────────────────────
        print("\n[5/6] Starting morph-node...")
        run(["pm2", "start", "morph-node"])
        print("✅ morph-node started")

        # ── Step 6: Update README via GitHub API ──────────────────────────────
        print("\n[6/6] Updating README snapshot table...")
        print("\n" + "─" * 60)
        print("  Snapshot summary (use this to create PR manually if step 6 fails):")
        print(f"    ENVIRONMENT   = {environment}")
        print(f"    SNAPSHOT_NAME = {snapshot_name}")
        print(f"    BASE_HEIGHT   = {base_height}")
        print(f"    S3_KEY        = s3://{s3_bucket}/{s3_key}")
        print(f"    SHA256        = {sha256_hex}")
        print("  l1_msg_start_height and derivation_start_height will be")
        print("  printed by update_metadata.py — check log if PR creation fails.")
        print("─" * 60 + "\n")
        run([sys.executable, str(SCRIPT_DIR / "update_metadata.py")])

        if os.path.exists(work_base):
            shutil.rmtree(work_base)

        from update_readme import write_metric  # noqa: E402
        write_metric(1, environment, snapshot_name)

    except (Exception, KeyboardInterrupt) as e:
        if isinstance(e, KeyboardInterrupt):
            print("\nInterrupted (SIGINT received).", file=sys.stderr)
        else:
            print(f"\nERROR: {e}", file=sys.stderr)
        if services_stopped:
            # Always bring the node back up, even on failure (best-effort).
            print("Recovering services...")
            run(["pm2", "start", "morph-geth"], check=False)
            run(["pm2", "start", "morph-node"], check=False)
            print("Services recovered.")
        try:
            from update_readme import write_metric  # noqa: E402
            write_metric(0, environment, snapshot_name)
        except Exception:
            pass
        sys.exit(1)

    print(f"\n=== Done at {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')} ===")


if __name__ == "__main__":
    main()
+#!/usr/bin/env python3 +from __future__ import annotations +""" +Fetch snapshot metadata from the indexer API and update README.md via GitHub API. + +Given BASE_HEIGHT and SNAPSHOT_NAME, this script: + 1. Queries the internal explorer-indexer API for l1_msg_start_height + and derivation_start_height. + 2. Fetches README.md content from GitHub, applies the table update in memory. + 3. Fetches morph-node/.env (or .env_hoodi) and updates snapshot name + heights. + 4. Creates a new branch, pushes both updated files, and opens a PR — + all via GitHub REST API (no git or gh CLI required). + +Environment variables: + ENVIRONMENT - mainnet | hoodi | holesky + SNAPSHOT_NAME - e.g. snapshot-20260225-1 + BASE_HEIGHT - L2 geth block height + GH_TOKEN - GitHub personal access token (repo scope) + GITHUB_REPOSITORY - owner/repo, e.g. morphl2/run-morph-node + README_PATH - path to README.md inside the repo (default: README.md) + L1_MSG_HEIGHT - (optional) skip indexer API, use this value directly + DERIV_HEIGHT - (optional) skip indexer API, use this value directly + DRY_RUN - set to "1" to skip README update and PR creation + +Usage: + # Full run (on Self-hosted Runner, hits internal indexer API): + ENVIRONMENT=mainnet SNAPSHOT_NAME=snapshot-20260225-1 BASE_HEIGHT=20169165 \\ + GH_TOKEN=ghp_xxx GITHUB_REPOSITORY=morphl2/run-morph-node \\ + python3 ops/snapshot/update_metadata.py + + # Local test with mock values — no git/gh CLI needed: + ENVIRONMENT=mainnet SNAPSHOT_NAME=snapshot-test-1 BASE_HEIGHT=20169165 \\ + L1_MSG_HEIGHT=24280251 DERIV_HEIGHT=24294756 \\ + GH_TOKEN=ghp_xxx GITHUB_REPOSITORY=morphl2/run-morph-node \\ + python3 ops/snapshot/update_metadata.py + + # Dry run — only fetches/prints metadata, touches nothing: + ENVIRONMENT=mainnet SNAPSHOT_NAME=snapshot-test-1 BASE_HEIGHT=20169165 \\ + L1_MSG_HEIGHT=24280251 DERIV_HEIGHT=24294756 DRY_RUN=1 \\ + python3 ops/snapshot/update_metadata.py +""" + +import base64 +import json +import os +import re +import sys +import 
urllib.error +import urllib.request + +# ── Constants ───────────────────────────────────────────────────────────────── + +INDEXER_BASE_URLS = { + "mainnet": "https://explorer-indexer.morphl2.io", + "hoodi": "https://explorer-indexer-hoodi.morphl2.io", +} + +ENV_FILE_MAP = { + "mainnet": "morph-node/.env", + "hoodi": "morph-node/.env_hoodi", +} + +SNAPSHOT_VAR_MAP = { + "mainnet": {"mpt": "MAINNET_MPT_SNAPSHOT_NAME", "zk": "MAINNET_SNAPSHOT_NAME"}, + "hoodi": {"mpt": "HOODI_MPT_SNAPSHOT_NAME", "zk": "HOODI_SNAPSHOT_NAME"}, +} + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, SCRIPT_DIR) + +# ── HTTP helpers ────────────────────────────────────────────────────────────── + +def _http_request(req: urllib.request.Request, url: str) -> dict: + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + body = e.read().decode(errors="replace") + raise RuntimeError(f"HTTP {e.code} {e.reason} — URL: {url}\nResponse: {body}") from None + except urllib.error.URLError as e: + raise RuntimeError(f"Network error — URL: {url}\n{e.reason}") from None + + +def http_get(url: str, token: str = "") -> dict: + headers = { + "Accept": "application/json", + "User-Agent": "Mozilla/5.0 (compatible; morph-snapshot/1.0)", + } + if token: + headers["Authorization"] = f"Bearer {token}" + return _http_request(urllib.request.Request(url, headers=headers), url) + + +def http_get_or_none(url: str, token: str = "") -> dict | None: + """Like http_get but returns None on 404 instead of raising.""" + headers = {"Accept": "application/json"} + if token: + headers["Authorization"] = f"Bearer {token}" + req = urllib.request.Request(url, headers=headers) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + if e.code == 404: + return None + body = e.read().decode(errors="replace") + raise 
RuntimeError(f"HTTP {e.code} {e.reason} — URL: {url}\nResponse: {body}") from None + except urllib.error.URLError as e: + raise RuntimeError(f"Network error — URL: {url}\n{e.reason}") from None + + +def http_post(url: str, payload: dict, token: str) -> dict: + data = json.dumps(payload).encode() + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"} + return _http_request(urllib.request.Request(url, data=data, headers=headers, method="POST"), url) + + +def http_put(url: str, payload: dict, token: str) -> dict: + data = json.dumps(payload).encode() + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"} + return _http_request(urllib.request.Request(url, data=data, headers=headers, method="PUT"), url) + +# ── Indexer API ─────────────────────────────────────────────────────────────── + +def fetch_metadata(environment: str, base_height: str) -> tuple[str, str]: + """Return (l1_msg_start_height, derivation_start_height) as strings.""" + # INDEXER_URL overrides the default public URL, useful for internal/intranet access. + base_url = os.environ.get("EXPLORER_INDEXER_URL", INDEXER_BASE_URLS.get(environment, "")) + if not base_url: + raise RuntimeError(f"No indexer URL for environment {environment!r}. 
Set INDEXER_URL.") + + def get(path): + url = f"{base_url.rstrip('/')}{path}" + print(f" GET {url}") + return http_get(url) + + l1_data = get(f"/v1/batch/l1_msg_start_height/{base_height}") + deriv_data = get(f"/v1/batch/derivation_start_height/{base_height}") + + # API may return a plain number or a dict like {"l1_msg_start_height": N} + if isinstance(l1_data, (int, float)): + l1_msg_height = str(int(l1_data)) + elif isinstance(l1_data, dict) and "l1_msg_start_height" in l1_data: + l1_msg_height = str(l1_data["l1_msg_start_height"]) + else: + raise RuntimeError(f"Unexpected indexer response for l1_msg_start_height: {l1_data}") + + if isinstance(deriv_data, (int, float)): + deriv_height = str(int(deriv_data)) + elif isinstance(deriv_data, dict) and "derivation_start_height" in deriv_data: + deriv_height = str(deriv_data["derivation_start_height"]) + else: + raise RuntimeError(f"Unexpected indexer response for derivation_start_height: {deriv_data}") + + return l1_msg_height, deriv_height + +# ── GitHub API ──────────────────────────────────────────────────────────────── + +GITHUB_API = "https://api.github.com" + + +def gh_get_file(repo: str, path: str, token: str, ref: str = "main") -> tuple[str, str]: + """Fetch file content. 
Returns (decoded_content, blob_sha).""" + url = f"{GITHUB_API}/repos/{repo}/contents/{path}?ref={ref}" + data = http_get(url, token) + content = base64.b64decode(data["content"]).decode("utf-8") + return content, data["sha"] + + +def gh_get_main_sha(repo: str, token: str) -> str: + """Return the current commit SHA of the main branch.""" + url = f"{GITHUB_API}/repos/{repo}/git/ref/heads/main" + data = http_get(url, token) + return data["object"]["sha"] + + +def gh_branch_exists(repo: str, branch: str, token: str) -> bool: + url = f"{GITHUB_API}/repos/{repo}/git/ref/heads/{branch}" + return http_get_or_none(url, token) is not None + + +def resolve_snapshot_name(repo: str, environment: str, + snapshot_name: str, token: str) -> str: + """Verify that the snapshot branch does not already exist on GitHub. + + Each date should have exactly one snapshot. If the branch for snapshot_name + already exists it means today's snapshot was already created — raise an error + instead of silently creating a -2 duplicate. + """ + branch = f"snapshot/{environment}-{snapshot_name}" + if gh_branch_exists(repo, branch, token): + raise RuntimeError( + f"Branch '{branch}' already exists on GitHub.\n" + f" Today's snapshot ({snapshot_name}) has already been created.\n" + f" If you need to re-run, delete the branch first or use a different SNAPSHOT_PREFIX." + ) + return snapshot_name + + +def gh_create_branch(repo: str, branch: str, sha: str, token: str) -> None: + """Create branch. 
snapshot_name must already be resolved via resolve_snapshot_name.""" + url = f"{GITHUB_API}/repos/{repo}/git/refs" + http_post(url, {"ref": f"refs/heads/{branch}", "sha": sha}, token) + print(f" Created branch: {branch}") + + +def gh_update_file(repo: str, path: str, content: str, + blob_sha: str, branch: str, message: str, token: str) -> None: + url = f"{GITHUB_API}/repos/{repo}/contents/{path}" + http_put(url, { + "message": message, + "content": base64.b64encode(content.encode("utf-8")).decode(), + "sha": blob_sha, + "branch": branch, + }, token) + print(f" Pushed {path} to branch: {branch}") + + +def gh_create_pr(repo: str, branch: str, title: str, body: str, token: str) -> str: + url = f"{GITHUB_API}/repos/{repo}/pulls" + data = http_post(url, { + "title": title, + "body": body, + "head": branch, + "base": "main", + }, token) + return data["html_url"] + +# ── Env file update (in-memory) ─────────────────────────────────────────────── + +def apply_env_update(content: str, environment: str, snapshot_name: str, + deriv_height: str, l1_msg_height: str, base_height: str) -> str: + """Update snapshot name and validator heights in a .env file.""" + is_mpt = snapshot_name.startswith("mpt-") + snapshot_var = SNAPSHOT_VAR_MAP[environment]["mpt" if is_mpt else "zk"] + + def replace(var, value): + return re.sub(rf'^({re.escape(var)}=).*$', rf'\g<1>{value}', content, flags=re.MULTILINE) + + content = replace(snapshot_var, snapshot_name) + content = replace("DERIVATION_START_HEIGHT", deriv_height) + content = replace("L1_MSG_START_HEIGHT", l1_msg_height) + if is_mpt: + content = replace("L2_BASE_HEIGHT", base_height) + + return content + + +# ── README update (in-memory) ───────────────────────────────────────────────── + +def build_new_row(environment: str, snapshot_name: str, + deriv_height: str, l1_msg_height: str, base_height: str) -> str: + cdn_base = "https://snapshot.morphl2.io" + url = f"{cdn_base}/{environment}/{snapshot_name}.tar.gz" + return f"| 
[{snapshot_name}]({url}) | {deriv_height} | {l1_msg_height} | {base_height} |" + + +def apply_readme_update(content: str, environment: str, snapshot_name: str, + deriv_height: str, l1_msg_height: str, base_height: str) -> str: + """Import insert_row_content from update_readme.py and apply it.""" + from update_readme import insert_row_content, SECTION_MARKERS # noqa: E402 + + section_marker = SECTION_MARKERS[environment] + new_row = build_new_row(environment, snapshot_name, deriv_height, l1_msg_height, base_height) + return insert_row_content(content, section_marker, new_row) + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main() -> None: + from update_readme import write_metric # noqa: E402 + + dry_run = os.environ.get("DRY_RUN", "0") == "1" + + # Validate required env vars + required = ["ENVIRONMENT", "SNAPSHOT_NAME", "BASE_HEIGHT"] + if not dry_run: + required += ["GH_TOKEN", "GITHUB_REPOSITORY"] + + missing = [v for v in required if not os.environ.get(v)] + if missing: + print(f"ERROR: Missing required env vars: {', '.join(missing)}", file=sys.stderr) + sys.exit(1) + + environment = os.environ["ENVIRONMENT"] + snapshot_name = os.environ["SNAPSHOT_NAME"] + base_height = os.environ["BASE_HEIGHT"] + token = os.environ.get("GH_TOKEN", "") + repo = os.environ.get("GITHUB_REPOSITORY", "") + readme_path = os.environ.get("README_PATH", "README.md") + + if environment not in INDEXER_BASE_URLS: + print(f"ERROR: Unknown environment: {environment!r}. 
Must be: {' | '.join(INDEXER_BASE_URLS)}", + file=sys.stderr) + sys.exit(1) + + try: + # ── Step 1: metadata ────────────────────────────────────────────────── + l1_msg_height = os.environ.get("L1_MSG_HEIGHT", "") + deriv_height = os.environ.get("DERIV_HEIGHT", "") + + if l1_msg_height and deriv_height: + print(f"\n[1/3] Using provided metadata (API call skipped):") + else: + print(f"\n[1/3] Fetching metadata from indexer (base_height={base_height}) ...") + l1_msg_height, deriv_height = fetch_metadata(environment, base_height) + + print(f" l1_msg_start_height = {l1_msg_height}") + print(f" derivation_start_height = {deriv_height}") + + if dry_run: + print("\n[DRY RUN] Skipping README/env update and PR creation.") + print(f" Would update: env={environment} snapshot={snapshot_name}") + print(f" base={base_height} l1_msg={l1_msg_height} deriv={deriv_height}") + env_path = ENV_FILE_MAP.get(environment) + if env_path: + print(f" Would update: {env_path}") + return + + # ── Step 2: update README + env file, push via GitHub API ──────────── + print(f"\n[2/4] Updating README via GitHub API ...") + current_content, blob_sha = gh_get_file(repo, readme_path, token) + updated_content = apply_readme_update( + current_content, environment, snapshot_name, deriv_height, l1_msg_height, base_height + ) + + branch = f"snapshot/{environment}-{snapshot_name}" + commit_msg = f"snapshot: add {snapshot_name} ({environment})" + main_sha = gh_get_main_sha(repo, token) + + gh_create_branch(repo, branch, main_sha, token) + gh_update_file(repo, readme_path, updated_content, blob_sha, branch, commit_msg, token) + + # ── Step 3: update env file ─────────────────────────────────────────── + env_path = ENV_FILE_MAP.get(environment) + if env_path: + print(f"\n[3/4] Updating {env_path} ...") + env_content, env_blob_sha = gh_get_file(repo, env_path, token) + updated_env = apply_env_update( + env_content, environment, snapshot_name, deriv_height, l1_msg_height, base_height + ) + env_commit_msg = 
f"snapshot: update {env_path} for {snapshot_name} ({environment})" + gh_update_file(repo, env_path, updated_env, env_blob_sha, branch, env_commit_msg, token) + else: + print(f"\n[3/4] No env file mapping for {environment}, skipping.") + + # ── Step 4: open PR ─────────────────────────────────────────────────── + print(f"\n[4/4] Creating PR ...") + pr_body = ( + f"Auto-generated by snapshot workflow.\n\n" + f"- Environment: `{environment}`\n" + f"- Snapshot: `{snapshot_name}`\n" + f"- L2 Base Height: `{base_height}`\n" + f"- L1 Msg Start Height: `{l1_msg_height}`\n" + f"- Derivation Start Height: `{deriv_height}`" + ) + pr_url = gh_create_pr(repo, branch, commit_msg, pr_body, token) + + print(f"\n✅ Done. PR opened: {pr_url}") + write_metric(1, environment, snapshot_name) + + except Exception as e: + print(f"\nERROR: {e}", file=sys.stderr) + write_metric(0, environment, snapshot_name) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/ops/snapshot/update_readme.py b/ops/snapshot/update_readme.py new file mode 100644 index 0000000..a1ee1c2 --- /dev/null +++ b/ops/snapshot/update_readme.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +from __future__ import annotations +""" +Update the snapshot table in README.md. + +Inserts a new row at the TOP of the target environment's snapshot table. + +Environment variables: + ENVIRONMENT - mainnet | hoodi + SNAPSHOT_NAME - e.g. 
# ── Constants ─────────────────────────────────────────────────────────────────

# Base URL of the CDN that serves published snapshot archives.
CDN_BASE = "https://snapshot.morphl2.io"

# Marker text identifying each environment's snapshot table in the README.
SECTION_MARKERS = {
    "mainnet": "**For mainnet**",
    "hoodi": "**For hoodi testnet**",
}

# Where Prometheus metrics are written; metrics_server.py reads this file
# and serves it on :6060/metrics.
METRICS_FILE = os.environ.get("METRICS_FILE", "/tmp/morph_snapshot_metrics.prom")

# ── Metrics ───────────────────────────────────────────────────────────────────

def write_metric(status: int, environment: str, snapshot_name: str) -> None:
    """Write Prometheus metrics to METRICS_FILE.

    Args:
        status: 1 if the last README update succeeded, 0 if it failed.
        environment: environment label (e.g. "mainnet").
        snapshot_name: snapshot label (e.g. "snapshot-20260225-1").
    """
    ts = int(time.time())
    labels = f'environment="{environment}",snapshot="{snapshot_name}"'
    content = (
        "# HELP morph_snapshot_readme_update_status 1 if last README update succeeded, 0 if failed\n"
        "# TYPE morph_snapshot_readme_update_status gauge\n"
        f"morph_snapshot_readme_update_status{{{labels}}} {status}\n"
        "# HELP morph_snapshot_readme_update_timestamp_seconds Unix timestamp of last run\n"
        "# TYPE morph_snapshot_readme_update_timestamp_seconds gauge\n"
        f"morph_snapshot_readme_update_timestamp_seconds{{{labels}}} {ts}\n"
    )
    target = os.path.abspath(METRICS_FILE)
    os.makedirs(os.path.dirname(target), exist_ok=True)
    # Write to a temp file and atomically rename so metrics_server.py, which
    # reads this file concurrently, never observes a partially-written file.
    tmp_path = target + ".tmp"
    with open(tmp_path, "w") as f:
        f.write(content)
    os.replace(tmp_path, target)
# ── README update ─────────────────────────────────────────────────────────────

def insert_row_content(content: str, section_marker: str, new_row: str) -> str:
    """Insert *new_row* as the first data row of the target section's table.

    In-memory version: takes the README content as a string, finds the first
    line containing *section_marker*, then inserts *new_row* directly after
    the next markdown table separator line (e.g. ``|---|---|``).

    Args:
        content: full README text.
        section_marker: literal text identifying the environment section.
        new_row: fully formatted markdown table row (no trailing newline).

    Returns:
        The updated README content.

    Raises:
        RuntimeError: if no table separator is found after the marker.
    """
    lines = content.splitlines(keepends=True)
    in_section = False
    inserted = False
    result = []

    for line in lines:
        result.append(line)

        if section_marker in line:
            in_section = True

        if in_section and not inserted and re.match(r"^\|[\s:|-]+\|", line):
            # Edge case fix: if the separator is the file's last line and has
            # no trailing newline, add one so the new row is on its own line
            # instead of being glued onto the separator.
            if not line.endswith("\n"):
                result.append("\n")
            result.append(new_row + "\n")
            inserted = True
            in_section = False

    if not inserted:
        raise RuntimeError(
            f"Could not find table separator for section: {section_marker!r}"
        )

    return "".join(result)


def insert_row(readme_path: str, section_marker: str, new_row: str) -> None:
    """File-based wrapper around insert_row_content: read, update, write back."""
    with open(readme_path, "r") as f:
        content = f.read()

    updated = insert_row_content(content, section_marker, new_row)

    with open(readme_path, "w") as f:
        f.write(updated)

# ── Main ────────────────────────────────────────────────────────────────────

def main() -> None:
    """CLI entry point: insert a snapshot row into the README given on argv.

    Reads ENVIRONMENT, SNAPSHOT_NAME, BASE_HEIGHT, L1_MSG_HEIGHT and
    DERIV_HEIGHT from the environment, validates them, inserts a new table
    row, and records success/failure via write_metric. Exits non-zero on
    any validation or update failure.
    """
    if len(sys.argv) != 2:
        # Fixed: the usage string was missing the argument placeholder.
        print(f"Usage: {sys.argv[0]} <readme_path>", file=sys.stderr)
        sys.exit(1)

    readme_path = sys.argv[1]

    # Validate required env vars; report all missing ones at once.
    missing = [v for v in ("ENVIRONMENT", "SNAPSHOT_NAME", "BASE_HEIGHT", "L1_MSG_HEIGHT", "DERIV_HEIGHT")
               if not os.environ.get(v)]
    if missing:
        print(f"ERROR: Missing required env vars: {', '.join(missing)}", file=sys.stderr)
        write_metric(0, os.environ.get("ENVIRONMENT", "unknown"),
                     os.environ.get("SNAPSHOT_NAME", "unknown"))
        sys.exit(1)

    environment = os.environ["ENVIRONMENT"]
    snapshot_name = os.environ["SNAPSHOT_NAME"]
    base_height = os.environ["BASE_HEIGHT"]
    l1_msg_height = os.environ["L1_MSG_HEIGHT"]
    deriv_height = os.environ["DERIV_HEIGHT"]

    # Validate environment against the known section markers.
    if environment not in SECTION_MARKERS:
        print(f"ERROR: Unknown environment: {environment!r}. Must be: {' | '.join(SECTION_MARKERS)}",
              file=sys.stderr)
        write_metric(0, environment, snapshot_name)
        sys.exit(1)

    section_marker = SECTION_MARKERS[environment]
    url = f"{CDN_BASE}/{environment}/{snapshot_name}.tar.gz"
    new_row = f"| [{snapshot_name}]({url}) | {deriv_height} | {l1_msg_height} | {base_height} |"

    try:
        insert_row(readme_path, section_marker, new_row)
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        write_metric(0, environment, snapshot_name)
        sys.exit(1)

    print(f"✅ Inserted new row into [{environment}] table:")
    print(f"   {new_row}")

    write_metric(1, environment, snapshot_name)
    print(f"📊 Metrics written to: {METRICS_FILE}")


if __name__ == "__main__":
    main()