From 7dca392618e24972d2bce82f953d6a564fee9863 Mon Sep 17 00:00:00 2001 From: Cruz Molina <7537712+CruzMolina@users.noreply.github.com> Date: Wed, 15 Apr 2026 16:17:23 -0700 Subject: [PATCH] fix: refresh node setup and archival snapshot bootstrap Replace stale archival snapshot pointers with the live Gelato indexes and verify checksums before extraction. Refresh the setup docs with the validated Sepolia full-node bootstrap flow, early sync signals, and clearer wrapper configuration guidance. --- .env.example | 16 +++++++- README.md | 70 ++++++++++++++++++++++++++++--- scripts/init-bedrock.sh | 91 +++++++++++++++++++++++++++++++++++------ scripts/utils.sh | 54 ++++++++++++++++++++---- 4 files changed, 203 insertions(+), 28 deletions(-) diff --git a/.env.example b/.env.example index 9120e45..6d98f06 100644 --- a/.env.example +++ b/.env.example @@ -5,8 +5,9 @@ # Recommended first run: ink-sepolia + full NETWORK_NAME=ink-sepolia -# "full" starts from an empty datadir. "archive" downloads a network snapshot -# during bedrock-init and needs much more disk. +# "full" starts from an empty datadir. This is the recommended first run. +# "archive" restores from a per-network snapshot pointer during bedrock-init +# and needs much more disk. If the snapshot lookup fails, switch back to full. NODE_TYPE=full # For a quick Sepolia smoke test, these public endpoints worked during docs @@ -48,5 +49,16 @@ PORT__INFLUXDB= PORT__OP_GETH_HTTP= PORT__OP_GETH_WS= PORT__OP_GETH_P2P= +# PORT__OP_NODE_P2P changes the published host port. The in-container +# op-node listener still uses 9003. PORT__OP_NODE_P2P= PORT__OP_NODE_HTTP= + +# Advanced wrapper inputs shared by the shell entrypoints. Leave blank unless +# you know you need them. +# OVERRIDE_HOLOCENE appends --override.holocene= in both wrappers. +OVERRIDE_HOLOCENE= + +# EXTENDED_ARG is appended verbatim to both start commands. Only use flags that +# are valid for the process you want to affect. 
+EXTENDED_ARG= diff --git a/README.md b/README.md index 39c4561..d4025f3 100644 --- a/README.md +++ b/README.md @@ -88,9 +88,11 @@ HEALTHCHECK__REFERENCE_RPC_PROVIDER=https://rpc-gel-sepolia.inkonchain.com Configuration notes: - `NETWORK_NAME`: `ink-sepolia` or `ink-mainnet` -- `NODE_TYPE=full`: starts from an empty local datadir -- `NODE_TYPE=archive`: downloads and extracts a network snapshot during - `bedrock-init` +- `NODE_TYPE=full`: starts from an empty local datadir. This is the validated + first-run path in this repo +- `NODE_TYPE=archive`: resolves the newest archival geth datadir from the + Gelato ChainSnap index for your network, downloads the matching `.sha256`, + verifies the archive, and extracts it during `bedrock-init` - `OP_NODE__RPC_TYPE=basic`: the right default for generic providers; use `alchemy`, `quicknode`, or `erigon` only when your provider requires it - `.env` overrides the same variable for services that load `.env` in @@ -98,8 +100,14 @@ Configuration notes: `bedrock-init` - `envs//op-node.env` already supplies the network P2P defaults, so most first-time setups only need the `.env` values above +- `PORT__OP_NODE_P2P` changes the published host port in `docker-compose.yml`. + The in-container `op-node` listener still uses `9003` - For `ink-mainnet`, switch the healthcheck reference RPC to `https://rpc-gel.inkonchain.com` +- Advanced wrapper inputs such as `OVERRIDE_HOLOCENE` and `EXTENDED_ARG` live + in `.env.example`. The shell entrypoints append them to both `op-geth` and + `op-node`, so leave them empty unless you know the flag is compatible with + the process you want to change ### 4. 
Start the stack @@ -110,6 +118,7 @@ docker compose up -d --build This pulls the service images, builds the local `bedrock-init` image, creates a JWT, and starts: +- `bedrock-init` (one-time init) - `op-geth` - `op-node` - `healthcheck` @@ -117,6 +126,10 @@ JWT, and starts: - `grafana` - `influxdb` +`op-geth` and `op-node` both wait for `bedrock-init` to create +`/shared/initialized.txt`. If the stack looks stuck, check `bedrock-init` +first. + ## Validate Startup ### Check service status @@ -157,6 +170,19 @@ Rollup node RPC: curl -fsS -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"rpc_modules","params":[],"id":1}' http://127.0.0.1:9545 ``` +On `ink-sepolia`, a healthy reply includes the `optimism`, `opp2p`, and +`health` modules. + +Sync status: + +```sh +curl -fsS -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"optimism_syncStatus","params":[],"id":1}' http://127.0.0.1:9545 +``` + +On a brand-new `full` node, this is the best early signal that the rollup node +is moving forward. Look for `current_l1` and `head_l1` values to advance even +while local L2 block height is still `0x0`. + Healthcheck metrics: ```sh @@ -164,8 +190,9 @@ curl -fsS http://127.0.0.1:7300/metrics | grep -E 'healthcheck_(reference_height ``` On a brand-new `full` node, `eth_blockNumber` can stay at `0x0` for a while. -That is expected. Use `optimism_syncStatus` and the healthcheck metrics to -confirm the node is moving forward during early sync. +That is expected. During that window it is also normal for +`healthcheck_target_height` to stay at `0`. Use `optimism_syncStatus` and the +healthcheck metrics to confirm the node is moving forward during early sync. ### Open Grafana @@ -226,6 +253,10 @@ This removes all local chain and monitoring data. `progress.sh` uses Foundry's `cast` on the host machine. +The `bedrock-init` container installs Foundry for its own image build, but that +does not make `cast` available on your host shell. 
Install Foundry locally if +you want to use `progress.sh`. + Install Foundry from [https://getfoundry.sh/](https://getfoundry.sh/) and then run: @@ -269,7 +300,26 @@ If image pulls or snapshot downloads fail, make sure the host can reach: - `docker.io` - `us-docker.pkg.dev` -- `storage.googleapis.com` +- `ink.t.snapshots.gelato.cloud` +- `ink.snapshots.gelato.cloud` + +Archive snapshots are resolved from these indexes: + +- Sepolia: [https://ink.t.snapshots.gelato.cloud/index.html](https://ink.t.snapshots.gelato.cloud/index.html) +- Mainnet: [https://ink.snapshots.gelato.cloud/index.html](https://ink.snapshots.gelato.cloud/index.html) + +`bedrock-init` downloads the matching `.sha256` file and verifies the archive +before extraction. + +If `bedrock-init` exits with `Failed to resolve latest snapshot` or +`Unexpected snapshot filename format`, the index is unreachable or its format +changed. Switch back to `NODE_TYPE=full` and retry, or pick a direct archive +from the index page and update the script before retrying. + +If `bedrock-init` exits with `Unexpected checksum file format`, +`Checksum file does not match downloaded archive`, or `SHA256 verification +failed`, do not reuse that download. Retry later or verify the checksum file +from the index page before attempting another restore. ### `eth_blockNumber` stays at `0x0` right after startup @@ -301,6 +351,14 @@ docker compose down docker compose up -d --build ``` +### `error dialing static peer` appears in `op-node` logs + +That can happen during early bootstrap if a configured static peer is +temporarily unavailable. If `optimism_syncStatus.current_l1` keeps advancing, +the node is still making progress. If those errors continue and `current_l1` +stops moving, inspect `envs//op-node.env` and your outbound network +access. + ### `Walking back L1Block` appears in the logs A few reset lines during first startup are normal. 
If the node keeps printing
diff --git a/scripts/init-bedrock.sh b/scripts/init-bedrock.sh
index f6c3d33..6b47cf0 100755
--- a/scripts/init-bedrock.sh
+++ b/scripts/init-bedrock.sh
@@ -10,8 +10,58 @@ BEDROCK_JWT_PATH=/shared/jwt.txt
 GETH_DATA_DIR=$BEDROCK_DATADIR
 TORRENTS_DIR=/torrents/$NETWORK_NAME
 BEDROCK_TAR_PATH=/downloads/bedrock.tar
+BEDROCK_TAR_CHECKSUM_PATH=
 BEDROCK_TMP_PATH=/bedrock-tmp
 
+function validate_snapshot_filename() {  # exits 1 unless $1 is a plain .tar, .tar.zst or .tar.lz4 filename
+  local snapshot_filename="$1"
+
+  if [[ -z "$snapshot_filename" ]]; then
+    echo "Snapshot filename lookup returned an empty response" >&2
+    exit 1
+  fi
+
+  if [[ ! "$snapshot_filename" =~ ^[A-Za-z0-9._-]+\.tar(\.zst|\.lz4)?$ ]]; then
+    echo "Unexpected snapshot filename format: $snapshot_filename" >&2
+    exit 1
+  fi
+}
+
+function resolve_latest_snapshot_filename() {  # prints the filename at index URL $1 matching regex $2 with the highest trailing number
+  local index_url="$1"
+  local filename_regex="$2"
+  local snapshot_filename
+
+  snapshot_filename="$(
+    curl -fsS "$index_url" | python3 -c '
+import re
+import sys
+
+filename_pattern = re.compile(sys.argv[1])
+timestamp_pattern = re.compile(r"-(\d+)\.tar(?:\.(?:zst|lz4))?$")
+content = sys.stdin.read()
+
+matches = sorted(
+    {match.group(0) for match in filename_pattern.finditer(content)},
+    key=lambda name: int(timestamp_pattern.search(name).group(1)),  # order by the number right before .tar
+    reverse=True,
+)
+
+if not matches:
+    print("No matching snapshot files found in index", file=sys.stderr)
+    sys.exit(1)
+
+print(matches[0])
+' "$filename_regex"
+  )" || {
+    echo "Failed to resolve latest snapshot from $index_url" >&2
+    exit 1
+  }
+
+  validate_snapshot_filename "$snapshot_filename"
+  echo "$snapshot_filename"
+}
+
 # Exit early if we've already initialized.
 if [ -e "$INITIALIZED_FLAG" ]; then
   echo "Bedrock node already initialized"
@@ -21,18 +71,25 @@ fi
 echo "Bedrock node needs to be initialized..."
 echo "Initializing via download..."
 
-# Fix OP link with hardcoded official OP snapshot
+# Resolve the latest archival geth datadir snapshot from the ChainSnap indexes.
 echo "Fetching download link..."
 
 if [ "$NODE_TYPE" = "archive" ]; then
   if [ "$NETWORK_NAME" = "ink-sepolia" ]; then
-    SNAPSHOT_FILENAME=$(curl -s https://storage.googleapis.com/raas-op-geth-snapshots-d2a56/datadir-archive/latest)
-    BEDROCK_TAR_DOWNLOAD="https://storage.googleapis.com/raas-op-geth-snapshots-d2a56/datadir-archive/$SNAPSHOT_FILENAME"
+    SNAPSHOT_FILENAME="$(resolve_latest_snapshot_filename \
+      "https://ink.t.snapshots.gelato.cloud/index.html" \
+      'ink-sepolia-geth-archival-datadir-[0-9]+-[0-9]+\.tar(?:\.(?:zst|lz4))?')"
+    BEDROCK_TAR_DOWNLOAD="https://ink.t.snapshots.gelato.cloud/geth/archival/datadir/$SNAPSHOT_FILENAME"
     echo "Using snapshot file: $SNAPSHOT_FILENAME"
   elif [ "$NETWORK_NAME" = "ink-mainnet" ]; then
-    SNAPSHOT_FILENAME=$(curl -s https://storage.googleapis.com/raas-op-geth-snapshots-e2025/datadir-archive/latest)
-    BEDROCK_TAR_DOWNLOAD="https://storage.googleapis.com/raas-op-geth-snapshots-e2025/datadir-archive/$SNAPSHOT_FILENAME"
+    SNAPSHOT_FILENAME="$(resolve_latest_snapshot_filename \
+      "https://ink.snapshots.gelato.cloud/index.html" \
+      'ink-geth-archival-datadir-[0-9]+-[0-9]+\.tar(?:\.(?:zst|lz4))?')"
+    BEDROCK_TAR_DOWNLOAD="https://ink.snapshots.gelato.cloud/geth/archival/datadir/$SNAPSHOT_FILENAME"
     echo "Using snapshot file: $SNAPSHOT_FILENAME"
+  else
+    echo "Unsupported archive network: $NETWORK_NAME" >&2
+    exit 1
   fi
 fi
 
@@ -42,26 +99,34 @@ if [ -n "$BEDROCK_TAR_DOWNLOAD" ]; then
   elif [[ "$BEDROCK_TAR_DOWNLOAD" == *.lz4 ]]; then
     BEDROCK_TAR_PATH+=".lz4"
   fi
+  BEDROCK_TAR_CHECKSUM_PATH="${BEDROCK_TAR_PATH}.sha256"
+
+  echo "Downloading bedrock.tar checksum..."
+  curl -fsS "$BEDROCK_TAR_DOWNLOAD.sha256" -o "$BEDROCK_TAR_CHECKSUM_PATH"
 
   echo "Downloading bedrock.tar..."
-  download $BEDROCK_TAR_DOWNLOAD $BEDROCK_TAR_PATH
+  download "$BEDROCK_TAR_DOWNLOAD" "$BEDROCK_TAR_PATH"
+
+  echo "Verifying bedrock.tar checksum..."
+  verify_sha256_checksum "$BEDROCK_TAR_PATH" "$BEDROCK_TAR_CHECKSUM_PATH" "$SNAPSHOT_FILENAME"
 
   echo "Extracting bedrock.tar..."
   if [[ "$BEDROCK_TAR_DOWNLOAD" == *.zst ]]; then
-    extractzst $BEDROCK_TAR_PATH $GETH_DATA_DIR
+    extractzst "$BEDROCK_TAR_PATH" "$GETH_DATA_DIR"
   elif [[ "$BEDROCK_TAR_DOWNLOAD" == *.lz4 ]]; then
-    extractlz4 $BEDROCK_TAR_PATH $GETH_DATA_DIR
+    extractlz4 "$BEDROCK_TAR_PATH" "$GETH_DATA_DIR"
   else
-    extract $BEDROCK_TAR_PATH $GETH_DATA_DIR
+    extract "$BEDROCK_TAR_PATH" "$GETH_DATA_DIR"
   fi
 
   # Remove tar file to save disk space
-  rm $BEDROCK_TAR_PATH
+  rm "$BEDROCK_TAR_PATH"
+  rm "$BEDROCK_TAR_CHECKSUM_PATH"
 fi
 
 echo "Creating JWT..."
-mkdir -p $(dirname $BEDROCK_JWT_PATH)
-openssl rand -hex 32 > $BEDROCK_JWT_PATH
+mkdir -p "$(dirname "$BEDROCK_JWT_PATH")"
+openssl rand -hex 32 > "$BEDROCK_JWT_PATH"
 
 echo "Creating Bedrock flag..."
-touch $INITIALIZED_FLAG
+touch "$INITIALIZED_FLAG"
diff --git a/scripts/utils.sh b/scripts/utils.sh
index 9a73587..fa5293c 100755
--- a/scripts/utils.sh
+++ b/scripts/utils.sh
@@ -5,8 +5,8 @@
 #   arc: Archive to extract.
 #   loc: Location to extract to.
 function extract() {
-  mkdir -p $2
-  tar -xf $1 -C $2
+  mkdir -p "$2"
+  tar -xf "$1" -C "$2"
 }
 
 # extractzst: Extracts a zst archive into an output location.
@@ -14,8 +14,8 @@ function extract() {
 #   arc: ZST archive to extract.
 #   loc: Location to extract to.
 function extractzst() {
-  mkdir -p $2
-  tar --use-compress-program=unzstd -xf $1 -C $2
+  mkdir -p "$2"
+  tar --use-compress-program=unzstd -xf "$1" -C "$2"
 }
 
 # extractlz4: Extracts a lz4 archive into an output location.
@@ -23,8 +23,8 @@ function extractzst() {
 #   arc: lz4 archive to extract.
 #   loc: Location to extract to.
 function extractlz4() {
-  mkdir -p $2
-  tar --use-compress-program="lz4 --no-crc" -xf $1 -C $2
+  mkdir -p "$2"
+  tar --use-compress-program="lz4 --no-crc" -xf "$1" -C "$2"
 }
 
 # download: Downloads a file and provides basic progress percentages.
@@ -32,5 +32,45 @@ function extractlz4() {
 #   url: URL of the file to download.
 #   out: Location to download the file to.
 function download() {
-  aria2c --max-tries=0 -x 16 -s 16 -k100M -o $2 $1
+  local out_dir out_name
+  out_dir="$(dirname "$2")"
+  out_name="$(basename "$2")"
+
+  mkdir -p "$out_dir"
+  aria2c --max-tries=0 -x 16 -s 16 -k100M --dir="$out_dir" --out="$out_name" "$1"
+}
+
+# verify_sha256_checksum: Verifies a downloaded file against a checksum file; exits 1 on any mismatch.
+# Arguments:
+#   file_path: Path to the downloaded file.
+#   checksum_path: Path to a one-line "<sha256 hex> <name>" checksum file.
+#   expected_name: Archive filename the checksum entry must name (a leading "*" marker is ignored).
+function verify_sha256_checksum() {
+  local file_path="$1"
+  local checksum_path="$2"
+  local expected_name="$3"
+  local expected_hash checksum_name extra actual_hash
+
+  read -r expected_hash checksum_name extra < "$checksum_path" || true  # non-zero at EOF without a trailing newline; fields are validated below
+
+  if [[ -z "$expected_hash" || -z "$checksum_name" || -n "$extra" ]]; then
+    echo "Unexpected checksum file format: $checksum_path" >&2
+    exit 1
+  fi
+
+  if [[ ! "$expected_hash" =~ ^[A-Fa-f0-9]{64}$ ]]; then
+    echo "Unexpected checksum value in: $checksum_path" >&2
+    exit 1
+  fi
+
+  if [[ "${checksum_name#\*}" != "$expected_name" ]]; then  # sha256sum -b writes the name as "*name"
+    echo "Checksum file does not match downloaded archive: $checksum_path" >&2
+    exit 1
+  fi
+
+  actual_hash="$(sha256sum "$file_path" | awk '{print $1}')"
+  if [[ "$actual_hash" != "${expected_hash,,}" ]]; then  # sha256sum prints lowercase hex; accept uppercase checksum files
+    echo "SHA256 verification failed for: $file_path" >&2
+    exit 1
+  fi
 }