diff --git a/.env.example b/.env.example index 9120e45..6d98f06 100644 --- a/.env.example +++ b/.env.example @@ -5,8 +5,9 @@ # Recommended first run: ink-sepolia + full NETWORK_NAME=ink-sepolia -# "full" starts from an empty datadir. "archive" downloads a network snapshot -# during bedrock-init and needs much more disk. +# "full" starts from an empty datadir. This is the recommended first run. +# "archive" restores from a per-network snapshot pointer during bedrock-init +# and needs much more disk. If the snapshot lookup fails, switch back to full. NODE_TYPE=full # For a quick Sepolia smoke test, these public endpoints worked during docs @@ -48,5 +49,16 @@ PORT__INFLUXDB= PORT__OP_GETH_HTTP= PORT__OP_GETH_WS= PORT__OP_GETH_P2P= +# PORT__OP_NODE_P2P changes the published host port. The in-container +# op-node listener still uses 9003. PORT__OP_NODE_P2P= PORT__OP_NODE_HTTP= + +# Advanced wrapper inputs shared by the shell entrypoints. Leave blank unless +# you know you need them. +# OVERRIDE_HOLOCENE appends --override.holocene= in both wrappers. +OVERRIDE_HOLOCENE= + +# EXTENDED_ARG is appended verbatim to both start commands. Only use flags that +# are valid for the process you want to affect. +EXTENDED_ARG= diff --git a/README.md b/README.md index 39c4561..d4025f3 100644 --- a/README.md +++ b/README.md @@ -88,9 +88,11 @@ HEALTHCHECK__REFERENCE_RPC_PROVIDER=https://rpc-gel-sepolia.inkonchain.com Configuration notes: - `NETWORK_NAME`: `ink-sepolia` or `ink-mainnet` -- `NODE_TYPE=full`: starts from an empty local datadir -- `NODE_TYPE=archive`: downloads and extracts a network snapshot during - `bedrock-init` +- `NODE_TYPE=full`: starts from an empty local datadir. 
This is the validated + first-run path in this repo +- `NODE_TYPE=archive`: resolves the newest archival geth datadir from the + Gelato ChainSnap index for your network, downloads the matching `.sha256`, + verifies the archive, and extracts it during `bedrock-init` - `OP_NODE__RPC_TYPE=basic`: the right default for generic providers; use `alchemy`, `quicknode`, or `erigon` only when your provider requires it - `.env` overrides the same variable for services that load `.env` in @@ -98,8 +100,14 @@ Configuration notes: `bedrock-init` - `envs//op-node.env` already supplies the network P2P defaults, so most first-time setups only need the `.env` values above +- `PORT__OP_NODE_P2P` changes the published host port in `docker-compose.yml`. + The in-container `op-node` listener still uses `9003` - For `ink-mainnet`, switch the healthcheck reference RPC to `https://rpc-gel.inkonchain.com` +- Advanced wrapper inputs such as `OVERRIDE_HOLOCENE` and `EXTENDED_ARG` live + in `.env.example`. The shell entrypoints append them to both `op-geth` and + `op-node`, so leave them empty unless you know the flag is compatible with + the process you want to change ### 4. Start the stack @@ -110,6 +118,7 @@ docker compose up -d --build This pulls the service images, builds the local `bedrock-init` image, creates a JWT, and starts: +- `bedrock-init` (one-time init) - `op-geth` - `op-node` - `healthcheck` @@ -117,6 +126,10 @@ JWT, and starts: - `grafana` - `influxdb` +`op-geth` and `op-node` both wait for `bedrock-init` to create +`/shared/initialized.txt`. If the stack looks stuck, check `bedrock-init` +first. + ## Validate Startup ### Check service status @@ -157,6 +170,19 @@ Rollup node RPC: curl -fsS -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"rpc_modules","params":[],"id":1}' http://127.0.0.1:9545 ``` +On `ink-sepolia`, a healthy reply includes the `optimism`, `opp2p`, and +`health` modules. 
+ +Sync status: + +```sh +curl -fsS -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"optimism_syncStatus","params":[],"id":1}' http://127.0.0.1:9545 +``` + +On a brand-new `full` node, this is the best early signal that the rollup node +is moving forward. Look for `current_l1` and `head_l1` values to advance even +while local L2 block height is still `0x0`. + Healthcheck metrics: ```sh @@ -164,8 +190,9 @@ curl -fsS http://127.0.0.1:7300/metrics | grep -E 'healthcheck_(reference_height ``` On a brand-new `full` node, `eth_blockNumber` can stay at `0x0` for a while. -That is expected. Use `optimism_syncStatus` and the healthcheck metrics to -confirm the node is moving forward during early sync. +That is expected. During that window it is also normal for +`healthcheck_target_height` to stay at `0`. Use `optimism_syncStatus` and the +healthcheck metrics to confirm the node is moving forward during early sync. ### Open Grafana @@ -226,6 +253,10 @@ This removes all local chain and monitoring data. `progress.sh` uses Foundry's `cast` on the host machine. +The `bedrock-init` container installs Foundry for its own image build, but that +does not make `cast` available on your host shell. Install Foundry locally if +you want to use `progress.sh`. + Install Foundry from [https://getfoundry.sh/](https://getfoundry.sh/) and then run: @@ -269,7 +300,26 @@ If image pulls or snapshot downloads fail, make sure the host can reach: - `docker.io` - `us-docker.pkg.dev` -- `storage.googleapis.com` +- `ink.t.snapshots.gelato.cloud` +- `ink.snapshots.gelato.cloud` + +Archive snapshots are resolved from these indexes: + +- Sepolia: [https://ink.t.snapshots.gelato.cloud/index.html](https://ink.t.snapshots.gelato.cloud/index.html) +- Mainnet: [https://ink.snapshots.gelato.cloud/index.html](https://ink.snapshots.gelato.cloud/index.html) + +`bedrock-init` downloads the matching `.sha256` file and verifies the archive +before extraction. 
+ +If `bedrock-init` exits with `Failed to resolve latest snapshot` or +`Unexpected snapshot filename format`, the index is unreachable or its format +changed. Switch back to `NODE_TYPE=full` and retry, or pick a direct archive +from the index page and update the script before retrying. + +If `bedrock-init` exits with `Unexpected checksum file format`, +`Checksum file does not match downloaded archive`, or `SHA256 verification +failed`, do not reuse that download. Retry later or verify the checksum file +from the index page before attempting another restore. ### `eth_blockNumber` stays at `0x0` right after startup @@ -301,6 +351,14 @@ docker compose down docker compose up -d --build ``` +### `error dialing static peer` appears in `op-node` logs + +That can happen during early bootstrap if a configured static peer is +temporarily unavailable. If `optimism_syncStatus.current_l1` keeps advancing, +the node is still making progress. If those errors continue and `current_l1` +stops moving, inspect `envs//op-node.env` and your outbound network +access. + ### `Walking back L1Block` appears in the logs A few reset lines during first startup are normal. If the node keeps printing diff --git a/scripts/init-bedrock.sh b/scripts/init-bedrock.sh index f6c3d33..6b47cf0 100755 --- a/scripts/init-bedrock.sh +++ b/scripts/init-bedrock.sh @@ -10,8 +10,58 @@ BEDROCK_JWT_PATH=/shared/jwt.txt GETH_DATA_DIR=$BEDROCK_DATADIR TORRENTS_DIR=/torrents/$NETWORK_NAME BEDROCK_TAR_PATH=/downloads/bedrock.tar +BEDROCK_TAR_CHECKSUM_PATH= BEDROCK_TMP_PATH=/bedrock-tmp +function validate_snapshot_filename() { + local snapshot_filename="$1" + + if [[ -z "$snapshot_filename" ]]; then + echo "Snapshot filename lookup returned an empty response" >&2 + exit 1 + fi + + if [[ ! 
"$snapshot_filename" =~ ^[A-Za-z0-9._-]+\.tar(\.zst|\.lz4)?$ ]]; then + echo "Unexpected snapshot filename format: $snapshot_filename" >&2 + exit 1 + fi +} + +function resolve_latest_snapshot_filename() { + local index_url="$1" + local filename_regex="$2" + local snapshot_filename + + snapshot_filename="$( + curl -fsS "$index_url" | python3 -c ' +import re +import sys + +filename_pattern = re.compile(sys.argv[1]) +timestamp_pattern = re.compile(r"-(\d+)\.tar(?:\.(?:zst|lz4))?$") +content = sys.stdin.read() + +matches = sorted( + {match.group(0) for match in filename_pattern.finditer(content)}, + key=lambda name: int(timestamp_pattern.search(name).group(1)), + reverse=True, +) + +if not matches: + print("No matching snapshot files found in index", file=sys.stderr) + sys.exit(1) + +print(matches[0]) +' "$filename_regex" + )" || { + echo "Failed to resolve latest snapshot from $index_url" >&2 + exit 1 + } + + validate_snapshot_filename "$snapshot_filename" + echo "$snapshot_filename" +} + # Exit early if we've already initialized. if [ -e "$INITIALIZED_FLAG" ]; then echo "Bedrock node already initialized" @@ -21,18 +71,25 @@ fi echo "Bedrock node needs to be initialized..." echo "Initializing via download..." -# Fix OP link with hardcoded official OP snapshot +# Resolve the latest archival geth datadir snapshot from the ChainSnap indexes. echo "Fetching download link..." 
if [ "$NODE_TYPE" = "archive" ]; then if [ "$NETWORK_NAME" = "ink-sepolia" ]; then - SNAPSHOT_FILENAME=$(curl -s https://storage.googleapis.com/raas-op-geth-snapshots-d2a56/datadir-archive/latest) - BEDROCK_TAR_DOWNLOAD="https://storage.googleapis.com/raas-op-geth-snapshots-d2a56/datadir-archive/$SNAPSHOT_FILENAME" + SNAPSHOT_FILENAME="$(resolve_latest_snapshot_filename \ + "https://ink.t.snapshots.gelato.cloud/index.html" \ + 'ink-sepolia-geth-archival-datadir-[0-9]+-[0-9]+\.tar(?:\.(?:zst|lz4))?')" + BEDROCK_TAR_DOWNLOAD="https://ink.t.snapshots.gelato.cloud/geth/archival/datadir/$SNAPSHOT_FILENAME" echo "Using snapshot file: $SNAPSHOT_FILENAME" elif [ "$NETWORK_NAME" = "ink-mainnet" ]; then - SNAPSHOT_FILENAME=$(curl -s https://storage.googleapis.com/raas-op-geth-snapshots-e2025/datadir-archive/latest) - BEDROCK_TAR_DOWNLOAD="https://storage.googleapis.com/raas-op-geth-snapshots-e2025/datadir-archive/$SNAPSHOT_FILENAME" + SNAPSHOT_FILENAME="$(resolve_latest_snapshot_filename \ + "https://ink.snapshots.gelato.cloud/index.html" \ + 'ink-geth-archival-datadir-[0-9]+-[0-9]+\.tar(?:\.(?:zst|lz4))?')" + BEDROCK_TAR_DOWNLOAD="https://ink.snapshots.gelato.cloud/geth/archival/datadir/$SNAPSHOT_FILENAME" echo "Using snapshot file: $SNAPSHOT_FILENAME" + else + echo "Unsupported archive network: $NETWORK_NAME" >&2 + exit 1 fi fi @@ -42,26 +99,34 @@ if [ -n "$BEDROCK_TAR_DOWNLOAD" ]; then elif [[ "$BEDROCK_TAR_DOWNLOAD" == *.lz4 ]]; then BEDROCK_TAR_PATH+=".lz4" fi + BEDROCK_TAR_CHECKSUM_PATH="${BEDROCK_TAR_PATH}.sha256" + + echo "Downloading bedrock.tar checksum..." + curl -fsS "$BEDROCK_TAR_DOWNLOAD.sha256" -o "$BEDROCK_TAR_CHECKSUM_PATH" echo "Downloading bedrock.tar..." - download $BEDROCK_TAR_DOWNLOAD $BEDROCK_TAR_PATH + download "$BEDROCK_TAR_DOWNLOAD" "$BEDROCK_TAR_PATH" + + echo "Verifying bedrock.tar checksum..." + verify_sha256_checksum "$BEDROCK_TAR_PATH" "$BEDROCK_TAR_CHECKSUM_PATH" "$SNAPSHOT_FILENAME" echo "Extracting bedrock.tar..." 
if [[ "$BEDROCK_TAR_DOWNLOAD" == *.zst ]]; then - extractzst $BEDROCK_TAR_PATH $GETH_DATA_DIR + extractzst "$BEDROCK_TAR_PATH" "$GETH_DATA_DIR" elif [[ "$BEDROCK_TAR_DOWNLOAD" == *.lz4 ]]; then - extractlz4 $BEDROCK_TAR_PATH $GETH_DATA_DIR + extractlz4 "$BEDROCK_TAR_PATH" "$GETH_DATA_DIR" else - extract $BEDROCK_TAR_PATH $GETH_DATA_DIR + extract "$BEDROCK_TAR_PATH" "$GETH_DATA_DIR" fi # Remove tar file to save disk space - rm $BEDROCK_TAR_PATH + rm "$BEDROCK_TAR_PATH" + rm "$BEDROCK_TAR_CHECKSUM_PATH" fi echo "Creating JWT..." -mkdir -p $(dirname $BEDROCK_JWT_PATH) -openssl rand -hex 32 > $BEDROCK_JWT_PATH +mkdir -p "$(dirname "$BEDROCK_JWT_PATH")" +openssl rand -hex 32 > "$BEDROCK_JWT_PATH" echo "Creating Bedrock flag..." -touch $INITIALIZED_FLAG +touch "$INITIALIZED_FLAG" diff --git a/scripts/utils.sh b/scripts/utils.sh index 9a73587..fa5293c 100755 --- a/scripts/utils.sh +++ b/scripts/utils.sh @@ -5,8 +5,8 @@ # arc: Archive to extract. # loc: Location to extract to. function extract() { - mkdir -p $2 - tar -xf $1 -C $2 + mkdir -p "$2" + tar -xf "$1" -C "$2" } # extractzst: Extracts a zst archive into an output location. @@ -14,8 +14,8 @@ function extract() { # arc: ZST archive to extract. # loc: Location to extract to. function extractzst() { - mkdir -p $2 - tar --use-compress-program=unzstd -xf $1 -C $2 + mkdir -p "$2" + tar --use-compress-program=unzstd -xf "$1" -C "$2" } # extractlz4: Extracts a lz4 archive into an output location. @@ -23,8 +23,8 @@ function extractzst() { # arc: lz4 archive to extract. # loc: Location to extract to. function extractlz4() { - mkdir -p $2 - tar --use-compress-program="lz4 --no-crc" -xf $1 -C $2 + mkdir -p "$2" + tar --use-compress-program="lz4 --no-crc" -xf "$1" -C "$2" } # download: Downloads a file and provides basic progress percentages. @@ -32,5 +32,45 @@ function extractlz4() { # url: URL of the file to download. # out: Location to download the file to. 
function download() {
  local out_dir out_name
  out_dir="$(dirname "$2")"
  out_name="$(basename "$2")"

  # aria2c resolves --out relative to --dir, so hand it the destination as a
  # directory plus a bare file name instead of a single path.
  mkdir -p "$out_dir"
  aria2c --max-tries=0 -x 16 -s 16 -k100M --dir="$out_dir" --out="$out_name" "$1"
}

# verify_sha256_checksum: Verifies a downloaded file against a checksum file.
# The checksum file must hold exactly one "<sha256-hex> <name>" entry, as
# written by sha256sum (text mode "HASH  NAME" or binary mode "HASH *NAME").
# Arguments:
#   file_path: Path to the downloaded file.
#   checksum_path: Path to the checksum file.
#   expected_name: Expected archive filename from the checksum file.
# Exits the shell with status 1 and a message on stderr when the checksum
# file is malformed, names a different archive, or the digest does not match.
function verify_sha256_checksum() {
  local file_path="$1"
  local checksum_path="$2"
  local expected_name="$3"
  local expected_hash="" checksum_name="" extra="" actual_hash

  # read returns non-zero when the last line has no trailing newline (or the
  # file cannot be opened). The fields are still filled in where possible, so
  # do not let a caller's `set -e` abort here; the checks below report errors.
  read -r expected_hash checksum_name extra < "$checksum_path" || true

  if [[ -z "$expected_hash" || -z "$checksum_name" || -n "$extra" ]]; then
    echo "Unexpected checksum file format: $checksum_path" >&2
    exit 1
  fi

  if [[ ! "$expected_hash" =~ ^[A-Fa-f0-9]{64}$ ]]; then
    echo "Unexpected checksum value in: $checksum_path" >&2
    exit 1
  fi

  # sha256sum prefixes the name with "*" for binary-mode entries; the marker
  # is not part of the filename.
  checksum_name="${checksum_name#\*}"

  if [[ "$checksum_name" != "$expected_name" ]]; then
    echo "Checksum file does not match downloaded archive: $checksum_path" >&2
    exit 1
  fi

  # Upper-case hex passes the format check above, while sha256sum prints
  # lower-case; normalise so the comparison is case-insensitive.
  expected_hash="$(printf '%s' "$expected_hash" | tr 'A-F' 'a-f')"

  actual_hash="$(sha256sum "$file_path" | awk '{print $1}')"
  if [[ "$actual_hash" != "$expected_hash" ]]; then
    echo "SHA256 verification failed for: $file_path" >&2
    exit 1
  fi
}