diff --git a/website/docs/xdcchain/developers/subgraphs/performance.md b/website/docs/xdcchain/developers/subgraphs/performance.md new file mode 100644 index 00000000..f3f7caeb --- /dev/null +++ b/website/docs/xdcchain/developers/subgraphs/performance.md @@ -0,0 +1,82 @@ +--- +title: Performance +--- + +## Example Subgraphs + +### XRC20 Token Tracker + +A complete subgraph for indexing any XRC20 token with holder balances and transfer history. + +**Schema:** + +```graphql +type Token @entity { + id: ID! + symbol: String! + name: String! + decimals: Int! + totalSupply: BigInt! + transferCount: BigInt! + holderCount: BigInt! +} + +type Account @entity { + id: ID! + balances: [TokenBalance!]! @derivedFrom(field: "account") +} + +type TokenBalance @entity { + id: ID! + token: Token! + account: Account! + balance: BigInt! +} + +type Transfer @entity { + id: ID! + token: Token! + from: Account! + to: Account! + value: BigInt! + timestamp: BigInt! + blockNumber: BigInt! +} +``` + +**Mapping:** + +```typescript +import { Transfer } from "../generated/XRC20/XRC20"; +import { Token, Account, TokenBalance, Transfer as TransferEntity } from "../generated/schema"; + +export function handleTransfer(event: Transfer): void { + // Implementation as shown in previous sections +} +``` + +### NFT Marketplace Indexer + +Index NFT mints, transfers, sales, and marketplace listings. + +**Key Entities:** + +- `Collection`: NFT contract metadata +- `Token`: Individual NFT with metadata +- `Transfer`: Ownership changes +- `Sale`: Marketplace transactions with price +- `Listing`: Active marketplace listings + +### DeFi Protocol Tracker + +Index liquidity pools, swaps, deposits, and yield farming positions. 
+ +**Key Entities:** + +- `Pool`: Liquidity pool with reserves +- `Swap`: Token exchange transactions +- `LiquidityPosition`: User LP token balances +- `YieldPosition`: Staking and farming positions + +--- + diff --git a/website/docs/xdcchain/developers/subgraphs/querying.md b/website/docs/xdcchain/developers/subgraphs/querying.md new file mode 100644 index 00000000..4c17fe4a --- /dev/null +++ b/website/docs/xdcchain/developers/subgraphs/querying.md @@ -0,0 +1,176 @@ +--- +title: Querying +--- + +## Querying Subgraphs + +### GraphQL Queries + +Subgraphs expose a GraphQL endpoint for flexible data retrieval. + +**Query Structure:** + +```graphql +query GetTransfers($first: Int!, $skip: Int!) { + transfers( + first: $first + skip: $skip + orderBy: timestamp + orderDirection: desc + ) { + id + from { + id + } + to { + id + } + value + timestamp + transactionHash + } +} +``` + +**Variables:** + +```json +{ + "first": 10, + "skip": 0 +} +``` + +### Pagination + +Use `first` and `skip` for offset-based pagination: + +```graphql +query PaginatedTransfers($first: Int!, $skip: Int!) { + transfers(first: $first, skip: $skip) { + id + value + } +} +``` + +**Best Practice:** Limit `first` to 1000 maximum. For large datasets, use cursor-based pagination with `id` filtering: + +```graphql +query CursorPagination($lastId: ID!) 
{ + transfers( + first: 1000 + where: { id_gt: $lastId } + orderBy: id + orderDirection: asc + ) { + id + value + } +} +``` + +### Filtering + +Apply `where` clauses for precise data selection: + +```graphql +query FilteredTransfers { + transfers( + where: { + value_gt: "1000000000000000000" + timestamp_gt: "1700000000" + } + ) { + id + from { + id + } + to { + id + } + value + } +} +``` + +**Available Operators:** + +| Operator | Description | +|----------|-------------| +| `_eq` | Equal | +| `_gt` | Greater than | +| `_lt` | Less than | +| `_gte` | Greater than or equal | +| `_lte` | Less than or equal | +| `_in` | In array | +| `_not_in` | Not in array | +| `_contains` | Contains substring | +| `_starts_with` | Starts with | +| `_ends_with` | Ends with | + +### Sorting + +Order results with `orderBy` and `orderDirection`: + +```graphql +query SortedTransfers { + transfers( + orderBy: timestamp + orderDirection: desc + first: 50 + ) { + id + timestamp + value + } +} +``` + +### Time-Travel Queries + +Query historical state at a specific block: + +```graphql +query HistoricalState { + token(id: "0x...", block: { number: 50000000 }) { + totalSupply + holderCount + } +} +``` + +### Real-Time Subscriptions + +WebSocket subscriptions for live data updates: + +```javascript +import { createClient } from 'graphql-ws'; + +const client = createClient({ + url: 'wss://api.thegraph.com/subgraphs/name/username/xdc-token-indexer' +}); + +const subscription = client.subscribe( + { + query: ` + subscription OnNewTransfer { + transfers(orderBy: timestamp, orderDirection: desc, first: 1) { + id + from { id } + to { id } + value + } + } + ` + }, + { + next: (data) => console.log('New transfer:', data), + error: (err) => console.error(err), + complete: () => console.log('Done') + } +); +``` + +--- + diff --git a/website/docs/xdcchain/developers/usdc-quickstart.md b/website/docs/xdcchain/developers/usdc-quickstart.md new file mode 100644 index 00000000..427b48ee --- /dev/null +++ 
b/website/docs/xdcchain/developers/usdc-quickstart.md @@ -0,0 +1,207 @@ +--- +title: USDC on XDC QuickStart Guide +--- + +# USDC on XDC QuickStart Guide + +## Overview + +USDC is a digital dollar issued by Circle, also known as a stablecoin, and is available on many of the world’s leading blockchains. Designed to represent US dollars on the internet, USDC is backed 100% by highly liquid cash and cash-equivalent assets, making it redeemable 1:1 for USD. + +On the XDC Network, USDC behaves like any standard ERC-20 token — enabling fast, secure, and programmable digital dollar transactions. + +This guide walks you through building a standalone `index.js` script using **Viem** and **Node.js** to check your USDC balance and send a test transfer to another address on the **XDC Apothem Testnet**. + +--- + +## Prerequisites + +- Node.js v18+ with `"type": "module"` in `package.json` +- `viem` and `dotenv` packages installed +- An XDC Apothem testnet wallet funded with testnet USDC and XDC (for gas fees) +- A `.env` file with: + - `PRIVATE_KEY` + - `RECIPIENT_ADDRESS` + +> To get testnet USDC, use Circle’s CCTP v2 Sample App to transfer USDC cross-chain to your XDC wallet. + +--- + +## Project Setup + +### 1. Initialize Project & Install Dependencies + +```bash +npm init -y +npm install viem dotenv +``` + +### 2. Create Environment File + +In your project root, create a `.env` file and add: + +```env +PRIVATE_KEY= # Must be 0x-prefixed 64-character hex +RECIPIENT_ADDRESS=0x +``` + +### 3. Create Your Script File + +In the same directory, create a file named `index.js`. + +Ensure `"type": "module"` is set in your `package.json`. + +--- + +## Script Breakdown + +### 1. 
Import Modules & Define USDC Constants + +```js +import 'dotenv/config'; +import { createPublicClient, createWalletClient, http, formatUnits, parseUnits } from 'viem'; +import { privateKeyToAccount } from 'viem/accounts'; +import { xdcTestnet } from 'viem/chains'; + +const USDC_ADDRESS = '0xb5AB69F7bBada22B28e79C8FFAECe55eF1c771D4'; +const USDC_DECIMALS = 6; +const USDC_ABI = [ + { + name: 'balanceOf', + type: 'function', + stateMutability: 'view', + inputs: [{ name: 'account', type: 'address' }], + outputs: [{ name: '', type: 'uint256' }], + }, + { + name: 'transfer', + type: 'function', + stateMutability: 'nonpayable', + inputs: [ + { name: 'to', type: 'address' }, + { name: 'amount', type: 'uint256' }, + ], + outputs: [{ name: '', type: 'bool' }], + }, +]; +``` + +--- + +### 2. Load & Validate Environment Variables + +```js +const PRIVATE_KEY_RAW = process.env.PRIVATE_KEY; +const RECIPIENT = process.env.RECIPIENT_ADDRESS || process.env.RECIPIENT; + +if (!PRIVATE_KEY_RAW) { + console.error('Error: Set PRIVATE_KEY in your .env file'); + process.exit(1); +} +if (!RECIPIENT) { + console.error('Error: Set RECIPIENT_ADDRESS or RECIPIENT in your .env file'); + process.exit(1); +} +if (!/^0x[a-fA-F0-9]{40}$/.test(RECIPIENT)) { + console.error('Error: Recipient address is not a valid Ethereum address'); + process.exit(1); +} + +const PRIVATE_KEY = PRIVATE_KEY_RAW.startsWith('0x') ? PRIVATE_KEY_RAW : '0x' + PRIVATE_KEY_RAW; +``` + +--- + +### 3. Initialize Viem Clients + +```js +const account = privateKeyToAccount(PRIVATE_KEY); +const publicClient = createPublicClient({ chain: xdcTestnet, transport: http() }); +const walletClient = createWalletClient({ account, chain: xdcTestnet, transport: http() }); +``` + +--- + +### 4. 
Main Transfer Logic + +```js +(async () => { + try { + const balance = await publicClient.readContract({ + address: USDC_ADDRESS, + abi: USDC_ABI, + functionName: 'balanceOf', + args: [account.address], + }); + + const balanceFormatted = Number(formatUnits(balance, USDC_DECIMALS)); + const amount = 10; + + console.log('Sender:', account.address); + console.log('Recipient:', RECIPIENT); + console.log('USDC balance:', balanceFormatted); + + if (amount > balanceFormatted) { + console.error('Error: Insufficient USDC balance'); + process.exit(1); + } + + const amountInDecimals = parseUnits(amount.toString(), USDC_DECIMALS); + + const hash = await walletClient.writeContract({ + address: USDC_ADDRESS, + abi: USDC_ABI, + functionName: 'transfer', + args: [RECIPIENT, amountInDecimals], + }); + + console.log('Transfer successful!'); + console.log('Tx hash:', hash); + console.log('Explorer:', `https://testnet.xdcscan.com/tx/${hash}`); + } catch (err) { + console.error('Transfer failed:', err.message || err); + process.exit(1); + } + + process.exit(0); +})(); +``` + +--- + +## Run the Script + +Use the following command: + +```bash +node index.js +``` + +If successful, you’ll see output like: + +``` +Sender: 0x1A2b...7890 +Recipient: 0x9F8f...1234 +USDC balance: 250.0 +Transfer successful! +Tx hash: 0xabc123...def456 +Explorer: https://testnet.xdcscan.com/tx/0xabc123...def456 +``` + +--- + +## Important Notes + +* **Testnet Only**: USDC on Apothem has no real value. +* **Security**: Never commit your `.env` file. Treat private keys as sensitive. +* **Gas Fees**: Get free XDC from the [XDC Faucet](https://faucet.apothem.network/). +* **Lightweight ABI**: Only the necessary functions (`balanceOf`, `transfer`) are used. +* **Viem Behavior**: Viem auto-handles RPC interaction, account signing, and encoding/decoding. + +--- + +## Learn More + +Explore the full Circle USDC integration guide in the [Circle Developer Docs](https://developers.circle.com). 
+ + diff --git a/website/docs/xdcchain/developers/wallet-configuration.md b/website/docs/xdcchain/developers/wallet-configuration.md new file mode 100644 index 00000000..3f039508 --- /dev/null +++ b/website/docs/xdcchain/developers/wallet-configuration.md @@ -0,0 +1,315 @@ +--- +title: "Wallet Configuration — XDC Network" +description: "Step-by-step wallet setup guides for MetaMask, Trust Wallet, Ledger, Trezor, TokenPocket, and XDCPay on XDC Network." +--- + +# Wallet Configuration + +XDC Network is fully EVM-compatible, so any Ethereum wallet works. This guide covers detailed setup for the most popular wallets including MetaMask, Trust Wallet, Ledger, Trezor, TokenPocket, and XDCPay. + +**Quick Network Parameters:** + +| Parameter | XDC Mainnet | XDC Apothem Testnet | +|-----------|-------------|---------------------| +| Network Name | XDC Mainnet | XDC Apothem Testnet | +| RPC URL | `https://erpc.xinfin.network` | `https://rpc.apothem.network` | +| Chain ID | `50` | `51` | +| Currency Symbol | XDC | TXDC | +| Block Explorer | `https://xdcscan.com` | `https://testnet.xdcscan.com` | + +--- + +## MetaMask + +MetaMask is the most popular EVM wallet with over 30 million users. It supports XDC natively. + +### Option 1: Automatic Setup (Recommended) + +1. Open MetaMask and click the **network dropdown** at the top center +2. Click **"Add Network"** +3. Search for **"XDC"** in the network list +4. Select **XDC Mainnet** or **XDC Apothem Testnet** +5. Click **"Approve"** + +> 💡 **Tip:** If XDC doesn't appear in the search, use manual setup below. + +### Option 2: Manual Configuration + +1. Open MetaMask → Click the **network dropdown** → Select **"Add Network"** +2. Choose **"Add a network manually"** +3. Fill in the parameters from the table above +4. 
Click **"Save"** + +### Option 3: One-Click Add + +Click these buttons to add XDC networks automatically: + +**[Add XDC Mainnet]** | **[Add XDC Apothem Testnet]** + +### Adding XDC Tokens to MetaMask + +After adding the network, import XDC tokens: + +1. In MetaMask, click **"Import tokens"** +2. Paste the token contract address: + - Mainnet XDC: `0x0000000000000000000000000000000000000000` (native) + - Wrapped XDC (WXDC): `0x951857744785e80e2de051c32ee7b25f9c458c42` +3. Token symbol and decimals auto-fill +4. Click **"Add custom token"** + +### MetaMask Mobile Setup + +1. Download MetaMask from [App Store](https://apps.apple.com/app/metamask/id1438144202) or [Google Play](https://play.google.com/store/apps/details?id=io.metamask) +2. Create or import a wallet +3. Tap the **hamburger menu (☰)** → **Settings** → **Networks** → **Add Network** +4. Enter XDC Mainnet parameters from the table above +5. Tap **"Add"** + +--- + +## Trust Wallet + +Trust Wallet is a mobile-first wallet with built-in dApp browser and staking support. + +### Mobile Setup + +1. Download Trust Wallet from [App Store](https://apps.apple.com/app/trust-crypto-bitcoin-wallet/id1288339409) or [Google Play](https://play.google.com/store/apps/details?id=com.wallet.crypto.trustapp) +2. Create a new wallet or import an existing one +3. Tap the **settings icon (⚙️)** at the top right +4. Select **"Manage Crypto"** → **"Add Custom Token"** +5. Select **"XDC Network"** (or add manually if not listed) +6. Enter network parameters: + - Network: XDC + - RPC URL: `https://erpc.xinfin.network` + - Chain ID: `50` + - Symbol: XDC + - Explorer: `https://xdcscan.com` +7. Tap **"Save"** + +### Adding Custom Tokens + +1. In Trust Wallet, tap the **"+"** icon on the main screen +2. Search for **"XDC"** +3. If not found, tap **"Add Custom Token"** +4. Enter contract address and details +5. 
Toggle the token to show it on your main screen + +--- + +## Ledger Hardware Wallet + +Ledger provides cold storage for maximum security. + +### Prerequisites + +- Ledger Nano S, Nano S Plus, or Nano X +- Ledger Live installed ([ledger.com/start](https://www.ledger.com/start)) +- USB cable or Bluetooth (Nano X) + +### Setup Steps + +1. **Install Ethereum App** + - Open Ledger Live → **Manager** + - Connect and unlock your Ledger + - Search for **"Ethereum (ETH)"** + - Click **Install** + +2. **Connect to MetaMask** + - Open MetaMask → Click the **account icon** → **"Connect Hardware Wallet"** + - Select **Ledger** → Click **"Continue"** + - Connect your Ledger via USB + - Select the account you want to use → Click **"Unlock"** + +3. **Add XDC Network to MetaMask** + - Follow the MetaMask setup above + - Your Ledger account now works with XDC + +4. **Confirm Transactions on Ledger** + - When sending transactions, MetaMask will prompt your Ledger + - Review the transaction details on your Ledger screen + - Press both buttons to confirm + +> ⚠️ **Security:** Never enter your Ledger recovery phrase anywhere except the Ledger device itself. + +--- + +## Trezor Hardware Wallet + +Trezor is another secure hardware wallet option. + +### Prerequisites + +- Trezor Model One or Model T +- Trezor Suite installed ([suite.trezor.io](https://suite.trezor.io)) + +### Setup Steps + +1. **Connect Trezor to MetaMask** + - Open MetaMask → Click the **account icon** → **"Connect Hardware Wallet"** + - Select **Trezor** → Click **"Continue"** + - Connect your Trezor via USB + - Follow the Trezor Suite prompts to authorize + - Select the account → Click **"Unlock"** + +2. **Add XDC Network** + - Follow MetaMask setup above + - Your Trezor account now works with XDC + +3. 
**Confirm Transactions** + - Review transaction on your Trezor screen + - Confirm with the touchscreen (Model T) or buttons (Model One) + +--- + +## TokenPocket + +TokenPocket is a multi-chain wallet popular in Asia with built-in DeFi and NFT support. + +### Setup Steps + +1. Download TokenPocket from [tokenpocket.pro](https://tokenpocket.pro) +2. Create or import a wallet +3. Tap **"Me"** → **"Node Settings"** → **"Custom"** +4. Add XDC network: + - Name: XDC Mainnet + - RPC: `https://erpc.xinfin.network` + - Chain ID: `50` + - Symbol: XDC + - Explorer: `https://xdcscan.com` +5. Tap **"Save"** + +--- + +## XDCPay (XDC-Native Wallet) + +XDCPay is the official XDC browser extension wallet, similar to MetaMask but XDC-native. + +### Setup Steps + +1. Install XDCPay from the [Chrome Web Store](https://chrome.google.com/webstore) +2. Click the XDCPay icon in your browser toolbar +3. Create a new wallet or import with seed phrase +4. XDCPay comes with XDC Mainnet and Apothem Testnet **pre-configured** +5. 
No manual network setup required + +### Features + +- Native XDC address format (`xdc...`) +- Built-in XDC token support +- Direct integration with XDC dApps +- One-click network switching + +--- + +## Wallet Comparison + +| Wallet | Type | Best For | XDC Native | Hardware Support | Mobile | +|--------|------|----------|------------|------------------|--------| +| **MetaMask** | Browser/Mobile | Most users, DeFi | No | Ledger, Trezor | Yes | +| **Trust Wallet** | Mobile | Mobile-first users | No | No | Yes | +| **Ledger** | Hardware | Maximum security | Via MetaMask | Self | No | +| **Trezor** | Hardware | Maximum security | Via MetaMask | Self | No | +| **TokenPocket** | Mobile | Multi-chain, Asia | No | No | Yes | +| **XDCPay** | Browser | XDC-native dApps | Yes | No | No | + +--- + +## Address Format + +XDC supports two address formats: + +| Format | Example | Usage | +|--------|---------|-------| +| `0x` | `0x71C7656EC7ab88b098defB751B7401B5f6d8976F` | MetaMask, EVM wallets | +| `xdc` | `xdc71C7656EC7ab88b098defB751B7401B5f6d8976F` | XDC-native display | + +**Convert between formats:** + +```javascript +// xdc to 0x +const ethAddress = xdcAddress.replace('xdc', '0x'); + +// 0x to xdc +const xdcAddress = ethAddress.replace('0x', 'xdc'); +``` + +> 💡 **Tip:** MetaMask uses `0x` format internally. XDCScan shows `xdc` format. Both refer to the same account. + +--- + +## Finding the Best RPC Endpoint + +For optimal performance based on your location: + +1. Visit [Chainlist.org](https://chainlist.org/?search=xdc) or [XDC RPC](https://xdcrpc.com/) +2. Test RPC latency for your region +3. 
Choose the endpoint with the lowest latency + +**Alternative RPC URLs:** + +| Provider | Mainnet URL | Testnet URL | +|----------|-------------|-------------| +| XDC Official | `https://erpc.xinfin.network` | `https://rpc.apothem.network` | +| XDC Official (alt) | `https://rpc.xdc.org` | — | +| Ankr | `https://rpc.ankr.com/xdc` | — | +| BlocksScan | `https://rpc.xdc.blocksscan.io` | `https://rpc.apothem.blocksscan.io` | + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| "Invalid chain ID" | Ensure Chain ID is `50` (mainnet) or `51` (testnet) | +| "Could not fetch chain ID" | Try a different RPC URL from the list above | +| Transaction pending forever | Increase gas price to at least 0.25 Gwei | +| "Insufficient funds" | Ensure you have XDC for gas. Get test XDC from [faucet](https://faucet.apothem.network) | +| Address not recognized | Convert between `0x` and `xdc` formats | +| Ledger not detected | Ensure Ledger Live is closed (it can lock the USB connection) | +| MetaMask won't connect | Check that you're on XDC network, not Ethereum | +| Token not showing | Click "Import token" and enter the contract address | +| Mobile wallet crashes | Update to the latest app version | + +### Gas Price Issues + +XDC has very low gas fees (~0.25 Gwei). If transactions fail: + +1. In MetaMask, click **"Edit"** next to gas fees +2. Set **Max base fee** to at least `0.25 Gwei` +3. Set **Priority fee** to `0` +4. Click **"Save"** + +### Reset MetaMask Account + +If nonce errors occur: + +1. MetaMask → **Settings** → **Advanced** +2. Click **"Clear activity tab data"** +3. Click **"Reset account"** (this only clears local history, not your funds) + +--- + +## Security Best Practices + +1. **Never share your seed phrase** with anyone or any website +2. **Use hardware wallets** for large amounts +3. **Verify contract addresses** before interacting with dApps +4. **Enable 2FA** on mobile wallets where available +5. 
**Bookmark official sites** to avoid phishing +6. **Test on Apothem testnet** before mainnet transactions +7. **Keep wallet software updated** to latest versions + +--- + +## 🚀 Next Steps + +Now that your wallet is configured: + +1. **[Quick Start Guide →](./quick-guide.md)** — Deploy your first contract (⏱️ 5 min) +2. **[Get Test XDC](https://faucet.apothem.network)** — Fund your wallet on Apothem testnet +3. **[Smart Contract Setup →](/smartcontract/setup)** — Set up your dev environment (⏱️ 15 min) + +Or explore: +- **[RPC Endpoints →](./rpc.md)** — Connect to the network +- **[Developer Tools](https://xinfin.org/quick-tools-guide)** — Official XDC developer resources +- **[XDC Chain Overview →](../index.md)** — Learn about the network diff --git a/website/docs/xdcchain/developers/xdposchain_privatenet.md b/website/docs/xdcchain/developers/xdposchain_privatenet.md new file mode 100644 index 00000000..953f4299 --- /dev/null +++ b/website/docs/xdcchain/developers/xdposchain_privatenet.md @@ -0,0 +1,44 @@ +--- +title: Deploy Private Blockchain With XDC2.0 Consensus +--- + + +# Deploy Private Blockchain With XDC2.0 Consensus + +## Deploy With Wizard UI + + 1. Pull the `start_xdpos.sh` script from the generator GitHub repo and run it. This will start a local webserver. + ``` + curl -O https://raw.githubusercontent.com/XinFinOrg/Subnet-Deployment/v2.1.0/container-manager/start_xdpos.sh + chmod +x start_xdpos.sh + ./start_xdpos.sh + ``` + + 2. Go to [http://localhost:5210/gen_xdpos](http://localhost:5210/gen_xdpos) in your browser. +
+ If you are running this on a remote server: +

+ - If the wizard is running on your own server, first open an SSH tunnel: `ssh -N -L localhost:5210:localhost:5210 USERNAME@IP_ADDRESS -i SERVER_KEY_FILE` +
+ - If you are using VSCode Remote Explorer, the SSH tunnel is available by default +

+
+ + 3. Input your desired configuration. + ![Config](../img/privatenet_configs.png) + + 4. Submit and continue in your terminal with the below command. + ``` + cd generated; + ./docker-up.sh machine1; + ``` + ![Success](../img/privatenet_generate_success.png) + 5. Confirm the private blockchain is working correctly by running check scripts. + ``` + ./scripts/check-peer.sh + ``` + ![Check Peer](../img/privatenet_check_peer.png) + ``` + ./scripts/check-mining.sh + ``` + ![Check Mining](../img/privatenet_check_mining.png) \ No newline at end of file diff --git a/website/docs/xdcchain/devops/backup/index.md b/website/docs/xdcchain/devops/backup/index.md new file mode 100644 index 00000000..0f6ad2f3 --- /dev/null +++ b/website/docs/xdcchain/devops/backup/index.md @@ -0,0 +1,680 @@ +--- +title: "Node Backup, Recovery, and Failover" +description: "Comprehensive backup strategies, disaster recovery procedures, and high-availability failover configurations for XDC masternodes, standby nodes, and full nodes." +--- + +# Node Backup, Recovery, and Failover + +This guide covers backup strategies, recovery procedures, and failover architectures for XDC Network nodes. Proper backup and recovery planning prevents data loss, minimizes downtime, and protects validators from slashing during outages. + +## Table of Contents + +1. [What to Backup](#what-to-backup) +2. [Backup Strategies](#backup-strategies) +3. [Automated Backup Scripts](#automated-backup-scripts) +4. [Recovery Procedures](#recovery-procedures) +5. [Failover Architecture](#failover-architecture) +6. [Disaster Recovery Plan](#disaster-recovery-plan) +7. [Testing Your Backups](#testing-your-backups) +8. 
[Security Considerations](#security-considerations) + +--- + +## What to Backup + +### Critical Data Hierarchy + +| Priority | Data | Location | Size | Backup Frequency | +|----------|------|----------|------|-----------------| +| Critical | Keystore + Private Keys | `xdcchain/keystore/`, `xdcchain/coinbase.txt` | < 1 MB | Every change + daily | +| Critical | Nodekey | `xdcchain/XDC/nodekey` | 64 bytes | Every change + daily | +| High | Chain Data | `xdcchain/XDC/` | 1-4 TB | Daily incremental | +| Medium | Configuration | `.env`, `docker-compose.yml` | < 1 MB | Every change | +| Low | Logs | `xdcchain/logs/` | GBs | Optional | + +### What NOT to Backup + +- `xdcchain/XDC/nodekey` on full nodes (can be regenerated) +- Temporary files: `transactions.rlp`, `LOCK`, `tmp/` directories +- Cache directories that rebuild automatically + +--- + +## Backup Strategies + +### Strategy 1: Volume Snapshots (Cloud Native) + +Best for: Kubernetes, cloud VMs with volume snapshot support + +**AWS EBS Snapshot:** + +```bash +# Create snapshot +aws ec2 create-snapshot \ + --volume-id vol-1234567890abcdef0 \ + --description "XDC masternode backup $(date +%Y%m%d-%H%M)" + +# Tag for retention +aws ec2 create-tags \ + --resources snap-1234567890abcdef0 \ + --tags Key=Name,Value=xdc-masternode Key=Retention,Value=30days +``` + +**Automated with Lifecycle Manager:** + +```bash +aws dlm create-lifecycle-policy \ + --execution-role-arn arn:aws:iam::ACCOUNT:role/AWSDataLifecycleManagerDefaultRole \ + --description "XDC Node Daily Snapshots" \ + --state ENABLED \ + --policy-details file://snapshot-policy.json +``` + +`snapshot-policy.json`: + +```json +{ + "PolicyType": "EBS_SNAPSHOT_MANAGEMENT", + "ResourceTypes": ["VOLUME"], + "TargetTags": [{"Key": "Name", "Value": "xdc-masternode"}], + "Schedules": [{ + "Name": "Daily", + "TagsToAdd": [{"Key": "BackupType", "Value": "Daily"}], + "CreateRule": {"Interval": 24, "IntervalUnit": "HOURS", "Times": ["02:00"]}, + "RetainRule": {"Count": 14} + }] 
+} +``` + +### Strategy 2: Rsync to Remote Storage + +Best for: Bare metal, on-premise, cross-region redundancy + +```bash +#!/bin/bash +# /opt/xdc/scripts/backup-chain.sh + +set -euo pipefail + +BACKUP_DIR="/backup/xdc/$(date +%Y%m%d)" +REMOTE="backup-server:/backups/xdc" +CHAIN_DIR="/opt/xdc/xdcchain/XDC" +RETENTION_DAYS=7 + +# Create local backup +mkdir -p "$BACKUP_DIR" +rsync -a --delete --exclude='nodekey' --exclude='LOCK' \ + "$CHAIN_DIR/" "$BACKUP_DIR/chain/" + +# Sync to remote +rsync -az --delete "$BACKUP_DIR/" "$REMOTE/$(date +%Y%m%d)/" + +# Clean old backups +find /backup/xdc -maxdepth 1 -type d -mtime +$RETENTION_DAYS -exec rm -rf {} + +ssh backup-server "find /backups/xdc -maxdepth 1 -type d -mtime +$RETENTION_DAYS -exec rm -rf {} +" + +echo "Backup completed: $BACKUP_DIR" +``` + +Add to crontab: + +```bash +0 2 * * * /opt/xdc/scripts/backup-chain.sh >> /var/log/xdc-backup.log 2>&1 +``` + +### Strategy 3: S3 with Object Lock + +Best for: Immutable backups, compliance requirements + +```bash +#!/bin/bash +# backup-to-s3.sh + +BUCKET="xdc-backups" +PREFIX="mainnet/masternode-01" +CHAIN_DIR="/opt/xdc/xdcchain/XDC" + +# Sync chain data with glacier transition +aws s3 sync "$CHAIN_DIR" "s3://$BUCKET/$PREFIX/chain/$(date +%Y%m%d)/" \ + --storage-class STANDARD_IA \ + --exclude="nodekey" \ + --exclude="LOCK" \ + --exclude="*.tmp" + +# Backup keys separately with encryption +aws s3 cp /opt/xdc/xdcchain/keystore/ "s3://$BUCKET/$PREFIX/keystore/" \ + --recursive \ + --server-side-encryption AES256 + +aws s3 cp /opt/xdc/xdcchain/coinbase.txt "s3://$BUCKET/$PREFIX/coinbase.txt" \ + --server-side-encryption AES256 +``` + +### Strategy 4: Kubernetes Volume Snapshots + +Best for: Containerized deployments + +```yaml +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshot +metadata: + name: xdc-masternode-backup-daily + namespace: xdc-network +spec: + volumeSnapshotClassName: csi-aws-vsc + source: + persistentVolumeClaimName: xdc-data-xdc-masternode-0 +--- 
+apiVersion: batch/v1 +kind: CronJob +metadata: + name: xdc-snapshot-creator + namespace: xdc-network +spec: + schedule: "0 2 * * *" + jobTemplate: + spec: + template: + spec: + containers: + - name: snapshot + image: bitnami/kubectl:latest + command: + - /bin/sh + - -c + - | + kubectl create volumesnapshot xdc-masternode-backup-$(date +%Y%m%d) \ + --namespace=xdc-network \ + --volumesnapshotclass=csi-aws-vsc \ + --source=xdc-data-xdc-masternode-0 + restartPolicy: OnFailure +``` + +--- + +## Automated Backup Scripts + +### Complete Backup Script + +```bash +#!/bin/bash +# xdc-backup.sh - Comprehensive XDC node backup + +set -euo pipefail + +# Configuration +NODE_TYPE="${NODE_TYPE:-masternode}" +CHAIN_DIR="${CHAIN_DIR:-/opt/xdc/xdcchain}" +BACKUP_ROOT="${BACKUP_ROOT:-/backup/xdc}" +REMOTE_BACKUP="${REMOTE_BACKUP:-}" +S3_BUCKET="${S3_BUCKET:-}" +RETENTION_DAYS="${RETENTION_DAYS:-14}" +DATE=$(date +%Y%m%d-%H%M%S) +BACKUP_DIR="$BACKUP_ROOT/$DATE" + +# Logging +exec 1> >(tee -a "$BACKUP_ROOT/backup.log") +exec 2>&1 + +echo "=== XDC Backup Started: $DATE ===" + +# Pre-backup checks +if [ ! -d "$CHAIN_DIR" ]; then + echo "ERROR: Chain directory not found: $CHAIN_DIR" + exit 1 +fi + +# Check disk space +AVAILABLE=$(df "$BACKUP_ROOT" | awk 'NR==2 {print $4}') +REQUIRED=$(du -s "$CHAIN_DIR" | awk '{print $1}') +if [ "$AVAILABLE" -lt "$REQUIRED" ]; then + echo "ERROR: Insufficient disk space for backup" + exit 1 +fi + +# Create backup directory +mkdir -p "$BACKUP_DIR" + +# 1. Backup keys (most critical) +echo "Backing up keys..." +cp "$CHAIN_DIR/coinbase.txt" "$BACKUP_DIR/" +cp -r "$CHAIN_DIR/keystore/" "$BACKUP_DIR/" 2>/dev/null || true +cp "$CHAIN_DIR/XDC/nodekey" "$BACKUP_DIR/" 2>/dev/null || true +chmod 600 "$BACKUP_DIR"/* + +# 2. Backup configuration +echo "Backing up configuration..." +cp "$CHAIN_DIR/../.env" "$BACKUP_DIR/" 2>/dev/null || true +cp "$CHAIN_DIR/../docker-compose.yml" "$BACKUP_DIR/" 2>/dev/null || true + +# 3. 
Backup chain data (incremental with hard links) +echo "Backing up chain data..." +LATEST_BACKUP=$(ls -td "$BACKUP_ROOT"/*/ 2>/dev/null | head -1 || echo "") +if [ -n "$LATEST_BACKUP" ] && [ -d "$LATEST_BACKUP/chain" ]; then + rsync -a --delete --link-dest="$LATEST_BACKUP/chain" \ + --exclude='nodekey' --exclude='LOCK' --exclude='*.tmp' \ + "$CHAIN_DIR/XDC/" "$BACKUP_DIR/chain/" +else + rsync -a --delete \ + --exclude='nodekey' --exclude='LOCK' --exclude='*.tmp' \ + "$CHAIN_DIR/XDC/" "$BACKUP_DIR/chain/" +fi + +# 4. Create manifest +cat > "$BACKUP_DIR/MANIFEST.txt" </dev/null || true + +echo "=== Backup Completed: $BACKUP_DIR ===" +``` + +### Key-Only Backup (For Critical Recovery) + +```bash +#!/bin/bash +# xdc-keys-backup.sh - Backup only critical keys + +BACKUP_DIR="/secure/xdc-keys-$(date +%Y%m%d)" +mkdir -p "$BACKUP_DIR" + +cp /opt/xdc/xdcchain/coinbase.txt "$BACKUP_DIR/" +cp -r /opt/xdc/xdcchain/keystore/ "$BACKUP_DIR/" +cp /opt/xdc/xdcchain/XDC/nodekey "$BACKUP_DIR/" + +# Encrypt +tar -czf - "$BACKUP_DIR" | gpg --symmetric --cipher-algo AES256 > "$BACKUP_DIR.tar.gz.gpg" + +# Upload to secure storage +aws s3 cp "$BACKUP_DIR.tar.gz.gpg" s3://xdc-secure-backups/keys/ \ + --server-side-encryption AES256 + +# Cleanup +rm -rf "$BACKUP_DIR" "$BACKUP_DIR.tar.gz.gpg" +``` + +--- + +## Recovery Procedures + +### Scenario 1: Full Node Recovery from Backup + +```bash +# 1. Stop node +bash docker-down.sh + +# 2. Backup current state (if any) +mv xdcchain/XDC xdcchain/XDC.corrupted.$(date +%Y%m%d) + +# 3. Restore from backup +BACKUP_DATE="20250115" +BACKUP_PATH="/backup/xdc/$BACKUP_DATE" + +# Restore keys +cp "$BACKUP_PATH/coinbase.txt" xdcchain/ +cp -r "$BACKUP_PATH/keystore/" xdcchain/ 2>/dev/null || true +cp "$BACKUP_PATH/nodekey" xdcchain/XDC/ 2>/dev/null || true + +# Restore chain data +rsync -a "$BACKUP_PATH/chain/" xdcchain/XDC/ + +# 4. Fix permissions +chmod 600 xdcchain/keystore/* xdcchain/XDC/nodekey 2>/dev/null || true + +# 5. Start node +bash docker-up.sh + +# 6. 
Verify sync +bash xdc-attach.sh +> eth.syncing +> eth.blockNumber +``` + +### Scenario 2: State Sync (Faster than Full Restore) + +Instead of restoring multi-TB chain data, sync from network: + +```bash +# 1. Stop node +bash docker-down.sh + +# 2. Keep keys, remove chain data +rm -rf xdcchain/XDC/geth/chaindata +rm -rf xdcchain/XDC/geth/triecache + +# 3. Start with fast sync +# Edit .env to add: SYNC_MODE=fast +bash docker-up.sh + +# 4. Monitor sync +bash xdc-attach.sh +> eth.syncing +``` + +**Trade-offs:** + +| Method | Time | Data Integrity | Use Case | +|--------|------|---------------|----------| +| Full Backup Restore | Hours | Complete | Corruption, migration | +| State Sync | 30 min - 2 hours | Pruned history | Quick recovery | +| Snapshot Download | 1-4 hours | Complete | New deployment | + +### Scenario 3: Key Recovery Only + +If chain data is intact but keys are lost: + +```bash +# 1. Stop node +bash docker-down.sh + +# 2. Restore keys from backup +cp /backup/xdc/keys/coinbase.txt xdcchain/ +cp -r /backup/xdc/keys/keystore/ xdcchain/ +cp /backup/xdc/keys/nodekey xdcchain/XDC/ + +# 3. 
Start node +bash docker-up.sh +``` + +### Scenario 4: Cross-Region Recovery + +```bash +# Download from S3 in new region +aws s3 sync s3://xdc-backups/mainnet/latest/ /opt/xdc/restore/ + +# Follow Scenario 1 restore steps +``` + +--- + +## Failover Architecture + +### Hot Standby Configuration + +``` +Primary Masternode (Active) + - Region: us-east-1 + - IP: 203.0.113.10 + - Status: Producing blocks + +Standby Node (Passive) + - Region: us-west-2 + - IP: 198.51.100.20 + - Status: Synced, ready to activate + - Same coinbase, same keys + +Health Check Service + - Monitors primary every 10 seconds + - Triggers failover if primary down > 60s + +Load Balancer / DNS + - Points to active node + - Switches on failover +``` + +### Automated Failover Script + +```bash +#!/bin/bash +# xdc-failover.sh + +PRIMARY_IP="203.0.113.10" +STANDBY_IP="198.51.100.20" +HEALTH_CHECK_URL="http://$PRIMARY_IP:8545" +FAILOVER_THRESHOLD=6 # 60 seconds with 10s interval + +consecutive_failures=0 + +while true; do + if curl -sf "$HEALTH_CHECK_URL" \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_syncing","params":[],"id":1}' > /dev/null; then + consecutive_failures=0 + else + consecutive_failures=$((consecutive_failures + 1)) + echo "Health check failed ($consecutive_failures/$FAILOVER_THRESHOLD)" + + if [ "$consecutive_failures" -ge "$FAILOVER_THRESHOLD" ]; then + echo "Triggering failover..." 
+ + # Update DNS / Load Balancer to point to standby + aws route53 change-resource-record-sets \ + --hosted-zone-id ZONE_ID \ + --change-batch file://failover-dns.json + + # Alert on-call + curl -X POST "$PAGERDUTY_INTEGRATION_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "routing_key": "'"$PAGERDUTY_KEY"'", + "event_action": "trigger", + "payload": { + "summary": "XDC Masternode Failover Triggered", + "severity": "critical", + "source": "failover-script" + } + }' + + consecutive_failures=0 + fi + fi + + sleep 10 +done +``` + +### Kubernetes-Based Failover + +```yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: xdc-masternode-primary + namespace: xdc-network +spec: + replicas: 1 + selector: + matchLabels: + app: xdc-masternode + role: primary + template: + metadata: + labels: + app: xdc-masternode + role: primary + spec: + containers: + - name: xdc-node + image: xinfinorg/xinfin-node:latest + env: + - name: NODE_TYPE + value: "masternode" +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: xdc-masternode-standby + namespace: xdc-network +spec: + replicas: 1 + selector: + matchLabels: + app: xdc-masternode + role: standby + template: + metadata: + labels: + app: xdc-masternode + role: standby + spec: + containers: + - name: xdc-node + image: xinfinorg/xinfin-node:latest + env: + - name: NODE_TYPE + value: "standby" +``` + +--- + +## Disaster Recovery Plan + +### RPO and RTO Targets + +| Node Type | RPO (Data Loss) | RTO (Downtime) | Strategy | +|-----------|----------------|----------------|----------| +| Masternode | < 1 hour | < 15 minutes | Hot standby + automated failover | +| Standby | < 24 hours | < 30 minutes | Daily backups + manual promotion | +| Full Node | < 24 hours | < 2 hours | Daily backups or state sync | +| Archive | < 24 hours | < 4 hours | Weekly full + daily incremental | + +### DR Checklist + +**Immediate (0-15 minutes):** +- [ ] Confirm outage via monitoring +- [ ] Check if primary is recoverable +- [ ] 
Initiate failover if needed +- [ ] Notify stakeholders + +**Short-term (15-60 minutes):** +- [ ] Verify standby is producing blocks / serving RPC +- [ ] Check network health (peers, sync status) +- [ ] Update DNS / load balancer +- [ ] Begin root cause analysis + +**Long-term (1-24 hours):** +- [ ] Restore primary node +- [ ] Verify primary sync +- [ ] Plan failback +- [ ] Document incident + +### Communication Template + +``` +Subject: [INCIDENT] XDC Node Outage - [Severity] + +Impact: [Masternode / Full Node / RPC] in [Region] +Start Time: [UTC Timestamp] +Status: [Investigating / Failover Initiated / Resolved] + +Details: +- [Brief description] +- [Affected services] + +Actions Taken: +- [Step 1] +- [Step 2] + +Next Update: [Time] +``` + +--- + +## Testing Your Backups + +### Monthly Backup Verification + +```bash +#!/bin/bash +# backup-test.sh + +TEST_DIR="/tmp/xdc-backup-test-$(date +%Y%m%d)" +BACKUP_DIR="/backup/xdc/$(ls -t /backup/xdc | head -1)" + +# 1. Restore to test directory +mkdir -p "$TEST_DIR" +cp -r "$BACKUP_DIR/keystore" "$TEST_DIR/" +cp "$BACKUP_DIR/coinbase.txt" "$TEST_DIR/" + +# 2. Verify keystore integrity +for keyfile in "$TEST_DIR/keystore"/*; do + if ! python3 -c "import json; json.load(open('$keyfile'))" 2>/dev/null; then + echo "ERROR: Invalid keystore: $keyfile" + exit 1 + fi +done + +# 3. Verify coinbase matches keystore +COINBASE=$(cat "$TEST_DIR/coinbase.txt") +KEY_ADDRESS=$(python3 -c "import json, os; print(json.load(open('$TEST_DIR/keystore/' + os.listdir('$TEST_DIR/keystore')[0]))['address'])" 2>/dev/null) + +if [ "xdc$KEY_ADDRESS" != "$COINBASE" ]; then + echo "WARNING: Coinbase does not match keystore" +fi + +# 4. Test chain data integrity (if included) +if [ -d "$BACKUP_DIR/chain" ]; then + du -sh "$BACKUP_DIR/chain" + ls "$BACKUP_DIR/chain/" | head -5 +fi + +echo "Backup test passed: $BACKUP_DIR" +rm -rf "$TEST_DIR" +``` + +### Quarterly DR Drill + +1. **Announce drill** to team +2. 
**Simulate primary failure** (stop node, disconnect network) +3. **Execute failover** procedure +4. **Verify** standby takes over +5. **Restore primary** from backup +6. **Failback** to primary +7. **Document** lessons learned + +--- + +## Security Considerations + +### Key Handling + +- **Never** commit keys to version control +- **Encrypt** all backups containing keys +- **Use separate** storage for keys vs chain data +- **Rotate** backup encryption keys annually +- **Multi-sig** for backup access in enterprise settings + +### Access Control + +| Role | Backup Access | Recovery Access | Failover Trigger | +|------|--------------|-----------------|------------------| +| Node Operator | Read | Execute | No | +| DevOps Lead | Read/Write | Execute | Yes | +| Security Officer | Audit | Audit | No | +| On-call Engineer | Read | Execute | Yes | + +### Compliance + +- **SOC 2:** Document backup procedures, test quarterly +- **ISO 27001:** Encrypt backups, control access +- **GDPR:** Anonymize logs in backups if containing PII + +--- + +## Related Topics + +- [Kubernetes Deployment](../kubernetes/index.md): Containerized node deployment +- [Helm Charts](../helm/index.md): Helm-based deployment +- [Incident Response](../runbooks/index.md): Troubleshooting runbooks +- [Infrastructure as Code](../iac/index.md): Automated infrastructure +- [Monitoring and Observability](../monitoring/index.md): Prometheus and Grafana diff --git a/website/docs/xdcchain/devops/helm/index.md b/website/docs/xdcchain/devops/helm/index.md new file mode 100644 index 00000000..5a0bbfab --- /dev/null +++ b/website/docs/xdcchain/devops/helm/index.md @@ -0,0 +1,480 @@ +--- +title: "XDC Node Helm Chart" +description: "Production Helm chart for deploying XDC masternodes, standby nodes, and full nodes on Kubernetes with customizable values, upgrade procedures, and security defaults." 
+--- + +# XDC Node Helm Chart + +This Helm chart deploys XDC Network nodes on Kubernetes with production-ready defaults, security contexts, monitoring integration, and flexible configuration through values.yaml. + +## Prerequisites + +- Kubernetes 1.28+ +- Helm 3.12+ +- PV provisioner supporting ReadWriteOnce +- Optional: cert-manager for TLS, Prometheus Operator for metrics + +## Installation + +### Add Repository + +```bash +helm repo add xdc https://charts.xdc.network +helm repo update +``` + +### Quick Start: Full Node + +```bash +helm install xdc-fullnode xdc/xdc-node \ + --namespace xdc-network \ + --create-namespace \ + --set node.type=fullnode \ + --set persistence.size=2Ti +``` + +### Masternode Deployment + +```bash +helm install xdc-masternode xdc/xdc-node \ + --namespace xdc-network \ + --create-namespace \ + --set node.type=masternode \ + --set node.coinbase=xdcYOURADDRESS \ + --set persistence.size=4Ti \ + --set resources.requests.cpu=16 \ + --set resources.requests.memory=64Gi \ + --set service.p2p.type=LoadBalancer \ + --set secrets.keystoreExistingSecret=xdc-masternode-keys +``` + +## values.yaml Reference + +### Global Settings + +```yaml +global: + clusterDomain: cluster.local + imagePullSecrets: [] + nodeSelector: {} + tolerations: [] + affinity: {} + podAnnotations: {} + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault +``` + +### Node Configuration + +```yaml +node: + type: fullnode # masternode, standby, fullnode, archive + network: mainnet # mainnet, testnet, devnet + syncMode: snap # full, fast, snap, light + coinbase: "" # Required for masternode/standby + maxPeers: 50 + cache: 4096 # Cache size in MB + snapshot: true + gasPrice: 1 + logLevel: info # debug, info, warn, error + extraArgs: [] +``` + +### Image Configuration + +```yaml +image: + repository: xinfinorg/xinfin-node + tag: latest + pullPolicy: IfNotPresent + digest: "" # Optional: immutable 
deployments +``` + +### Service Configuration + +```yaml +service: + p2p: + type: LoadBalancer + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: nlb + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + externalTrafficPolicy: Local + + rpc: + type: ClusterIP + port: 8545 + + ws: + type: ClusterIP + port: 8888 + + metrics: + type: ClusterIP + port: 6060 + + ingress: + enabled: false + className: nginx + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + cert-manager.io/cluster-issuer: letsencrypt-prod + hosts: + - host: rpc.example.com + paths: + - path: / + pathType: Prefix + tls: + - secretName: xdc-rpc-tls + hosts: + - rpc.example.com +``` + +### Persistence + +```yaml +persistence: + enabled: true + storageClassName: xdc-fast-ssd + accessMode: ReadWriteOnce + size: 2Ti + allowVolumeExpansion: true + dataSource: {} # For snapshot initialization +``` + +### Resources + +```yaml +resources: + requests: + cpu: 4 + memory: 16Gi + limits: + cpu: 8 + memory: 32Gi +``` + +### Node Type Presets + +```yaml +presets: + masternode: + resources: + requests: + cpu: 16 + memory: 64Gi + limits: + cpu: 32 + memory: 128Gi + persistence: + size: 4Ti + service: + p2p: + type: LoadBalancer + podDisruptionBudget: + minAvailable: 1 + + standby: + resources: + requests: + cpu: 16 + memory: 64Gi + limits: + cpu: 32 + memory: 128Gi + persistence: + size: 4Ti + service: + p2p: + type: LoadBalancer + podDisruptionBudget: + minAvailable: 1 + + fullnode: + resources: + requests: + cpu: 4 + memory: 16Gi + limits: + cpu: 8 + memory: 32Gi + persistence: + size: 2Ti + service: + p2p: + type: ClusterIP + podDisruptionBudget: + minAvailable: 2 + + archive: + resources: + requests: + cpu: 16 + memory: 128Gi + limits: + cpu: 32 + memory: 256Gi + persistence: + size: 8Ti + service: + p2p: + type: ClusterIP + podDisruptionBudget: + minAvailable: 1 +``` + +### Secrets + +```yaml +secrets: + keystore: "" # Inline (not for production) + 
keystoreExistingSecret: "" # Reference existing secret + keystoreExistingSecretKey: keystore + + externalSecret: + enabled: false + refreshInterval: 1h + secretStoreRef: + name: aws-secrets-manager + kind: ClusterSecretStore + remoteRef: + key: xdc/mainnet/masternode-1 + property: keystore +``` + +### Monitoring + +```yaml +monitoring: + enabled: true + serviceMonitor: + enabled: true + interval: 15s + scrapeTimeout: 10s + namespace: monitoring + prometheusRule: + enabled: true + namespace: monitoring + grafanaDashboard: + enabled: true + namespace: monitoring + metricsPort: 6060 +``` + +### Network Policy + +```yaml +networkPolicy: + enabled: true + allowP2P: true + allowRPCFrom: + - namespaceSelector: + matchLabels: + name: ingress-nginx + - namespaceSelector: + matchLabels: + name: monitoring + allowEgress: true + customEgress: [] +``` + +### Backup + +```yaml +backup: + enabled: false + volumeSnapshot: + enabled: false + className: csi-aws-vsc + schedule: "0 2 * * *" + retention: 7 + s3: + enabled: false + bucket: xdc-backups + prefix: mainnet + schedule: "0 2 * * *" + region: us-east-1 + credentials: + existingSecret: aws-backup-credentials +``` + +## Production values.yaml Example + +```yaml +node: + type: masternode + network: mainnet + syncMode: full + coinbase: xdcYOURCOINBASEADDRESS + cache: 8192 + +image: + repository: xinfinorg/xinfin-node + tag: v2.0.0 + pullPolicy: IfNotPresent + +service: + p2p: + type: LoadBalancer + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: nlb + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + ingress: + enabled: true + className: nginx + hosts: + - host: rpc.xdc-network.example.com + paths: + - path: / + pathType: Prefix + tls: + - secretName: xdc-rpc-tls + hosts: + - rpc.xdc-network.example.com + +persistence: + storageClassName: xdc-fast-ssd + size: 4Ti + +resources: + requests: + cpu: 16 + memory: 64Gi + limits: + cpu: 32 + memory: 128Gi + +secrets: + keystoreExistingSecret: 
xdc-masternode-keys + +monitoring: + enabled: true + serviceMonitor: + enabled: true + namespace: monitoring + prometheusRule: + enabled: true + namespace: monitoring + +networkPolicy: + enabled: true + allowP2P: true + allowRPCFrom: + - namespaceSelector: + matchLabels: + name: ingress-nginx + - namespaceSelector: + matchLabels: + name: monitoring + +backup: + enabled: true + volumeSnapshot: + enabled: true + className: csi-aws-vsc + schedule: "0 2 * * *" + retention: 14 + s3: + enabled: true + bucket: xdc-backups + prefix: mainnet/masternode-1 + schedule: "0 3 * * *" + region: us-east-1 + credentials: + existingSecret: aws-backup-credentials + +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - masternode + topologyKey: kubernetes.io/hostname + +tolerations: +- key: dedicated + operator: Equal + value: xdc-nodes + effect: NoSchedule +``` + +## Upgrade Procedures + +### Helm Upgrade + +```bash +# Upgrade to new chart version +helm upgrade xdc-masternode xdc/xdc-node \ + --namespace xdc-network \ + --values production-values.yaml \ + --version 1.2.0 + +# Upgrade with new image +helm upgrade xdc-masternode xdc/xdc-node \ + --namespace xdc-network \ + --set image.tag=v2.1.0 \ + --values production-values.yaml +``` + +### Helm Rollback + +```bash +# List revisions +helm history xdc-masternode -n xdc-network + +# Rollback to previous +helm rollback xdc-masternode -n xdc-network + +# Rollback to specific revision +helm rollback xdc-masternode 3 -n xdc-network +``` + +## Uninstallation + +```bash +# Uninstall release +helm uninstall xdc-masternode -n xdc-network + +# Delete PVC (WARNING: irreversible data loss) +kubectl delete pvc -n xdc-network -l app.kubernetes.io/instance=xdc-masternode +``` + +## Troubleshooting + +### Template Debugging + +```bash +# Render templates without installing +helm template xdc-masternode xdc/xdc-node \ + 
--values production-values.yaml \ + --debug + +# Validate against cluster +helm install xdc-masternode xdc/xdc-node \ + --values production-values.yaml \ + --dry-run --debug +``` + +### Show Values + +```bash +helm show values xdc/xdc-node +helm show chart xdc/xdc-node +``` + +## Related Topics + +- [Kubernetes Deployment](../kubernetes/index.md): Raw Kubernetes manifests +- [Docker Setup](/xdcchain/developers/node_operators/docker): Single-node Docker deployment +- [Node Architecture](/xdcchain/developers/node_operators/node_architecture): XDC node internals +- [Validator Handbook](/xdcchain/developers/node_operators/validator-handbook): Validator operations +- [Backup and Recovery](../backup/index.md): Backup strategies +- [Incident Response](../runbooks/index.md): Troubleshooting runbooks diff --git a/website/docs/xdcchain/devops/iac/index.md b/website/docs/xdcchain/devops/iac/index.md new file mode 100644 index 00000000..4335502c --- /dev/null +++ b/website/docs/xdcchain/devops/iac/index.md @@ -0,0 +1,974 @@ +--- +title: "Infrastructure as Code for XDC Nodes" +description: "Terraform, Pulumi, CloudFormation, and Ansible examples for deploying XDC node infrastructure on AWS, GCP, and Azure." +--- + +# Infrastructure as Code for XDC Nodes + +This guide provides Infrastructure-as-Code (IaC) examples for deploying XDC Network node infrastructure across major cloud providers. Using IaC ensures reproducible, version-controlled, and auditable deployments. + +## Table of Contents + +1. [Overview](#overview) +2. [Terraform](#terraform) +3. [Pulumi](#pulumi) +4. [CloudFormation](#cloudformation) +5. [Ansible](#ansible) +6. [State Management](#state-management) +7. [Security Best Practices](#security-best-practices) +8. 
[Cost Estimation](#cost-estimation) + +--- + +## Overview + +### Why IaC for Blockchain Nodes + +- **Reproducibility**: Identical environments across dev, staging, production +- **Version Control**: Track infrastructure changes in Git +- **Auditability**: Know who changed what and when +- **Scalability**: Spin up new regions in minutes +- **Disaster Recovery**: Rebuild entire infrastructure from code + +### Tool Comparison + +| Tool | Best For | Learning Curve | Ecosystem | +|------|----------|---------------|-----------| +| Terraform | Multi-cloud, modules | Medium | Largest | +| Pulumi | Developers preferring code | Medium | Growing | +| CloudFormation | AWS-only, native integration | Low | AWS-specific | +| Ansible | Configuration management | Low | Mature | + +--- + +## Terraform + +### Project Structure + +``` +terraform/ +├── modules/ +│ ├── xdc-node/ +│ │ ├── main.tf +│ │ ├── variables.tf +│ │ ├── outputs.tf +│ │ └── README.md +│ └── vpc/ +│ └── ... +├── environments/ +│ ├── mainnet/ +│ │ ├── main.tf +│ │ ├── variables.tf +│ │ └── terraform.tfvars +│ └── testnet/ +│ └── ... 
+└── backend.tf +``` + +### VPC Module + +```hcl +# modules/vpc/main.tf +resource "aws_vpc" "xdc" { + cidr_block = var.vpc_cidr + enable_dns_hostnames = true + enable_dns_support = true + + tags = { + Name = "xdc-${var.environment}" + } +} + +resource "aws_internet_gateway" "xdc" { + vpc_id = aws_vpc.xdc.id + + tags = { + Name = "xdc-${var.environment}" + } +} + +resource "aws_subnet" "xdc_public" { + count = length(var.availability_zones) + vpc_id = aws_vpc.xdc.id + cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index) + availability_zone = var.availability_zones[count.index] + map_public_ip_on_launch = true + + tags = { + Name = "xdc-${var.environment}-public-${count.index + 1}" + } +} + +resource "aws_subnet" "xdc_private" { + count = length(var.availability_zones) + vpc_id = aws_vpc.xdc.id + cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index + 10) + availability_zone = var.availability_zones[count.index] + + tags = { + Name = "xdc-${var.environment}-private-${count.index + 1}" + } +} + +resource "aws_security_group" "xdc_node" { + name_prefix = "xdc-node-" + vpc_id = aws_vpc.xdc.id + + # P2P + ingress { + from_port = 30303 + to_port = 30303 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 30303 + to_port = 30303 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + } + + # RPC (restricted) + ingress { + from_port = 8545 + to_port = 8545 + protocol = "tcp" + cidr_blocks = var.allowed_rpc_cidrs + } + + # SSH + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = var.allowed_ssh_cidrs + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = { + Name = "xdc-${var.environment}-node" + } +} +``` + +### XDC Node Module + +```hcl +# modules/xdc-node/main.tf +resource "aws_instance" "xdc_node" { + ami = var.ami_id + instance_type = var.instance_type + subnet_id = var.subnet_id + vpc_security_group_ids = [var.security_group_id] + key_name = var.key_name + + 
root_block_device { + volume_size = 100 + volume_type = "gp3" + encrypted = true + } + + ebs_block_device { + device_name = "/dev/sdf" + volume_size = var.chain_data_size + volume_type = "gp3" + iops = 16000 + throughput = 1000 + encrypted = true + } + + user_data = templatefile("${path.module}/userdata.sh", { + node_type = var.node_type + network = var.network + coinbase = var.coinbase + snapshot_url = var.snapshot_url + }) + + tags = { + Name = "xdc-${var.environment}-${var.node_type}-${var.index}" + Environment = var.environment + NodeType = var.node_type + } +} + +resource "aws_eip" "xdc_node" { + count = var.node_type == "masternode" ? 1 : 0 + instance = aws_instance.xdc_node.id + domain = "vpc" + + tags = { + Name = "xdc-${var.environment}-masternode-ip" + } +} + +resource "aws_cloudwatch_metric_alarm" "xdc_cpu" { + alarm_name = "xdc-${var.environment}-${var.node_type}-cpu" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = "3" + metric_name = "CPUUtilization" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "80" + alarm_description = "CPU utilization high" + + dimensions = { + InstanceId = aws_instance.xdc_node.id + } +} +``` + +### User Data Script + +```bash +#!/bin/bash +# modules/xdc-node/userdata.sh + +set -e + +# Update system +apt-get update +apt-get install -y docker.io docker-compose git jq awscli + +# Mount data volume +mkfs -t ext4 /dev/nvme1n1 +mkdir -p /opt/xdc +mount /dev/nvme1n1 /opt/xdc +echo '/dev/nvme1n1 /opt/xdc ext4 defaults,nofail 0 2' >> /etc/fstab + +# Clone node repository +cd /opt/xdc +git clone https://github.com/XinFinOrg/XinFin-Node.git +cd XinFin-Node + +# Configure +mkdir -p mainnet +cat > mainnet/.env < /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json <<'CWEOF' +{ + "metrics": { + "namespace": "XDC/Nodes", + "metrics_collected": { + "disk": { + "measurement": ["used_percent"], + "resources": ["/opt/xdc"] + }, + "mem": { + "measurement": ["used_percent"] + } + } + 
} +} +CWEOF + +systemctl enable amazon-cloudwatch-agent +systemctl start amazon-cloudwatch-agent +``` + +### Mainnet Environment + +```hcl +# environments/mainnet/main.tf +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } + + backend "s3" { + bucket = "xdc-terraform-state" + key = "mainnet/terraform.tfstate" + region = "us-east-1" + encrypt = true + dynamodb_table = "terraform-locks" + } +} + +provider "aws" { + region = var.region + + default_tags { + tags = { + Environment = "mainnet" + Project = "xdc-network" + ManagedBy = "terraform" + } + } +} + +module "vpc" { + source = "../../modules/vpc" + + environment = "mainnet" + vpc_cidr = "10.0.0.0/16" + availability_zones = ["us-east-1a", "us-east-1b", "us-east-1c"] + allowed_rpc_cidrs = ["10.0.0.0/8"] + allowed_ssh_cidrs = ["YOUR_OFFICE_IP/32"] +} + +module "masternode" { + source = "../../modules/xdc-node" + + environment = "mainnet" + node_type = "masternode" + index = 1 + ami_id = "ami-0c02fb55956c7d316" # Ubuntu 22.04 + instance_type = "m6i.4xlarge" + subnet_id = module.vpc.public_subnet_ids[0] + security_group_id = module.vpc.security_group_id + key_name = var.ssh_key_name + chain_data_size = 4000 + network = "mainnet" + coinbase = var.masternode_coinbase +} + +module "fullnode" { + source = "../../modules/xdc-node" + + count = 2 + + environment = "mainnet" + node_type = "fullnode" + index = count.index + 1 + ami_id = "ami-0c02fb55956c7d316" + instance_type = "m6i.2xlarge" + subnet_id = module.vpc.private_subnet_ids[count.index] + security_group_id = module.vpc.security_group_id + key_name = var.ssh_key_name + chain_data_size = 2000 + network = "mainnet" +} +``` + +### Usage + +```bash +# Initialize +cd environments/mainnet +terraform init + +# Plan +terraform plan -var-file="terraform.tfvars" + +# Apply +terraform apply -var-file="terraform.tfvars" + +# Destroy (emergency) +terraform destroy -var-file="terraform.tfvars" +``` + +--- + +## Pulumi + +### 
TypeScript Example + +```typescript +// pulumi/index.ts +import * as aws from "@pulumi/aws"; +import * as pulumi from "@pulumi/pulumi"; + +const config = new pulumi.Config(); +const environment = config.require("environment"); +const nodeType = config.get("nodeType") || "fullnode"; + +// VPC +const vpc = new aws.ec2.Vpc("xdc-vpc", { + cidrBlock: "10.0.0.0/16", + enableDnsHostnames: true, + enableDnsSupport: true, + tags: { Name: `xdc-${environment}` }, +}); + +const igw = new aws.ec2.InternetGateway("xdc-igw", { + vpcId: vpc.id, + tags: { Name: `xdc-${environment}` }, +}); + +const subnet = new aws.ec2.Subnet("xdc-subnet", { + vpcId: vpc.id, + cidrBlock: "10.0.1.0/24", + availabilityZone: "us-east-1a", + mapPublicIpOnLaunch: true, + tags: { Name: `xdc-${environment}-public` }, +}); + +// Security Group +const sg = new aws.ec2.SecurityGroup("xdc-sg", { + vpcId: vpc.id, + ingress: [ + { protocol: "tcp", fromPort: 30303, toPort: 30303, cidrBlocks: ["0.0.0.0/0"] }, + { protocol: "udp", fromPort: 30303, toPort: 30303, cidrBlocks: ["0.0.0.0/0"] }, + { protocol: "tcp", fromPort: 22, toPort: 22, cidrBlocks: [config.require("sshCidr")] }, + ], + egress: [{ protocol: "-1", fromPort: 0, toPort: 0, cidrBlocks: ["0.0.0.0/0"] }], + tags: { Name: `xdc-${environment}` }, +}); + +// EC2 Instance +const instance = new aws.ec2.Instance("xdc-node", { + ami: "ami-0c02fb55956c7d316", + instanceType: nodeType === "masternode" ? "m6i.4xlarge" : "m6i.2xlarge", + subnetId: subnet.id, + vpcSecurityGroupIds: [sg.id], + keyName: config.require("keyName"), + rootBlockDevice: { + volumeSize: 100, + volumeType: "gp3", + encrypted: true, + }, + ebsBlockDevices: [{ + deviceName: "/dev/sdf", + volumeSize: nodeType === "masternode" ? 
4000 : 2000, + volumeType: "gp3", + iops: 16000, + throughput: 1000, + encrypted: true, + }], + userData: `#!/bin/bash + apt-get update + apt-get install -y docker.io docker-compose git + mkfs -t ext4 /dev/nvme1n1 + mkdir -p /opt/xdc + mount /dev/nvme1n1 /opt/xdc + cd /opt/xdc + git clone https://github.com/XinFinOrg/XinFin-Node.git + cd XinFin-Node/mainnet + docker-compose up -d + `, + tags: { + Name: `xdc-${environment}-${nodeType}`, + Environment: environment, + NodeType: nodeType, + }, +}); + +// Elastic IP for masternode +if (nodeType === "masternode") { + new aws.ec2.Eip("xdc-eip", { + instance: instance.id, + domain: "vpc", + tags: { Name: `xdc-${environment}-masternode` }, + }); +} + +export const instanceId = instance.id; +export const publicIp = instance.publicIp; +export const privateIp = instance.privateIp; +``` + +### Python Example + +```python +# pulumi/__main__.py +import pulumi +import pulumi_aws as aws + +config = pulumi.Config() +environment = config.require("environment") +node_type = config.get("node_type") or "fullnode" + +vpc = aws.ec2.Vpc("xdc-vpc", + cidr_block="10.0.0.0/16", + enable_dns_hostnames=True, + tags={"Name": f"xdc-{environment}"}) + +subnet = aws.ec2.Subnet("xdc-subnet", + vpc_id=vpc.id, + cidr_block="10.0.1.0/24", + availability_zone="us-east-1a", + map_public_ip_on_launch=True) + +sg = aws.ec2.SecurityGroup("xdc-sg", + vpc_id=vpc.id, + ingress=[ + {"protocol": "tcp", "from_port": 30303, "to_port": 30303, "cidr_blocks": ["0.0.0.0/0"]}, + {"protocol": "tcp", "from_port": 22, "to_port": 22, "cidr_blocks": [config.require("ssh_cidr")]}, + ], + egress=[{"protocol": "-1", "from_port": 0, "to_port": 0, "cidr_blocks": ["0.0.0.0/0"]}]) + +instance = aws.ec2.Instance("xdc-node", + ami="ami-0c02fb55956c7d316", + instance_type="m6i.4xlarge" if node_type == "masternode" else "m6i.2xlarge", + subnet_id=subnet.id, + vpc_security_group_ids=[sg.id], + key_name=config.require("key_name"), + tags={"Name": f"xdc-{environment}-{node_type}"}) + 
+pulumi.export("instance_id", instance.id) +pulumi.export("public_ip", instance.public_ip) +``` + +### Pulumi Usage + +```bash +# Login to Pulumi Cloud (or self-hosted) +pulumi login + +# Create stack +pulumi stack init mainnet + +# Set configuration +pulumi config set environment mainnet +pulumi config set nodeType masternode +pulumi config set sshCidr 203.0.113.0/24 --secret + +# Deploy +pulumi up + +# Destroy +pulumi destroy +``` + +--- + +## CloudFormation + +### Template + +```yaml +AWSTemplateFormatVersion: '2010-09-09' +Description: 'XDC Network Node Infrastructure' + +Parameters: + Environment: + Type: String + Default: mainnet + AllowedValues: [mainnet, testnet] + NodeType: + Type: String + Default: fullnode + AllowedValues: [masternode, fullnode, archive] + KeyName: + Type: AWS::EC2::KeyPair::KeyName + Description: EC2 Key Pair + VpcCidr: + Type: String + Default: 10.0.0.0/16 + SshCidr: + Type: String + Default: 0.0.0.0/0 + Description: CIDR allowed for SSH + +Resources: + VPC: + Type: AWS::EC2::VPC + Properties: + CidrBlock: !Ref VpcCidr + EnableDnsHostnames: true + EnableDnsSupport: true + Tags: + - Key: Name + Value: !Sub "xdc-${Environment}" + + InternetGateway: + Type: AWS::EC2::InternetGateway + Properties: + Tags: + - Key: Name + Value: !Sub "xdc-${Environment}" + + AttachGateway: + Type: AWS::EC2::VPCGatewayAttachment + Properties: + VpcId: !Ref VPC + InternetGatewayId: !Ref InternetGateway + + PublicSubnet: + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + CidrBlock: !Select [0, !Cidr [!Ref VpcCidr, 3, 8]] + AvailabilityZone: !Select [0, !GetAZs ''] + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: !Sub "xdc-${Environment}-public" + + SecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: XDC Node Security Group + VpcId: !Ref VPC + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: 30303 + ToPort: 30303 + CidrIp: 0.0.0.0/0 + - IpProtocol: udp + FromPort: 30303 + ToPort: 30303 + CidrIp: 0.0.0.0/0 + - 
IpProtocol: tcp + FromPort: 22 + ToPort: 22 + CidrIp: !Ref SshCidr + SecurityGroupEgress: + - IpProtocol: '-1' + CidrIp: 0.0.0.0/0 + Tags: + - Key: Name + Value: !Sub "xdc-${Environment}" + + NodeInstance: + Type: AWS::EC2::Instance + Properties: + ImageId: ami-0c02fb55956c7d316 + InstanceType: !If [IsMasternode, m6i.4xlarge, m6i.2xlarge] + KeyName: !Ref KeyName + SubnetId: !Ref PublicSubnet + SecurityGroupIds: + - !Ref SecurityGroup + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: 100 + VolumeType: gp3 + Encrypted: true + - DeviceName: /dev/sdf + Ebs: + VolumeSize: !If [IsMasternode, 4000, 2000] + VolumeType: gp3 + Iops: 16000 + Throughput: 1000 + Encrypted: true + UserData: + Fn::Base64: !Sub | + #!/bin/bash + apt-get update + apt-get install -y docker.io docker-compose git + mkfs -t ext4 /dev/nvme1n1 + mkdir -p /opt/xdc + mount /dev/nvme1n1 /opt/xdc + cd /opt/xdc + git clone https://github.com/XinFinOrg/XinFin-Node.git + cd XinFin-Node/mainnet + docker-compose up -d + Tags: + - Key: Name + Value: !Sub "xdc-${Environment}-${NodeType}" + - Key: NodeType + Value: !Ref NodeType + + MasternodeEIP: + Type: AWS::EC2::EIP + Condition: IsMasternode + Properties: + Domain: vpc + InstanceId: !Ref NodeInstance + +Conditions: + IsMasternode: !Equals [!Ref NodeType, masternode] + +Outputs: + InstanceId: + Description: EC2 Instance ID + Value: !Ref NodeInstance + PublicIp: + Description: Public IP Address + Value: !GetAtt NodeInstance.PublicIp + PrivateIp: + Description: Private IP Address + Value: !GetAtt NodeInstance.PrivateIp +``` + +### Usage + +```bash +# Create stack +aws cloudformation create-stack \ + --stack-name xdc-mainnet-masternode \ + --template-body file://template.yaml \ + --parameters \ + ParameterKey=Environment,ParameterValue=mainnet \ + ParameterKey=NodeType,ParameterValue=masternode \ + ParameterKey=KeyName,ParameterValue=my-key \ + --capabilities CAPABILITY_IAM + +# Update stack +aws cloudformation update-stack \ + --stack-name 
xdc-mainnet-masternode \ + --template-body file://template.yaml \ + --parameters ... + +# Delete stack +aws cloudformation delete-stack \ + --stack-name xdc-mainnet-masternode +``` + +--- + +## Ansible + +### Playbook + +```yaml +# ansible/playbook.yml +--- +- name: Deploy XDC Node + hosts: xdc_nodes + become: yes + vars: + xdc_version: "latest" + network: "mainnet" + node_type: "fullnode" + data_dir: "/opt/xdc" + + tasks: + - name: Update apt cache + apt: + update_cache: yes + cache_valid_time: 3600 + + - name: Install dependencies + apt: + name: + - docker.io + - docker-compose + - git + - jq + - awscli + state: present + + - name: Create data directory + file: + path: "{{ data_dir }}" + state: directory + owner: root + group: root + mode: '0755' + + - name: Mount data volume + mount: + path: "{{ data_dir }}" + src: /dev/nvme1n1 + fstype: ext4 + state: mounted + when: ansible_devices.nvme1n1 is defined + + - name: Format data volume + filesystem: + fstype: ext4 + dev: /dev/nvme1n1 + when: ansible_devices.nvme1n1 is defined + + - name: Clone XinFin-Node repository + git: + repo: https://github.com/XinFinOrg/XinFin-Node.git + dest: "{{ data_dir }}/XinFin-Node" + version: master + + - name: Configure environment + template: + src: env.j2 + dest: "{{ data_dir }}/XinFin-Node/{{ network }}/.env" + mode: '0600' + vars: + node_name: "xdc-{{ node_type }}-{{ inventory_hostname }}" + + - name: Start XDC node + docker_compose: + project_src: "{{ data_dir }}/XinFin-Node/{{ network }}" + state: present + detached: yes + + - name: Configure UFW + ufw: + rule: allow + port: "{{ item }}" + proto: "{{ 'tcp' if item != '30303' else 'any' }}" + loop: + - 30303 + - 8545 + - 22 + + - name: Enable UFW + ufw: + state: enabled + policy: deny + + - name: Setup logrotate + template: + src: logrotate-xdc.j2 + dest: /etc/logrotate.d/xdc + + - name: Configure monitoring + template: + src: prometheus.yml.j2 + dest: /opt/prometheus/prometheus.yml + notify: restart prometheus + + handlers: + - 
name: restart prometheus + service: + name: prometheus + state: restarted +``` + +### Inventory + +```ini +# ansible/inventory.ini +[masternodes] +masternode-1 ansible_host=203.0.113.10 node_type=masternode + +[fullnodes] +fullnode-1 ansible_host=198.51.100.20 node_type=fullnode +fullnode-2 ansible_host=198.51.100.21 node_type=fullnode + +[standby] +standby-1 ansible_host=192.0.2.30 node_type=standby + +[xdc_nodes:children] +masternodes +fullnodes +standby + +[xdc_nodes:vars] +ansible_user=ubuntu +ansible_ssh_private_key_file=~/.ssh/xdc-nodes.pem +ansible_python_interpreter=/usr/bin/python3 +``` + +### Usage + +```bash +# Install roles +ansible-galaxy install -r requirements.yml + +# Check connectivity +ansible -i inventory.ini xdc_nodes -m ping + +# Deploy +ansible-playbook -i inventory.ini playbook.yml + +# Deploy specific group +ansible-playbook -i inventory.ini playbook.yml --limit masternodes + +# Check status +ansible -i inventory.ini xdc_nodes -a "docker ps" +``` + +--- + +## State Management + +### Terraform Remote State + +```hcl +# backend.tf +terraform { + backend "s3" { + bucket = "xdc-terraform-state" + key = "${var.environment}/terraform.tfstate" + region = "us-east-1" + encrypt = true + dynamodb_table = "terraform-locks" + } +} +``` + +### State Locking + +```bash +# Create DynamoDB table for locking +aws dynamodb create-table \ + --table-name terraform-locks \ + --attribute-definitions AttributeName=LockID,AttributeType=S \ + --key-schema AttributeName=LockID,KeyType=HASH \ + --billing-mode PAY_PER_REQUEST +``` + +### Pulumi State + +```bash +# Use Pulumi Cloud (recommended) +pulumi login + +# Or self-hosted S3 backend +pulumi login s3://xdc-pulumi-state +``` + +--- + +## Security Best Practices + +### Secret Management + +| Secret | Storage | Access | +|--------|---------|--------| +| AWS Credentials | IAM Roles | Instance profiles | +| SSH Keys | AWS Secrets Manager | IAM-controlled | +| Node Keys | HashiCorp Vault | AppRole authentication | +| 
API Keys | AWS Secrets Manager | Lambda/ECS only | + +### Network Security + +- Use private subnets for full nodes +- Restrict SSH to bastion hosts or VPN +- Enable VPC Flow Logs +- Use AWS Network Firewall or Security Groups + +### Encryption + +- EBS volumes encrypted with KMS +- S3 buckets with SSE-S3 or SSE-KMS +- Terraform state encrypted at rest +- Secrets encrypted in transit (TLS 1.3) + +--- + +## Cost Estimation + +### AWS Monthly Costs (us-east-1) + +| Component | Masternode | Full Node | Archive | +|-----------|-----------|-----------|---------| +| EC2 (on-demand) | $500-700 | $250-350 | $1000-1400 | +| EBS (gp3) | $320-640 | $160-320 | $640-1280 | +| Data Transfer | $50-100 | $20-50 | $100-200 | +| Load Balancer | $20-30 | $0 | $0 | +| Monitoring | $20-40 | $10-20 | $40-80 | +| **Total** | **$910-1510** | **$440-740** | **$1780-2960** | + +### Cost Optimization + +- Use Reserved Instances (40-60% savings) +- Use Spot Instances for full nodes (not masternodes) +- Enable EBS gp3 without provisioning IOPS unless needed +- Use S3 Intelligent-Tiering for backups + +--- + +## Related Topics + +- [Kubernetes Deployment](../kubernetes/index.md): Containerized deployment +- [Helm Charts](../helm/index.md): Helm chart deployment +- [Backup and Recovery](../backup/index.md): Backup strategies +- [Monitoring and Observability](../monitoring/index.md): Prometheus and Grafana diff --git a/website/docs/xdcchain/devops/kubernetes/index.md b/website/docs/xdcchain/devops/kubernetes/index.md new file mode 100644 index 00000000..829f3ba2 --- /dev/null +++ b/website/docs/xdcchain/devops/kubernetes/index.md @@ -0,0 +1,1323 @@ +--- +title: "Deploy XDC Nodes on Kubernetes +description: Production-ready Kubernetes deployment guide for XDC masternodes, standby nodes, and full nodes with StatefulSets, persistent storage, monitoring, and security best practices." 
+--- + +# Deploy XDC Nodes on Kubernetes + +This guide provides production-ready Kubernetes manifests and operational procedures for deploying XDC Network nodes on any conformant Kubernetes cluster including AWS EKS, Google GKE, and Azure AKS. It covers masternodes, standby nodes, full nodes, and archive nodes with real-world configurations you can apply directly. + +## Table of Contents + +1. [Overview](#overview) +2. [Prerequisites](#prerequisites) +3. [Architecture](#architecture) +4. [Resource Requirements](#resource-requirements) +5. [Quick Start](#quick-start) +6. [Namespace and RBAC](#namespace-and-rbac) +7. [Storage Configuration](#storage-configuration) +8. [ConfigMap: Node Configuration](#configmap-node-configuration) +9. [Secret: Key Management](#secret-key-management) +10. [StatefulSet Deployment](#statefulset-deployment) +11. [Service Definitions](#service-definitions) +12. [Network Policies](#network-policies) +13. [Pod Disruption Budget](#pod-disruption-budget) +14. [Monitoring: Prometheus and Grafana](#monitoring-prometheus-and-grafana) +15. [Backup Strategies](#backup-strategies) +16. [Upgrade Procedures](#upgrade-procedures) +17. [Cloud Provider Examples](#cloud-provider-examples) +18. [Troubleshooting](#troubleshooting) +19. [Security Checklist](#security-checklist) + +--- + +## Overview + +Running XDC nodes on Kubernetes provides high availability through pod rescheduling and health checks, scalability for RPC nodes, operational consistency via declarative configuration, and native observability through Prometheus and Grafana integration. 
+ +### Node Types Supported + +| Type | Purpose | Replicas | Storage | Network Exposure | +|------|---------|----------|---------|-----------------| +| Masternode | Block production and validation | 1 | 2 TB+ SSD | Public IP required | +| Standby Node | Failover candidate | 1-3 | 2 TB+ SSD | Public IP required | +| Full Node | RPC serving and sync | 1-N | 2 TB+ SSD | Internal only | +| Archive Node | Historical state queries | 1 | 4 TB+ SSD | Internal only | + +Masternodes and standby nodes require a static public IP address for P2P communication on port 30303. Full nodes and archive nodes can operate with internal cluster IPs. + +--- + +## Prerequisites + +### Cluster Requirements + +- Kubernetes 1.28 or later +- Container runtime: containerd 1.7+ or Docker 24+ +- CNI plugin: Calico, Cilium, or AWS VPC CNI +- CSI driver for block storage: EBS, GCE PD, or Azure Disk +- Metrics Server installed and running + +### Tools + +```bash +kubectl version --client +helm version +``` + +### XDC-Specific Requirements + +- Coinbase address for masternode rewards +- 10 million XDC staked for mainnet masternodes +- KYC completion for mainnet validator participation +- Static public IP or LoadBalancer for P2P port 30303 + +--- + +## Architecture + +### Deployment Topology + +``` +Kubernetes Cluster + xdc-network Namespace + Masternode Pod (xdc-masternode-0) + Persistent Volume Claim: xdc-data-xdc-masternode-0 (2TB) + Container: xinfin-node + Ports: 30303 (P2P), 8545 (RPC), 8888 (WS), 6060 (metrics) + + Standby Pod (xdc-standby-0) + Persistent Volume Claim: xdc-data-xdc-standby-0 (2TB) + Container: xinfin-node + Ports: 30303 (P2P), 8545 (RPC), 8888 (WS), 6060 (metrics) + + Full Node Pods (xdc-fullnode-0, xdc-fullnode-1, xdc-fullnode-2) + Persistent Volume Claims: xdc-data-xdc-fullnode-0/1/2 (2TB each) + Containers: xinfin-node + Ports: 30303 (P2P), 8545 (RPC), 8888 (WS), 6060 (metrics) + + Services: + xdc-masternode (Headless, ClusterIP None) + xdc-masternode-p2p (LoadBalancer, 
port 30303) + xdc-rpc-internal (ClusterIP, port 8545) + + ConfigMaps: + xdc-config (network, sync mode, RPC settings) + + Secrets: + xdc-keys (keystore, coinbase address) +``` + +### Network Flow + +- P2P: Port 30303 TCP and UDP for peer discovery and block sync +- RPC: Port 8545 for JSON-RPC API, internal only, exposed via Ingress +- WebSocket: Port 8888 for WebSocket subscriptions +- Metrics: Port 6060 for Prometheus scraping + +--- + +## Resource Requirements + +### Minimum per Node + +| Resource | Masternode | Standby | Full Node | Archive | +|----------|-----------|---------|-----------|---------| +| CPU | 8 cores | 8 cores | 4 cores | 16 cores | +| Memory | 32 GB | 32 GB | 16 GB | 64 GB | +| Storage | 2 TB SSD | 2 TB SSD | 2 TB SSD | 4 TB SSD | +| IOPS | 10,000 | 10,000 | 5,000 | 15,000 | +| Network | 1 Gbps | 1 Gbps | 500 Mbps | 1 Gbps | + +### Recommended per Node + +| Resource | Masternode | Standby | Full Node | Archive | +|----------|-----------|---------|-----------|---------| +| CPU | 16 cores | 16 cores | 8 cores | 32 cores | +| Memory | 64 GB | 64 GB | 32 GB | 128 GB | +| Storage | 4 TB NVMe | 4 TB NVMe | 2 TB NVMe | 8 TB NVMe | +| IOPS | 50,000 | 50,000 | 20,000 | 50,000 | +| Network | 10 Gbps | 10 Gbps | 1 Gbps | 10 Gbps | + +### Resource Quotas + +Apply this ResourceQuota to the xdc-network namespace to prevent resource exhaustion: + +```yaml +apiVersion: v1 +kind: ResourceQuota +metadata: + name: xdc-network-quota + namespace: xdc-network +spec: + hard: + requests.cpu: "64" + requests.memory: 256Gi + limits.cpu: "128" + limits.memory: 512Gi + persistentvolumeclaims: "10" + services.loadbalancers: "2" +``` + +--- + +## Quick Start + +For users who want to deploy immediately, run these commands in order: + +```bash +# 1. Create namespace +kubectl create namespace xdc-network + +# 2. 
Apply ConfigMap +kubectl apply -f - < 0' + initialDelaySeconds: 300 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 3 + readinessProbe: + exec: + command: + - /bin/sh + - -c + - | + curl -sf http://localhost:8545 \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_syncing","params":[],"id":1}' | \ + jq -e '.result == false' + initialDelaySeconds: 60 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + startupProbe: + exec: + command: + - /bin/sh + - -c + - | + curl -sf http://localhost:8545 \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"net_listening","params":[],"id":1}' | \ + jq -e '.result == true' + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 30 + volumes: + - name: tmp + emptyDir: {} + volumeClaimTemplates: + - metadata: + name: xdc-data + spec: + storageClassName: xdc-fast-ssd + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 2Ti +``` + +### Full Node + +```yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: xdc-fullnode + namespace: xdc-network + labels: + app.kubernetes.io/name: xdc-node + app.kubernetes.io/component: fullnode +spec: + serviceName: xdc-fullnode + replicas: 3 + podManagementPolicy: Parallel + selector: + matchLabels: + app.kubernetes.io/name: xdc-node + app.kubernetes.io/component: fullnode + template: + metadata: + labels: + app.kubernetes.io/name: xdc-node + app.kubernetes.io/component: fullnode + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "6060" + spec: + serviceAccountName: xdc-node-sa + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + runAsNonRoot: true + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - fullnode + topologyKey: kubernetes.io/hostname + containers: + - name: xdc-node + 
image: xinfinorg/xinfin-node:latest + ports: + - name: p2p-tcp + containerPort: 30303 + protocol: TCP + - name: p2p-udp + containerPort: 30303 + protocol: UDP + - name: rpc + containerPort: 8545 + protocol: TCP + - name: ws + containerPort: 8888 + protocol: TCP + - name: metrics + containerPort: 6060 + protocol: TCP + env: + - name: NETWORK + valueFrom: + configMapKeyRef: + name: xdc-config + key: NETWORK + - name: SYNC_MODE + value: "snap" + resources: + requests: + cpu: "4" + memory: "16Gi" + limits: + cpu: "8" + memory: "32Gi" + volumeMounts: + - name: xdc-data + mountPath: /work/xdcchain + livenessProbe: + httpGet: + path: / + port: rpc + initialDelaySeconds: 300 + periodSeconds: 30 + readinessProbe: + httpGet: + path: / + port: rpc + initialDelaySeconds: 60 + periodSeconds: 10 + volumeClaimTemplates: + - metadata: + name: xdc-data + spec: + storageClassName: xdc-fast-ssd + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 2Ti +``` + +--- + +## Service Definitions + +### Headless Service + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: xdc-masternode + namespace: xdc-network +spec: + type: ClusterIP + clusterIP: None + selector: + app.kubernetes.io/name: xdc-node + app.kubernetes.io/component: masternode + ports: + - name: p2p-tcp + port: 30303 + targetPort: 30303 + protocol: TCP + - name: p2p-udp + port: 30303 + targetPort: 30303 + protocol: UDP + - name: rpc + port: 8545 + targetPort: 8545 + - name: ws + port: 8888 + targetPort: 8888 + - name: metrics + port: 6060 + targetPort: 6060 +``` + +### LoadBalancer for P2P + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: xdc-masternode-p2p + namespace: xdc-network + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: "nlb" + service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" +spec: + type: LoadBalancer + selector: + app.kubernetes.io/name: xdc-node + app.kubernetes.io/component: masternode + ports: + - name: p2p-tcp + port: 30303 + targetPort: 
30303 + protocol: TCP + - name: p2p-udp + port: 30303 + targetPort: 30303 + protocol: UDP + externalTrafficPolicy: Local +``` + +### Internal RPC + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: xdc-rpc-internal + namespace: xdc-network +spec: + type: ClusterIP + selector: + app.kubernetes.io/component: fullnode + ports: + - name: rpc + port: 8545 + targetPort: 8545 + - name: ws + port: 8888 + targetPort: 8888 +``` + +### Ingress with TLS + +```yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: xdc-rpc-ingress + namespace: xdc-network + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "10m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "600" + cert-manager.io/cluster-issuer: "letsencrypt-prod" +spec: + ingressClassName: nginx + tls: + - hosts: + - rpc.xdc-network.example.com + secretName: xdc-rpc-tls + rules: + - host: rpc.xdc-network.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: xdc-rpc-internal + port: + number: 8545 +``` + +--- + +## Network Policies + +### Default Deny + +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: default-deny-all + namespace: xdc-network +spec: + podSelector: {} + policyTypes: + - Ingress + - Egress +``` + +### Allow P2P + +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-p2p + namespace: xdc-network +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: xdc-node + policyTypes: + - Ingress + - Egress + ingress: + - from: [] + ports: + - protocol: TCP + port: 30303 + - protocol: UDP + port: 30303 + egress: + - to: [] + ports: + - protocol: TCP + port: 30303 + - protocol: UDP + port: 30303 +``` + +### Allow RPC Internal + +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-rpc-internal + namespace: xdc-network +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: xdc-node + 
policyTypes: + - Ingress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: monitoring + - namespaceSelector: + matchLabels: + name: ingress-nginx + ports: + - protocol: TCP + port: 8545 + - protocol: TCP + port: 8888 + - protocol: TCP + port: 6060 +``` + +### Allow DNS and External + +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-egress-dns-sync + namespace: xdc-network +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: xdc-node + policyTypes: + - Egress + egress: + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + - protocol: TCP + port: 53 + - to: [] + ports: + - protocol: TCP + port: 443 + - protocol: TCP + port: 80 +``` + +--- + +## Pod Disruption Budget + +### Masternode + +```yaml +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: xdc-masternode-pdb + namespace: xdc-network +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: xdc-node + app.kubernetes.io/component: masternode +``` + +### Full Nodes + +```yaml +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: xdc-fullnode-pdb + namespace: xdc-network +spec: + minAvailable: 2 + selector: + matchLabels: + app.kubernetes.io/name: xdc-node + app.kubernetes.io/component: fullnode +``` + +--- + +## Monitoring: Prometheus and Grafana + +### ServiceMonitor + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: xdc-node-metrics + namespace: monitoring + labels: + release: prometheus +spec: + namespaceSelector: + matchNames: + - xdc-network + selector: + matchLabels: + app.kubernetes.io/name: xdc-node + endpoints: + - port: metrics + path: /debug/metrics/prometheus + interval: 15s + scrapeTimeout: 10s +``` + +### Prometheus Rules + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: xdc-node-alerts + namespace: monitoring + labels: + release: 
prometheus +spec: + groups: + - name: xdc-node + rules: + - alert: XDCNodeDown + expr: up{job="xdc-node-metrics"} == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "XDC node {{ $labels.pod }} is down" + description: "XDC node {{ $labels.pod }} in namespace {{ $labels.namespace }} has been down for more than 5 minutes." + + - alert: XDCNodeNotSynced + expr: xdc_eth_syncing{job="xdc-node-metrics"} == 1 + for: 10m + labels: + severity: warning + annotations: + summary: "XDC node {{ $labels.pod }} is not synced" + description: "XDC node has been out of sync for more than 10 minutes." + + - alert: XDCNodeLowPeers + expr: xdc_p2p_peers{job="xdc-node-metrics"} < 5 + for: 15m + labels: + severity: warning + annotations: + summary: "XDC node {{ $labels.pod }} has low peer count" + description: "Peer count is {{ $value }}, expected at least 5." + + - alert: XDCNodeDiskFull + expr: | + ( + kubelet_volume_stats_available_bytes{namespace="xdc-network"} + / + kubelet_volume_stats_capacity_bytes{namespace="xdc-network"} + ) < 0.1 + for: 5m + labels: + severity: critical + annotations: + summary: "XDC node disk is almost full" + description: "Disk usage is above 90%." 
+``` + +### Grafana Dashboard Panels + +| Panel | Prometheus Query | Alert Threshold | +|-------|-----------------|-----------------| +| Block Height | `xdc_eth_block_number` | None | +| Peer Count | `xdc_p2p_peers` | < 5 warning, < 2 critical | +| Sync Status | `xdc_eth_syncing` | 1 = syncing | +| Disk Usage | `1 - (available / capacity)` | > 80% warning, > 90% critical | +| Memory Usage | `working_set / limit` | > 80% warning, > 90% critical | +| CPU Usage | `rate(container_cpu_usage_seconds_total[5m])` | > 80% warning | + +--- + +## Backup Strategies + +### Volume Snapshots + +```yaml +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshot +metadata: + name: xdc-masternode-backup + namespace: xdc-network +spec: + volumeSnapshotClassName: csi-aws-vsc + source: + persistentVolumeClaimName: xdc-data-xdc-masternode-0 +``` + +### S3 Chain Data Backup + +```yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: xdc-chain-backup + namespace: xdc-network +spec: + schedule: "0 2 * * *" + concurrencyPolicy: Forbid + jobTemplate: + spec: + template: + spec: + containers: + - name: backup + image: amazon/aws-cli:latest + command: + - /bin/sh + - -c + - | + aws s3 sync /work/xdcchain/XDC \ + s3://xdc-backups/mainnet/chain-$(date +%Y%m%d)/ \ + --storage-class STANDARD_IA + volumeMounts: + - name: xdc-data + mountPath: /work/xdcchain + readOnly: true + volumes: + - name: xdc-data + persistentVolumeClaim: + claimName: xdc-data-xdc-masternode-0 + restartPolicy: OnFailure +``` + +--- + +## Upgrade Procedures + +### Rolling Upgrade for Full Nodes + +```bash +# Update image +kubectl set image statefulset/xdc-fullnode \ + xdc-node=xinfinorg/xinfin-node:v2.0.0 \ + -n xdc-network + +# Watch rollout +kubectl rollout status statefulset/xdc-fullnode -n xdc-network +``` + +### Masternode Upgrade + +```bash +# Cordon node +kubectl cordon $(kubectl get pod xdc-masternode-0 -n xdc-network -o jsonpath='{.spec.nodeName}') + +# Graceful shutdown +kubectl exec xdc-masternode-0 -n 
xdc-network -- \ + /work/xdcchain/bin/XDC attach /work/xdcchain/XDC.ipc --exec admin.stopRPC() + +# Wait for shutdown +kubectl wait --for=delete pod/xdc-masternode-0 -n xdc-network --timeout=300s + +# Update image +kubectl set image statefulset/xdc-masternode \ + xdc-node=xinfinorg/xinfin-node:v2.0.0 -n xdc-network + +# Verify +kubectl rollout status statefulset/xdc-masternode -n xdc-network + +# Uncordon +kubectl uncordon $(kubectl get pod xdc-masternode-0 -n xdc-network -o jsonpath='{.spec.nodeName}') +``` + +### Rollback + +```bash +kubectl rollout undo statefulset/xdc-masternode -n xdc-network +``` + +--- + +## Cloud Provider Examples + +### AWS EKS + +```bash +# Create cluster +eksctl create cluster \ + --name xdc-network \ + --region us-east-1 \ + --node-type m6i.2xlarge \ + --nodes 3 \ + --nodes-min 3 \ + --nodes-max 6 \ + --managed + +# Add dedicated node group +eksctl create nodegroup \ + --cluster xdc-network \ + --name xdc-nodes \ + --node-type m6i.4xlarge \ + --nodes 3 \ + --node-labels "node-role.kubernetes.io/xdc=true" \ + --node-taints "dedicated=xdc-nodes:NoSchedule" + +# Install EBS CSI driver +eksctl create addon --name aws-ebs-csi-driver --cluster xdc-network +``` + +### Google GKE + +```bash +gcloud container clusters create xdc-network \ + --zone us-central1-a \ + --num-nodes 3 \ + --machine-type n2-standard-8 + +gcloud container node-pools create xdc-nodes \ + --cluster xdc-network \ + --machine-type n2-standard-16 \ + --num-nodes 3 \ + --node-labels node-role.kubernetes.io/xdc=true \ + --node-taints dedicated=xdc-nodes:NoSchedule +``` + +### Azure AKS + +```bash +az aks create \ + --resource-group xdc-network-rg \ + --name xdc-network \ + --node-count 3 \ + --node-vm-size Standard_D8s_v5 + +az aks nodepool add \ + --cluster-name xdc-network \ + --resource-group xdc-network-rg \ + --name xdcnodes \ + --node-count 3 \ + --node-vm-size Standard_D16s_v5 \ + --labels node-role.kubernetes.io/xdc=true \ + --node-taints dedicated=xdc-nodes:NoSchedule 
+``` + +--- + +## Troubleshooting + +### Pod Stuck in Pending + +```bash +kubectl describe pod xdc-masternode-0 -n xdc-network +kubectl top nodes +kubectl get pvc -n xdc-network +kubectl get nodes --show-labels +``` + +Common causes: insufficient CPU or memory, PVC not bound, node affinity requirements not met. + +### Node Not Syncing + +```bash +# Check sync status +kubectl exec xdc-masternode-0 -n xdc-network -- \ + curl -s http://localhost:8545 -X POST \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_syncing","params":[],"id":1}' + +# Check peer count +kubectl exec xdc-masternode-0 -n xdc-network -- \ + curl -s http://localhost:8545 -X POST \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"net_peerCount","params":[],"id":1}' + +# Check logs +kubectl logs xdc-masternode-0 -n xdc-network --tail=100 +``` + +Common fixes: verify NetworkPolicy allows P2P, confirm LoadBalancer has public IP, check firewall rules for port 30303. + +### Disk Full + +```bash +kubectl exec xdc-masternode-0 -n xdc-network -- df -h /work/xdcchain + +# Expand PVC +kubectl patch pvc xdc-data-xdc-masternode-0 -n xdc-network \ + -p '{"spec":{"resources":{"requests":{"storage":"4Ti"}}}}' +``` + +### High Memory Usage + +```bash +kubectl top pod xdc-masternode-0 -n xdc-network + +# Reduce cache in ConfigMap +kubectl edit configmap xdc-config -n xdc-network +# Change CACHE from 4096 to 2048 + +# Restart to apply +kubectl rollout restart statefulset/xdc-masternode -n xdc-network +``` + +--- + +## Security Checklist + +| Check | Status | Implementation | +|-------|--------|----------------| +| Non-root container | Required | runAsUser: 1000 | +| Read-only root filesystem | Required | readOnlyRootFilesystem: true | +| Drop all capabilities | Required | capabilities: drop: [ALL] | +| No privilege escalation | Required | allowPrivilegeEscalation: false | +| Seccomp profile | Required | RuntimeDefault | +| Encrypted storage | Required | encrypted: 
true in StorageClass | +| Network policies | Required | Default deny + explicit allow | +| Secrets encryption | Required | KMS or external secrets manager | +| RBAC least privilege | Required | Role scoped to namespace | +| Resource limits | Required | Prevents DoS | +| Image scanning | Required | Trivy or Grype in CI/CD | +| Pod security standards | Required | Enforce restricted | +| Audit logging | Recommended | Enable Kubernetes audit | +| Runtime detection | Recommended | Falco for anomaly detection | + +--- + +## Related Topics + +- [Docker Setup](/xdcchain/developers/node_operators/docker): Single-node Docker deployment +- [Node Architecture](/xdcchain/developers/node_operators/node_architecture): XDC node internals +- [Validator Handbook](/xdcchain/developers/node_operators/validator-handbook): Validator operations +- [Helm Charts](../helm/index.md): Helm chart deployment +- [Backup and Recovery](../backup/index.md): Comprehensive backup strategies +- [Incident Response](../runbooks/index.md): Troubleshooting runbooks +- [Infrastructure as Code](../iac/index.md): Terraform and Pulumi examples +- [Monitoring and Observability](../monitoring/index.md): Prometheus and Grafana setup diff --git a/website/docs/xdcchain/devops/monitoring/index.md b/website/docs/xdcchain/devops/monitoring/index.md new file mode 100644 index 00000000..e77c0b56 --- /dev/null +++ b/website/docs/xdcchain/devops/monitoring/index.md @@ -0,0 +1,576 @@ +--- +title: "Node Monitoring and Observability +description: Prometheus, Grafana, and alerting setup for XDC nodes with metric definitions, dashboard templates, and incident response integration." +--- + +# Node Monitoring and Observability + +This guide covers monitoring and observability for XDC Network nodes using Prometheus, Grafana, and alerting systems. Proper monitoring detects issues before they cause downtime or slashing. + +## Table of Contents + +1. [Overview](#overview) +2. [Prometheus Setup](#prometheus-setup) +3. 
[Key Metrics](#key-metrics) +4. [Grafana Dashboards](#grafana-dashboards) +5. [Alert Rules](#alert-rules) +6. [Log Aggregation](#log-aggregation) +7. [Uptime Monitoring](#uptime-monitoring) +8. [Mobile Alerting](#mobile-alerting) +9. [Runbook Integration](#runbook-integration) + +--- + +## Overview + +### Why Monitor XDC Nodes + +- **Prevent Slashing**: Detect missed blocks before penalties +- **Minimize Downtime**: Proactive alerts for disk, memory, sync issues +- **Performance Optimization**: Identify bottlenecks in block processing +- **Capacity Planning**: Track growth trends for storage and traffic + +### Monitoring Stack + +``` +XDC Node (port 6060) + -> Prometheus (scrapes metrics) + -> Grafana (visualizes dashboards) + -> Alertmanager (routes alerts) + -> PagerDuty / Slack / Email +``` + +--- + +## Prometheus Setup + +### Installation + +```bash +# Download Prometheus +wget https://github.com/prometheus/prometheus/releases/download/v2.47.0/prometheus-2.47.0.linux-amd64.tar.gz +tar xvfz prometheus-2.47.0.linux-amd64.tar.gz +cd prometheus-2.47.0.linux-amd64 +``` + +### Configuration + +```yaml +# prometheus.yml +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + cluster: xdc-mainnet + replica: '{{.ExternalURL}}' + +scrape_configs: + - job_name: 'xdc-nodes' + static_configs: + - targets: ['localhost:6060'] + labels: + node_type: 'masternode' + region: 'us-east-1' + - targets: ['fullnode-1:6060', 'fullnode-2:6060'] + labels: + node_type: 'fullnode' + region: 'us-east-1' + metrics_path: /debug/metrics/prometheus + scrape_timeout: 10s + + - job_name: 'node-exporter' + static_configs: + - targets: ['localhost:9100'] + + - job_name: 'docker' + static_configs: + - targets: ['localhost:9323'] + +alerting: + alertmanagers: + - static_configs: + - targets: ['localhost:9093'] + +rule_files: + - /etc/prometheus/rules/*.yml +``` + +### Retention + +```bash +# Keep 30 days of metrics +./prometheus \ + --config.file=prometheus.yml \ + 
--storage.tsdb.retention.time=30d \ + --storage.tsdb.retention.size=50GB +``` + +### Kubernetes ServiceMonitor + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: xdc-node-metrics + namespace: monitoring + labels: + release: prometheus +spec: + namespaceSelector: + matchNames: + - xdc-network + selector: + matchLabels: + app.kubernetes.io/name: xdc-node + endpoints: + - port: metrics + path: /debug/metrics/prometheus + interval: 15s + scrapeTimeout: 10s +``` + +--- + +## Key Metrics + +### Blockchain Metrics + +| Metric | Name | Type | Description | +|--------|------|------|-------------| +| Block Height | `eth_block_number` | Gauge | Current block height | +| Sync Status | `eth_syncing` | Gauge | 1 if syncing, 0 if synced | +| Peer Count | `p2p_peers` | Gauge | Number of connected peers | +| Pending Transactions | `txpool_pending` | Gauge | Transactions in mempool | +| Gas Price | `eth_gas_price` | Gauge | Current gas price in wei | + +### System Metrics + +| Metric | Name | Type | Critical Threshold | +|--------|------|------|-------------------| +| CPU Usage | `process_cpu_seconds_total` | Counter | > 80% for 10m | +| Memory Usage | `process_resident_memory_bytes` | Gauge | > 90% | +| Disk Usage | `node_filesystem_avail_bytes` | Gauge | < 10% free | +| Goroutines | `go_goroutines` | Gauge | > 10000 | +| Open Files | `process_open_fds` | Gauge | > 80% of limit | + +### Validator Metrics + +| Metric | Name | Type | Alert Condition | +|--------|------|------|----------------| +| Missed Blocks | `validator_missed_blocks` | Counter | Increase in 1h | +| Block Time | `block_timestamp` | Gauge | > 5s average | +| Epoch Participation | `validator_participation` | Gauge | < 90% | + +--- + +## Grafana Dashboards + +### Node Health Dashboard + +**Panels:** + +1. **Block Height** + - Query: `eth_block_number` + - Visualization: Stat + Graph + - Alert: No increase for 5 minutes + +2. 
**Sync Status** + - Query: `eth_syncing` + - Visualization: Stat (0 = green, 1 = yellow) + +3. **Peer Count** + - Query: `p2p_peers` + - Visualization: Gauge + Graph + - Thresholds: < 5 yellow, < 2 red + +4. **Disk Usage** + - Query: `1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)` + - Visualization: Gauge + - Thresholds: > 80% yellow, > 90% red + +5. **Memory Usage** + - Query: `process_resident_memory_bytes / 1024 / 1024 / 1024` + - Visualization: Graph + - Unit: GB + +6. **CPU Usage** + - Query: `rate(process_cpu_seconds_total[5m]) * 100` + - Visualization: Graph + - Unit: Percent + +### Network Dashboard + +**Panels:** + +1. **Network Block Height** + - Query: `max(eth_block_number)` across all nodes + - Shows consensus on latest block + +2. **Peer Distribution** + - Query: `p2p_peers` by `instance` + - Bar gauge showing peers per node + +3. **Transaction Pool** + - Query: `txpool_pending` + - Shows mempool backlog + +4. **Block Time** + - Query: `rate(eth_block_number[5m]) * 60` + - Average blocks per minute + +### Validator Dashboard + +**Panels:** + +1. **Validator Status** + - Query: `validator_status` + - Stat: Active / Inactive / Jailed + +2. **Blocks Produced** + - Query: `increase(validator_blocks_produced[1h])` + - Counter for block production + +3. **Rewards Earned** + - Query: `validator_rewards_total` + - Cumulative rewards + +4. 
**Slashing Risk** + - Query: `validator_missed_blocks_in_window` + - Gauge showing missed blocks in current window + +### Dashboard JSON (Excerpt) + +```json +{ + "dashboard": { + "title": "XDC Node Health", + "panels": [ + { + "id": 1, + "title": "Block Height", + "type": "stat", + "targets": [{ + "expr": "eth_block_number{job=\"xdc-nodes\"}", + "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "steps": [ + {"color": "green", "value": null} + ] + } + } + } + }, + { + "id": 2, + "title": "Peer Count", + "type": "gauge", + "targets": [{ + "expr": "p2p_peers{job=\"xdc-nodes\"}", + "legendFormat": "{{instance}}" + }], + "fieldConfig": { + "defaults": { + "min": 0, + "max": 50, + "thresholds": { + "steps": [ + {"color": "red", "value": 0}, + {"color": "yellow", "value": 5}, + {"color": "green", "value": 10} + ] + } + } + } + } + ] + } +} +``` + +--- + +## Alert Rules + +### Critical Alerts (P1) + +```yaml +# rules/critical.yml +groups: + - name: xdc-critical + rules: + - alert: XDCNodeDown + expr: up{job="xdc-nodes"} == 0 + for: 5m + labels: + severity: critical + team: devops + annotations: + summary: "XDC node {{ $labels.instance }} is down" + description: "Node has been down for more than 5 minutes." + runbook_url: "https://docs.xdc.network/devops/runbooks/#node-not-syncing" + + - alert: XDCNodeDiskFull + expr: | + ( + node_filesystem_avail_bytes{job="node-exporter",mountpoint="/opt/xdc"} + / + node_filesystem_size_bytes{job="node-exporter",mountpoint="/opt/xdc"} + ) < 0.1 + for: 5m + labels: + severity: critical + annotations: + summary: "Disk almost full on {{ $labels.instance }}" + description: "Disk usage is above 90%." + + - alert: XDCValidatorMissingBlocks + expr: increase(validator_missed_blocks[1h]) > 5 + for: 5m + labels: + severity: critical + annotations: + summary: "Validator missing blocks" + description: "Validator has missed {{ $value }} blocks in the last hour." 
+``` + +### Warning Alerts (P2) + +```yaml +# rules/warning.yml +groups: + - name: xdc-warning + rules: + - alert: XDCNodeSyncingSlow + expr: | + ( + eth_syncing{job="xdc-nodes"} == 1 + ) + and + ( + rate(eth_block_number[5m]) < 0.1 + ) + for: 10m + labels: + severity: warning + annotations: + summary: "Node syncing slowly" + description: "Block processing rate is below threshold." + + - alert: XDCNodeLowPeers + expr: p2p_peers{job="xdc-nodes"} < 5 + for: 15m + labels: + severity: warning + annotations: + summary: "Low peer count on {{ $labels.instance }}" + description: "Only {{ $value }} peers connected." + + - alert: XDCNodeHighMemory + expr: | + ( + process_resident_memory_bytes{job="xdc-nodes"} + / + node_memory_MemTotal_bytes{job="node-exporter"} + ) > 0.9 + for: 10m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.instance }}" +``` + +### Alertmanager Configuration + +```yaml +# alertmanager.yml +global: + smtp_smarthost: 'smtp.gmail.com:587' + smtp_from: 'alerts@xdc.network' + smtp_auth_username: 'alerts@xdc.network' + smtp_auth_password: '${SMTP_PASSWORD}' + +route: + group_by: ['alertname', 'severity'] + group_wait: 30s + group_interval: 5m + repeat_interval: 4h + receiver: 'default' + routes: + - match: + severity: critical + receiver: 'pagerduty-critical' + continue: true + - match: + severity: warning + receiver: 'slack-warnings' + +receivers: + - name: 'default' + email_configs: + - to: 'devops@xdc.network' + + - name: 'pagerduty-critical' + pagerduty_configs: + - service_key: '${PAGERDUTY_KEY}' + severity: critical + + - name: 'slack-warnings' + slack_configs: + - api_url: '${SLACK_WEBHOOK_URL}' + channel: '#xdc-alerts' + title: '{{ .GroupLabels.alertname }}' + text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}' +``` + +--- + +## Log Aggregation + +### Promtail + Loki Setup + +```yaml +# promtail-config.yml +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: 
/tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: xdc-logs + static_configs: + - targets: + - localhost + labels: + job: xdc-node + __path__: /opt/xdc/xdcchain/logs/*.log +``` + +### Log Queries in Grafana + +``` +# Find errors +{job="xdc-node"} |= "ERROR" + +# Find sync issues +{job="xdc-node"} |= "sync" |~ "failed|stuck|timeout" + +# Find peer connection issues +{job="xdc-node"} |= "peer" |~ "disconnect|reject|dial" + +# Find block production logs (validators) +{job="xdc-node"} |= "Commit new mining work" +``` + +--- + +## Uptime Monitoring + +### External Monitoring + +```bash +# Simple uptime check with curl +curl -sf http://your-node:8545 \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' +``` + +### UptimeRobot / Pingdom + +Configure HTTP checks: +- URL: `http://your-node:8545` +- Method: POST +- Body: `{"jsonrpc":"2.0","method":"net_listening","params":[],"id":1}` +- Expected response: Contains `"result":true` +- Check interval: 1 minute + +### Blackbox Exporter + +```yaml +# blackbox.yml +modules: + xdc_rpc: + prober: http + timeout: 5s + http: + method: POST + headers: + Content-Type: application/json + body: '{"jsonrpc":"2.0","method":"net_listening","params":[],"id":1}' + fail_if_body_not_matches_regexp: + - '"result":true' +``` + +--- + +## Mobile Alerting + +### PagerDuty Mobile App + +1. Install PagerDuty app +2. Configure notification rules +3. Set up escalation policies + +### Slack Mobile Notifications + +1. Enable mobile push for #xdc-alerts +2. Configure Do Not Disturb exceptions +3. 
Use @mentions for critical alerts + +### SMS via Twilio + +```yaml +# alertmanager.yml addition +receivers: + - name: 'sms-critical' + webhook_configs: + - url: 'http://twilio-webhook:5000/sms' + send_resolved: false +``` + +--- + +## Runbook Integration + +### Alert Annotations + +Every alert should include: + +```yaml +annotations: + summary: "Brief description" + description: "Detailed explanation" + runbook_url: "https://docs.xdc.network/devops/runbooks/#specific-runbook" + dashboard_url: "https://grafana.xdc.network/d/xdc-node-health" +``` + +### Grafana Alert Links + +Configure Grafana to link alerts to runbooks: + +```ini +# grafana.ini +[unified_alerting] +enabled = true + +[annotations] +runbook_url = https://docs.xdc.network/devops/runbooks +``` + +--- + +## Related Topics + +- [Kubernetes Deployment](../kubernetes/index.md): Containerized deployment +- [Incident Response](../runbooks/index.md): Troubleshooting runbooks +- [Backup and Recovery](../backup/index.md): Backup strategies +- [Infrastructure as Code](../iac/index.md): Terraform and Pulumi