photoncloud-monorepo/baremetal/vm-cluster/validate-cluster.sh
centra 5c6eb04a46 T036: Add VM cluster deployment configs for nixos-anywhere
- netboot-base.nix with SSH key auth
- Launch scripts for node01/02/03
- Node configuration.nix and disko.nix
- Nix modules for first-boot automation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 09:59:19 +09:00

89 lines
2.8 KiB
Bash
Executable file

#!/usr/bin/env bash
# T036 Cluster Validation Script
# Validates cluster health and Raft formation per S6 acceptance criteria.
#
# Usage:
#   ./validate-cluster.sh
#   WAIT_SECONDS=0 ./validate-cluster.sh   # skip the start-up grace period
#
# Environment:
#   WAIT_SECONDS  Time to wait before the first probe, passed straight to
#                 sleep(1). Defaults to 60.
set -euo pipefail

# Start-up grace period. Overridable so that re-running the validation against
# a cluster that is already up does not cost a full minute every time.
WAIT_SECONDS="${WAIT_SECONDS:-60}"

echo "=== T036 Cluster Validation ==="
echo ""
# Wait for services to be ready
echo "Waiting for cluster services to start (${WAIT_SECONDS} seconds)..."
sleep "${WAIT_SECONDS}"
echo ""
# --- S6.1: show the dnsmasq service status and its DHCP lease file on the PXE host.
echo "=== S6.1: PXE Server Validation ==="
echo ""
# ssh options shared by both probes:
#   BatchMode=yes      never stop to prompt for a password or passphrase
#   ConnectTimeout=10  give up on an unreachable host after 10 seconds
pxe_ssh_opts=(-o BatchMode=yes -o ConnectTimeout=10)
echo "Checking DHCP service..."
# The remote `|| true` only masks a non-zero `systemctl status`. The local `||`
# covers ssh itself failing (it exits 255 on connection/auth errors), which
# would otherwise abort the whole script under `set -e`.
ssh "${pxe_ssh_opts[@]}" root@192.168.100.1 'systemctl status dnsmasq || true' \
  || echo "PXE server not reachable over SSH"
echo ""
echo "Checking DHCP leases..."
ssh "${pxe_ssh_opts[@]}" root@192.168.100.1 \
  'cat /var/lib/dnsmasq/dnsmasq.leases || echo "No leases yet"' \
  || echo "PXE server not reachable over SSH"
echo ""
# --- S6.2: pretty-print the Chainfire membership list served by node01.
echo "=== S6.2: Chainfire Cluster Validation ==="
echo ""
echo "Checking Chainfire cluster members on node01..."
# curl flags:
#   -k             skip TLS certificate verification
#   -sS            no progress meter mixed into the jq output; errors still shown
#   -f             HTTP status >= 400 counts as failure
#   --max-time 10  an unresponsive node cannot stall the run
# `pipefail` (set at the top of the script) makes a curl failure fail the
# pipeline, so the fallback message fires for curl errors as well as jq errors.
curl -ksSf --max-time 10 https://192.168.100.11:2379/admin/cluster/members \
  | jq . || echo "Chainfire API not ready"
echo ""
echo "Expected: 3 members (node01, node02, node03), one leader elected"
echo ""
# --- S6.3: pretty-print the FlareDB membership list served by node01.
echo "=== S6.3: FlareDB Cluster Validation ==="
echo ""
echo "Checking FlareDB cluster members on node01..."
# curl flags:
#   -k             skip TLS certificate verification
#   -sS            no progress meter mixed into the jq output; errors still shown
#   -f             HTTP status >= 400 counts as failure
#   --max-time 10  an unresponsive node cannot stall the run
# `pipefail` (set at the top of the script) makes a curl failure fail the
# pipeline, so the fallback message fires for curl errors as well as jq errors.
curl -ksSf --max-time 10 https://192.168.100.11:2479/admin/cluster/members \
  | jq . || echo "FlareDB API not ready"
echo ""
# --- S6.4: write one key through node01, then read it back from each node.
echo "=== S6.4: CRUD Operations Test ==="
echo ""
# Shared curl invocation for this section:
#   -k             skip TLS certificate verification
#   -sS            quiet, but still print curl's own error messages
#   -f             HTTP status >= 400 counts as failure, so "Write failed" /
#                  "Read failed" also fire when the server rejects the request
#   --max-time 10  an unresponsive node cannot stall the run
flaredb_curl=(curl -ksSf --max-time 10)
echo "Writing test key to FlareDB..."
"${flaredb_curl[@]}" -X PUT https://192.168.100.11:2479/api/v1/kv/test-key \
  -H "Content-Type: application/json" \
  -d '{"value": "hello-t036-cluster"}' || echo "Write failed"
echo ""
echo "Reading test key from node01..."
"${flaredb_curl[@]}" https://192.168.100.11:2479/api/v1/kv/test-key || echo "Read failed"
echo ""
echo "Reading test key from node02 (verify replication)..."
"${flaredb_curl[@]}" https://192.168.100.12:2479/api/v1/kv/test-key || echo "Read failed"
echo ""
echo "Reading test key from node03 (verify replication)..."
"${flaredb_curl[@]}" https://192.168.100.13:2479/api/v1/kv/test-key || echo "Read failed"
echo ""
# --- S6.5: query the IAM /health endpoint (port 8080) on each of the three nodes.
echo "=== S6.5: IAM Service Validation ==="
echo ""
for node in 192.168.100.11 192.168.100.12 192.168.100.13; do
  echo "Checking IAM health on ${node}..."
  # -k skips TLS verification; -sS is quiet but keeps curl's error messages;
  # -f makes an HTTP error status trigger the "not ready" message;
  # --max-time keeps one dead node from blocking the remaining checks.
  curl -ksSf --max-time 10 "https://${node}:8080/health" \
    || echo "IAM not ready on ${node}"
  echo ""
done
echo ""
# --- S6.6: per-node summary of the three /health endpoints
# (2379 Chainfire, 2479 FlareDB, 8080 IAM).
echo "=== S6.6: Health Checks ==="
echo ""
for node in 192.168.100.11 192.168.100.12 192.168.100.13; do
  echo "Node: ${node}"
  # Each probe prints the response body, or N/A when curl itself fails.
  # --max-time bounds every request: nine sequential probes against a down
  # cluster would otherwise each wait for the system TCP timeout.
  # -f is intentionally not used, so an error body is still shown as the status.
  echo " Chainfire: $(curl -sk --max-time 10 "https://${node}:2379/health" || echo 'N/A')"
  echo " FlareDB: $(curl -sk --max-time 10 "https://${node}:2479/health" || echo 'N/A')"
  echo " IAM: $(curl -sk --max-time 10 "https://${node}:8080/health" || echo 'N/A')"
  echo ""
done
echo ""
# Closing banner and the manual checklist the operator compares against the
# output printed above. One printf call emits each argument on its own line.
printf '%s\n' \
  "=== Validation Complete ===" \
  "" \
  "Review the output above and verify:" \
  " ✓ Chainfire cluster: 3 members, leader elected" \
  " ✓ FlareDB cluster: 3 members, quorum formed" \
  " ✓ CRUD operations: write/read working, data replicated to all nodes" \
  " ✓ IAM service: operational on all 3 nodes" \
  " ✓ Health checks: all services responding" \
  "" \
  "If all checks pass, T036 acceptance criteria are met."