#!/usr/bin/env bash
#
# Deployer bootstrap end-to-end check: environment setup and service start.
#
# Re-executes itself inside `nix develop` (rooted two directories above this
# script) unless PHOTONCLOUD_E2E_IN_NIX is already set, creates a scratch
# directory, picks free local TCP ports, and starts the background services
# that the verification steps further down talk to.
#
# Requires: nix (first invocation), python3.
set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Re-enter through the Nix dev shell exactly once; the environment variable
# marks the re-executed instance so it does not loop.
if [[ -z "${PHOTONCLOUD_E2E_IN_NIX:-}" ]]; then
  exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@"
fi

tmp_dir="$(mktemp -d)"
cf_pid=""
deployer_pid=""

#######################################
# EXIT handler: stop background services and remove the scratch directory.
# When the script is exiting with a non-zero status, the tail of every
# non-empty *.log file in the scratch directory is printed to stderr first,
# because the directory (and the logs in it) is deleted right afterwards.
# Globals:   deployer_pid, cf_pid, tmp_dir (read)
# Arguments: none
# Outputs:   log excerpts on stderr when the exit status is non-zero
#######################################
cleanup() {
  # Must be captured before any other command overwrites $?.
  local exit_status=$?
  local pid log

  # Teardown is best-effort: a service that is already gone is not an error.
  set +e

  for pid in "$deployer_pid" "$cf_pid"; do
    [[ -n "$pid" ]] || continue
    kill "$pid" 2>/dev/null
    wait "$pid" 2>/dev/null
  done

  if ((exit_status != 0)); then
    for log in "$tmp_dir"/*.log; do
      # Also skips the literal pattern left behind by an unmatched glob.
      [[ -s "$log" ]] || continue
      printf -- '--- %s (last 50 lines) ---\n' "${log##*/}" >&2
      tail -n 50 -- "$log" >&2
    done
  fi

  rm -rf -- "${tmp_dir:?}"
}
trap cleanup EXIT

#######################################
# Print a TCP port number that was free on 127.0.0.1 at the time of the call.
# The probing socket is closed before returning, so the port is not reserved.
# Arguments: none
# Outputs:   the port number on stdout
#######################################
free_port() {
  python3 - <<'PY'
import socket

s = socket.socket()
s.bind(("127.0.0.1", 0))
print(s.getsockname()[1])
s.close()
PY
}

#######################################
# Poll once per second until a TCP connection to host:port succeeds.
# Arguments: $1 - host
#            $2 - port
#            $3 - timeout in seconds (default 60)
# Outputs:   a timeout message on stderr on failure
# Returns:   0 once connected, 1 on timeout
#######################################
wait_for_port() {
  local host="$1"
  local port="$2"
  local timeout_secs="${3:-60}"
  local deadline=$((SECONDS + timeout_secs))

  while ((SECONDS < deadline)); do
    if python3 - "$host" "$port" <<'PY'
import socket
import sys

host = sys.argv[1]
port = int(sys.argv[2])

with socket.socket() as sock:
    sock.settimeout(0.5)
    try:
        sock.connect((host, port))
    except OSError:
        raise SystemExit(1)
raise SystemExit(0)
PY
    then
      return 0
    fi
    sleep 1
  done

  echo "timed out waiting for ${host}:${port}" >&2
  return 1
}

#######################################
# Poll once per second until urllib can open the URL without raising.
# Arguments: $1 - URL
#            $2 - timeout in seconds (default 60)
# Outputs:   a timeout message on stderr on failure
# Returns:   0 once the request succeeds, 1 on timeout
#######################################
wait_for_http() {
  local url="$1"
  local timeout_secs="${2:-60}"
  local deadline=$((SECONDS + timeout_secs))

  while ((SECONDS < deadline)); do
    if python3 - "$url" <<'PY'
import sys
import urllib.request

try:
    with urllib.request.urlopen(sys.argv[1], timeout=2):
        pass
except Exception:
    raise SystemExit(1)
raise SystemExit(0)
PY
    then
      return 0
    fi
    sleep 1
  done

  echo "timed out waiting for $url" >&2
  return 1
}

api_port="$(free_port)"
http_port="$(free_port)"
raft_port="$(free_port)"
gossip_port="$(free_port)"
deployer_port="$(free_port)"
bootstrap_token="bootstrap-secret"

# Placeholder payload; a later check fetches it back and compares the bytes.
printf 'bundle-bytes' >"$tmp_dir/flake-bundle.tar.gz"

# NOTE(review): the next statement looks truncated. It was presumably a
# here-document writing chainfire.toml, followed by the command that launches
# Chainfire with its output redirected to chainfire.log. Neither the
# here-document body nor the launch command is present in this copy of the
# file, so as written this backgrounds `cat` reading a log file that does not
# exist yet. It is kept verbatim - TODO restore the original text.
cat >"$tmp_dir/chainfire.toml" <"$tmp_dir/chainfire.log" 2>&1 &
cf_pid="$!"
wait_for_port "127.0.0.1" "$api_port" 120

# NOTE(review): same apparent truncation as above, here for deployer.toml and
# the deployer launch command. Kept verbatim - TODO restore the original text.
cat >"$tmp_dir/deployer.toml" <"$tmp_dir/deployer.log" 2>&1 &
deployer_pid="$!"
# Block until the deployer answers on its health endpoint.
wait_for_http "http://127.0.0.1:${deployer_port}/health" 120

# Declarative cluster definition applied below: two node classes, two pools,
# one pre-seeded node, and one enrollment rule for edge metal machines.
# NOTE(review): the nesting of this YAML was reconstructed from a
# whitespace-collapsed copy of the file, guided by key order and by the
# assertions further down - verify against the deployer-ctl config schema.
cat >"$tmp_dir/cluster.yaml" <<'EOF'
cluster:
  cluster_id: test-cluster
  environment: dev
node_classes:
  - name: general-worker
    nix_profile: profiles/worker-linux
    install_plan:
      nixos_configuration: worker-golden
      disko_config_path: profiles/worker-linux/disko.nix
      target_disk_by_id: /dev/disk/by-id/worker-default
    roles:
      - worker
    labels:
      tier: general
  - name: edge-metal
    nix_profile: profiles/edge-metal
    install_plan:
      nixos_configuration: edge-metal
      disko_config_path: profiles/edge-metal/disko.nix
      target_disk_by_id: /dev/disk/by-id/edge-default
    roles:
      - edge
    labels:
      tier: edge
pools:
  - name: general
    node_class: general-worker
    labels:
      env: dev
  - name: edge
    node_class: edge-metal
    labels:
      env: dev
      lane: edge
nodes:
  - node_id: node-seeded
    machine_id: known-machine-01
    hostname: node-seeded
    ip: 10.0.0.11
    pool: general
    failure_domain: rack-a
    install_plan:
      nixos_configuration: node01
      disko_config_path: nix/nodes/vm-cluster/node01/disko.nix
      target_disk: /dev/vda
    desired_system:
      flake_ref: "github:centra/cloud"
      health_check_command:
        - systemctl
        - is-system-running
        - "--wait"
      rollback_on_failure: true
    state: pending
enrollment_rules:
  - name: edge-metal-auto
    priority: 100
    match_labels:
      rack: edge
      sku: metal
    pool: edge
    labels:
      managed-by: deployer
    services:
      - prismnet
    ssh_authorized_keys:
      - ssh-ed25519 AAAATEST edge@test
    node_id_prefix: edge
EOF

chainfire_endpoint="http://127.0.0.1:${api_port}"
deployer_endpoint="http://127.0.0.1:${deployer_port}"

#######################################
# Run deployer-ctl through cargo against the local Chainfire endpoint with
# the cluster id and namespaces used by this test.
# Globals:   ROOT, chainfire_endpoint (read)
# Arguments: the deployer-ctl subcommand and its options
# Outputs:   whatever deployer-ctl writes
# Returns:   the exit status of cargo / deployer-ctl
#######################################
run_deployer_ctl() {
  cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- \
    --chainfire-endpoint "$chainfire_endpoint" \
    --cluster-id test-cluster \
    --cluster-namespace photoncloud \
    --deployer-namespace deployer \
    "$@"
}

echo "Applying declarative cluster/bootstrap config"
run_deployer_ctl apply --config "$tmp_dir/cluster.yaml" --prune

# A machine listed under `nodes` must be answered with its seeded node config.
echo "Validating seeded machine bootstrap mapping"
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
import json
import sys
import urllib.request

endpoint, token = sys.argv[1], sys.argv[2]
request = urllib.request.Request(
    endpoint + "/api/v1/phone-home",
    data=json.dumps({"machine_id": "known-machine-01", "ip": "10.0.0.11"}).encode(),
    headers={
        "Content-Type": "application/json",
        "X-Deployer-Token": token,
    },
)
with urllib.request.urlopen(request, timeout=5) as response:
    payload = json.loads(response.read().decode("utf-8"))

assert payload["success"] is True
assert payload["node_id"] == "node-seeded"
assert payload["node_config"]["pool"] == "general"
assert payload["node_config"]["node_class"] == "general-worker"
assert payload["node_config"]["nix_profile"] == "profiles/worker-linux"
assert payload["node_config"]["install_plan"]["nixos_configuration"] == "node01"
assert payload["node_config"]["install_plan"]["disko_config_path"] == "nix/nodes/vm-cluster/node01/disko.nix"
assert payload["node_config"]["install_plan"]["target_disk"] == "/dev/vda"
assert payload["node_config"]["failure_domain"] == "rack-a"
print("Seeded mapping validated")
PY

echo "Validating cloud-init metadata endpoints"
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
import sys
import urllib.request

endpoint, token = sys.argv[1], sys.argv[2]
for path, expected in (
    ("/api/v1/cloud-init/known-machine-01/meta-data", "instance-id: node-seeded"),
    ("/api/v1/cloud-init/known-machine-01/user-data", "#cloud-config"),
):
    request = urllib.request.Request(
        endpoint + path,
        headers={"X-Deployer-Token": token},
    )
    with urllib.request.urlopen(request, timeout=5) as response:
        payload = response.read().decode("utf-8")
    assert expected in payload
    if path.endswith("user-data"):
        assert "/etc/plasmacloud/node-config.json" in payload
        assert "profiles/worker-linux" in payload
        assert "\"nixos_configuration\": \"node01\"" in payload

print("cloud-init endpoints validated")
PY

# The bundle endpoint must return the bytes written to flake-bundle.tar.gz.
echo "Validating bootstrap flake bundle endpoint"
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
import sys
import urllib.request

endpoint, token = sys.argv[1], sys.argv[2]
request = urllib.request.Request(
    endpoint + "/api/v1/bootstrap/flake-bundle",
    headers={"X-Deployer-Token": token},
)
with urllib.request.urlopen(request, timeout=5) as response:
    payload = response.read()

assert payload == b"bundle-bytes"
print("bootstrap flake bundle endpoint validated")
PY

# An unknown machine whose metadata matches the enrollment rule must be
# enrolled on the fly. The script's only stdout line is the assigned node id,
# which is captured for the record inspection below.
echo "Validating enrollment-rule bootstrap path"
dynamic_node_id="$(
  python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
import json
import sys
import urllib.request

endpoint, token = sys.argv[1], sys.argv[2]
request = urllib.request.Request(
    endpoint + "/api/v1/phone-home",
    data=json.dumps(
        {
            "machine_id": "dynamic-metal-01",
            "ip": "10.0.9.25",
            "metadata": {
                "rack": "edge",
                "sku": "metal",
                "topology.kubernetes.io/zone": "rack-z",
            },
            "hardware_facts": {
                "architecture": "x86_64",
                "cpu_model": "Example CPU",
                "cpu_threads": 32,
                "cpu_cores": 16,
                "memory_bytes": 137438953472,
                "disks": [
                    {
                        "name": "nvme0n1",
                        "path": "/dev/nvme0n1",
                        "by_id": "/dev/disk/by-id/nvme-dynamic-metal-01",
                        "size_bytes": 2000398934016,
                        "model": "Example NVMe",
                        "serial": "disk-serial-01",
                        "rotational": False
                    }
                ],
                "nics": [
                    {
                        "name": "eno1",
                        "mac_address": "52:54:00:aa:bb:cc",
                        "oper_state": "up"
                    }
                ],
                "dmi": {
                    "vendor": "ExampleVendor",
                    "product_name": "ExampleMetal",
                    "serial_number": "dynamic-metal-serial"
                }
            },
        }
    ).encode(),
    headers={
        "Content-Type": "application/json",
        "X-Deployer-Token": token,
    },
)
with urllib.request.urlopen(request, timeout=5) as response:
    payload = json.loads(response.read().decode("utf-8"))

assert payload["success"] is True
assert payload["node_id"].startswith("edge-")
assert payload["node_config"]["role"] == "edge"
assert payload["node_config"]["pool"] == "edge"
assert payload["node_config"]["node_class"] == "edge-metal"
assert payload["node_config"]["nix_profile"] == "profiles/edge-metal"
assert payload["node_config"]["install_plan"]["nixos_configuration"] == "edge-metal"
assert payload["node_config"]["install_plan"]["disko_config_path"] == "profiles/edge-metal/disko.nix"
assert payload["node_config"]["install_plan"]["target_disk_by_id"] == "/dev/disk/by-id/edge-default"
assert "prismnet" in payload["node_config"]["services"]
assert payload["node_config"]["labels"]["managed-by"] == "deployer"
print(payload["node_id"])
PY
)"

echo "Inspecting stored cluster node records"
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes.dump"
python3 - "$tmp_dir/nodes.dump" "$dynamic_node_id" <<'PY'
import json
import sys

path = sys.argv[1]
dynamic_id = sys.argv[2]

# Collect node records from dump lines that carry " key=<k> value=<json>".
records = {}
with open(path, "r", encoding="utf-8") as handle:
    for line in handle:
        line = line.strip()
        if " key=" not in line or " value=" not in line:
            continue
        key = line.split(" key=", 1)[1].split(" value=", 1)[0]
        # desired-system entries share the prefix but are checked separately.
        if key.endswith("/desired-system"):
            continue
        value = line.split(" value=", 1)[1]
        record = json.loads(value)
        records[record["node_id"]] = record

seeded = records.get("node-seeded")
dynamic = records.get(dynamic_id)
if seeded is None:
    raise SystemExit("missing seeded node record")
if dynamic is None:
    raise SystemExit("missing dynamic node record")
if seeded.get("pool") != "general" or seeded.get("node_class") != "general-worker":
    raise SystemExit(f"unexpected seeded node record: {seeded}")
if dynamic.get("pool") != "edge" or dynamic.get("node_class") != "edge-metal":
    raise SystemExit(f"unexpected dynamic node record: {dynamic}")
if dynamic.get("failure_domain") != "rack-z":
    raise SystemExit(f"unexpected dynamic failure domain: {dynamic}")
if dynamic.get("labels", {}).get("lane") != "edge":
    raise SystemExit(f"missing pool label propagation: {dynamic}")
if seeded.get("install_plan", {}).get("target_disk") != "/dev/vda":
    raise SystemExit(f"missing seeded target disk: {seeded}")
if dynamic.get("install_plan", {}).get("target_disk_by_id") != "/dev/disk/by-id/edge-default":
    raise SystemExit(f"missing dynamic target disk by-id: {dynamic}")

facts = dynamic.get("hardware_facts") or {}
if facts.get("architecture") != "x86_64":
    raise SystemExit(f"missing dynamic hardware architecture: {dynamic}")
# `or [{}]` also covers a present-but-empty disk list, which would otherwise
# raise IndexError instead of producing the diagnostic below.
disks = facts.get("disks") or [{}]
if disks[0].get("by_id") != "/dev/disk/by-id/nvme-dynamic-metal-01":
    raise SystemExit(f"missing dynamic hardware disk facts: {dynamic}")
if dynamic.get("labels", {}).get("hardware.architecture") != "x86_64":
    raise SystemExit(f"missing hardware metadata labels: {dynamic}")
if dynamic.get("labels", {}).get("hardware.disk_count") != "1":
    raise SystemExit(f"missing hardware disk count label: {dynamic}")

print("Deployer bootstrap records validated")
PY

echo "Inspecting desired-system state"
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/node-seeded/desired-system" >"$tmp_dir/desired-system.dump"
python3 - "$tmp_dir/desired-system.dump" <<'PY'
import json
import sys

path = sys.argv[1]
with open(path, "r", encoding="utf-8") as handle:
    lines = [line.strip() for line in handle if " value=" in line]

# Exactly one desired-system entry is expected for the seeded node.
if len(lines) != 1:
    raise SystemExit(f"unexpected desired-system dump: {lines}")

payload = json.loads(lines[0].split(" value=", 1)[1])
assert payload["node_id"] == "node-seeded"
assert payload["nixos_configuration"] == "node01"
assert payload["flake_ref"] == "github:centra/cloud"
assert payload["health_check_command"] == ["systemctl", "is-system-running", "--wait"]
assert payload["rollback_on_failure"] is True
print("desired-system state validated")
PY

echo "Deployer bootstrap E2E verification passed"