#!/usr/bin/env bash
#
# End-to-end check of the Deployer bootstrap flow.
#
# Starts a single-node ChainFire server and a Deployer server on loopback
# ports, applies a declarative cluster config with deployer-ctl, and then
# exercises the phone-home and cloud-init HTTP endpoints.
#
# Environment:
#   PHOTONCLOUD_E2E_IN_NIX  non-empty once the script is running inside the
#                           repository's `nix develop` shell.
set -euo pipefail

# Repository root: two directories above the directory holding this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Re-exec inside `nix develop` unless we are already there. The marker
# variable keeps the re-executed copy from looping back into this branch.
if [[ -z "${PHOTONCLOUD_E2E_IN_NIX:-}" ]]; then
  exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@"
fi

# Scratch directory for generated configs, server logs and state.
tmp_dir="$(mktemp -d)"
# PIDs of the background servers; empty until the server has been started.
cf_pid=""
deployer_pid=""

#######################################
# Stop the background servers and remove the scratch directory.
# Intended to run as the EXIT trap. When the script is exiting with a
# non-zero status, the tail of each server log is copied to stderr first,
# because the logs live inside the directory that is about to be deleted.
# Globals:   deployer_pid, cf_pid, tmp_dir (read)
# Arguments: none
# Outputs:   log tails on stderr when the preceding status was non-zero
#######################################
cleanup() {
  # Must be the first command: $? still holds the status we are exiting with.
  local exit_status=$?
  local pid log

  # Teardown is best-effort; one failing step must not skip the rest.
  set +e

  for pid in "$deployer_pid" "$cf_pid"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
    fi
  done

  if (( exit_status != 0 )); then
    for log in "$tmp_dir"/*.log; do
      # An unmatched glob stays literal; skip it.
      [[ -f "$log" ]] || continue
      printf '==> %s (last 40 lines) <==\n' "${log##*/}" >&2
      tail -n 40 -- "$log" >&2
    done
  fi

  rm -rf -- "$tmp_dir"
}

# Tear everything down whenever the script exits, including `set -e` aborts.
trap cleanup EXIT

#######################################
# Print a TCP port number that was free on 127.0.0.1 at the time of the call.
# The port is obtained by binding to port 0 and is released again before
# returning, so another process could claim it before the caller does.
# Arguments: none
# Outputs:   the port number on stdout
#######################################
free_port() {
  python3 - <<'PY'
import socket

with socket.socket() as sock:
    sock.bind(("127.0.0.1", 0))
    print(sock.getsockname()[1])
PY
}

#######################################
# Poll until a TCP connection to host:port succeeds.
# Arguments: $1 - host
#            $2 - port
#            $3 - timeout in seconds (default 60)
# Outputs:   a "timed out" message on stderr on failure
# Returns:   0 once a connection succeeds, 1 when the deadline passes
#######################################
wait_for_port() {
  local host="$1"
  local port="$2"
  local timeout_secs="${3:-60}"
  local deadline=$((SECONDS + timeout_secs))

  while (( SECONDS < deadline )); do
    # The probe exits 0 on a successful connect and 1 on any OSError.
    if python3 - "$host" "$port" <<'PY'
import socket
import sys

host = sys.argv[1]
port = int(sys.argv[2])

with socket.socket() as sock:
    sock.settimeout(0.5)
    try:
        sock.connect((host, port))
    except OSError:
        raise SystemExit(1)
raise SystemExit(0)
PY
    then
      return 0
    fi
    sleep 1
  done

  echo "timed out waiting for ${host}:${port}" >&2
  return 1
}

#######################################
# Poll until opening a URL succeeds.
# Arguments: $1 - URL to open
#            $2 - timeout in seconds (default 60)
# Outputs:   a "timed out" message on stderr on failure
# Returns:   0 once the URL opens, 1 when the deadline passes
#######################################
wait_for_http() {
  local url="$1"
  local timeout_secs="${2:-60}"
  local deadline=$((SECONDS + timeout_secs))

  while (( SECONDS < deadline )); do
    # The probe exits 0 when urlopen succeeds and 1 on any exception.
    if python3 - "$url" <<'PY'
import sys
import urllib.request

try:
    with urllib.request.urlopen(sys.argv[1], timeout=2):
        pass
except Exception:
    raise SystemExit(1)
raise SystemExit(0)
PY
    then
      return 0
    fi
    sleep 1
  done

  echo "timed out waiting for $url" >&2
  return 1
}

# Ask for a currently-free loopback port for each listener started below.
api_port="$(free_port)"
http_port="$(free_port)"
raft_port="$(free_port)"
gossip_port="$(free_port)"
deployer_port="$(free_port)"
# Written into the Deployer config and sent as the X-Deployer-Token header.
bootstrap_token="bootstrap-secret"

# Generate the ChainFire config. The heredoc is unquoted so the scratch
# directory and the allocated ports are expanded into the file.
cat >"$tmp_dir/chainfire.toml" <<EOF
[node]
id = 1
name = "chainfire-1"
role = "control_plane"

[storage]
data_dir = "$tmp_dir/chainfire-data"

[network]
api_addr = "127.0.0.1:${api_port}"
http_addr = "127.0.0.1:${http_port}"
raft_addr = "127.0.0.1:${raft_port}"
gossip_addr = "127.0.0.1:${gossip_port}"

[cluster]
id = 1
initial_members = []
bootstrap = true

[raft]
role = "voter"
EOF

echo "Starting ChainFire on 127.0.0.1:${api_port}"
# Run in the background with all output captured in the scratch directory.
cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- \
  --config "$tmp_dir/chainfire.toml" \
  >"$tmp_dir/chainfire.log" 2>&1 &
cf_pid="$!"

# Block until the API port accepts connections (up to 120 seconds).
wait_for_port "127.0.0.1" "$api_port" 120

# Generate the Deployer config, pointing it at the ChainFire API started above.
cat >"$tmp_dir/deployer.toml" <<EOF
bind_addr = "127.0.0.1:${deployer_port}"
cluster_id = "test-cluster"
cluster_namespace = "photoncloud"
heartbeat_timeout_secs = 300
local_state_path = "$tmp_dir/deployer-state"
bootstrap_token = "${bootstrap_token}"
require_chainfire = true
allow_unknown_nodes = false
allow_unauthenticated = false
allow_test_mappings = false
tls_self_signed = false

[chainfire]
endpoints = ["http://127.0.0.1:${api_port}"]
namespace = "deployer"
EOF

echo "Starting Deployer on 127.0.0.1:${deployer_port}"
# Run in the background with all output captured in the scratch directory.
cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-server -- \
  --config "$tmp_dir/deployer.toml" \
  >"$tmp_dir/deployer.log" 2>&1 &
deployer_pid="$!"

# Block until the health endpoint answers (up to 120 seconds).
wait_for_http "http://127.0.0.1:${deployer_port}/health" 120

# Declarative cluster definition applied with deployer-ctl below. The heredoc
# delimiter is quoted, so nothing inside is expanded by the shell.
# NOTE(review): the YAML nesting was reconstructed from the values the checks
# further down assert on (pool, node_class, install_plan, labels, services);
# confirm it against the schema deployer-ctl expects.
cat >"$tmp_dir/cluster.yaml" <<'EOF'
cluster:
  cluster_id: test-cluster
  environment: dev

node_classes:
  - name: general-worker
    nix_profile: profiles/worker-linux
    install_plan:
      nixos_configuration: worker-golden
      disko_config_path: profiles/worker-linux/disko.nix
    roles:
      - worker
    labels:
      tier: general
  - name: edge-metal
    nix_profile: profiles/edge-metal
    install_plan:
      nixos_configuration: edge-metal
      disko_config_path: profiles/edge-metal/disko.nix
    roles:
      - edge
    labels:
      tier: edge

pools:
  - name: general
    node_class: general-worker
    labels:
      env: dev
  - name: edge
    node_class: edge-metal
    labels:
      env: dev
      lane: edge

nodes:
  - node_id: node-seeded
    machine_id: known-machine-01
    hostname: node-seeded
    ip: 10.0.0.11
    pool: general
    failure_domain: rack-a
    install_plan:
      nixos_configuration: node01
      disko_config_path: nix/nodes/vm-cluster/node01/disko.nix
    state: pending

enrollment_rules:
  - name: edge-metal-auto
    priority: 100
    match_labels:
      rack: edge
      sku: metal
    pool: edge
    labels:
      managed-by: deployer
    services:
      - prismnet
    ssh_authorized_keys:
      - ssh-ed25519 AAAATEST edge@test
    node_id_prefix: edge
EOF

# Base URLs of the two servers started above.
chainfire_endpoint="http://127.0.0.1:${api_port}"
deployer_endpoint="http://127.0.0.1:${deployer_port}"

#######################################
# Run deployer-ctl through cargo with the connection flags for this test
# cluster already supplied.
# Globals:   ROOT, chainfire_endpoint (read)
# Arguments: the deployer-ctl subcommand and its options, passed through as-is
# Returns:   the exit status of the cargo invocation
#######################################
run_deployer_ctl() {
  local -a base_args=(
    --chainfire-endpoint "$chainfire_endpoint"
    --cluster-id test-cluster
    --cluster-namespace photoncloud
    --deployer-namespace deployer
  )

  cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- \
    "${base_args[@]}" "$@"
}

echo "Applying declarative cluster/bootstrap config"
run_deployer_ctl apply --config "$tmp_dir/cluster.yaml" --prune

echo "Validating seeded machine bootstrap mapping"
# POST a phone-home request for the machine declared under `nodes:` in
# cluster.yaml and assert on the node config in the JSON response. A failed
# assertion makes python3 exit non-zero, which aborts the script via `set -e`.
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
import json
import sys
import urllib.request

endpoint, token = sys.argv[1], sys.argv[2]
request = urllib.request.Request(
    endpoint + "/api/v1/phone-home",
    data=json.dumps({"machine_id": "known-machine-01", "ip": "10.0.0.11"}).encode(),
    headers={
        "Content-Type": "application/json",
        "X-Deployer-Token": token,
    },
)
with urllib.request.urlopen(request, timeout=5) as response:
    payload = json.loads(response.read().decode("utf-8"))

assert payload["success"] is True
assert payload["node_id"] == "node-seeded"
assert payload["node_config"]["pool"] == "general"
assert payload["node_config"]["node_class"] == "general-worker"
assert payload["node_config"]["nix_profile"] == "profiles/worker-linux"
assert payload["node_config"]["install_plan"]["nixos_configuration"] == "node01"
assert payload["node_config"]["install_plan"]["disko_config_path"] == "nix/nodes/vm-cluster/node01/disko.nix"
assert payload["node_config"]["failure_domain"] == "rack-a"
print("Seeded mapping validated")
PY

echo "Validating cloud-init metadata endpoints"
# GET the meta-data and user-data documents for the seeded machine and check
# each for an expected marker; user-data is additionally checked for the
# node-config path, the nix profile and the install plan.
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
import sys
import urllib.request

endpoint, token = sys.argv[1], sys.argv[2]

for path, expected in (
    ("/api/v1/cloud-init/known-machine-01/meta-data", "instance-id: node-seeded"),
    ("/api/v1/cloud-init/known-machine-01/user-data", "#cloud-config"),
):
    request = urllib.request.Request(
        endpoint + path,
        headers={"X-Deployer-Token": token},
    )
    with urllib.request.urlopen(request, timeout=5) as response:
        payload = response.read().decode("utf-8")
    assert expected in payload
    if path.endswith("user-data"):
        assert "/etc/plasmacloud/node-config.json" in payload
        assert "profiles/worker-linux" in payload
        assert "\"nixos_configuration\": \"node01\"" in payload

print("cloud-init endpoints validated")
PY

echo "Validating enrollment-rule bootstrap path"
# Phone home as a machine that is not listed under `nodes:` but whose metadata
# carries the rack/sku values named in the edge-metal-auto rule's match_labels.
# The only thing the Python script prints is the assigned node id, which is
# captured for the record inspection that follows.
dynamic_node_id="$(
  python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
import json
import sys
import urllib.request

endpoint, token = sys.argv[1], sys.argv[2]
request = urllib.request.Request(
    endpoint + "/api/v1/phone-home",
    data=json.dumps(
        {
            "machine_id": "dynamic-metal-01",
            "ip": "10.0.9.25",
            "metadata": {
                "rack": "edge",
                "sku": "metal",
                "topology.kubernetes.io/zone": "rack-z",
            },
        }
    ).encode(),
    headers={
        "Content-Type": "application/json",
        "X-Deployer-Token": token,
    },
)
with urllib.request.urlopen(request, timeout=5) as response:
    payload = json.loads(response.read().decode("utf-8"))

assert payload["success"] is True
assert payload["node_id"].startswith("edge-")
assert payload["node_config"]["role"] == "edge"
assert payload["node_config"]["pool"] == "edge"
assert payload["node_config"]["node_class"] == "edge-metal"
assert payload["node_config"]["nix_profile"] == "profiles/edge-metal"
assert payload["node_config"]["install_plan"]["nixos_configuration"] == "edge-metal"
assert payload["node_config"]["install_plan"]["disko_config_path"] == "profiles/edge-metal/disko.nix"
assert "prismnet" in payload["node_config"]["services"]
assert payload["node_config"]["labels"]["managed-by"] == "deployer"
print(payload["node_id"])
PY
)"

echo "Inspecting stored cluster node records"
# Dump the stored node records to a file, then verify both the seeded and the
# dynamically enrolled node. The parser keeps only dump lines containing
# " value=" and treats everything after that marker as a JSON document.
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes.dump"
python3 - "$tmp_dir/nodes.dump" "$dynamic_node_id" <<'PY'
import json
import sys

path = sys.argv[1]
dynamic_id = sys.argv[2]
records = {}

with open(path, "r", encoding="utf-8") as handle:
    for line in handle:
        line = line.strip()
        if " value=" not in line:
            continue
        value = line.split(" value=", 1)[1]
        record = json.loads(value)
        records[record["node_id"]] = record

seeded = records.get("node-seeded")
dynamic = records.get(dynamic_id)
if seeded is None:
    raise SystemExit("missing seeded node record")
if dynamic is None:
    raise SystemExit("missing dynamic node record")

if seeded.get("pool") != "general" or seeded.get("node_class") != "general-worker":
    raise SystemExit(f"unexpected seeded node record: {seeded}")
if dynamic.get("pool") != "edge" or dynamic.get("node_class") != "edge-metal":
    raise SystemExit(f"unexpected dynamic node record: {dynamic}")
if dynamic.get("failure_domain") != "rack-z":
    raise SystemExit(f"unexpected dynamic failure domain: {dynamic}")
if dynamic.get("labels", {}).get("lane") != "edge":
    raise SystemExit(f"missing pool label propagation: {dynamic}")

print("Deployer bootstrap records validated")
PY

echo "Deployer bootstrap E2E verification passed"