# photoncloud-monorepo/nix/test-cluster/verify-baremetal-iso.sh
#
# 1098 lines
# 33 KiB
# Bash
# NOTE(review): the five lines above are copy/paste artifacts from a file
# viewer, not script content. Raw text before the shebang breaks execution —
# remove them (and this note) so '#!/usr/bin/env bash' is the first line.

#!/usr/bin/env bash
# End-to-end verifier for the canonical bare-metal ISO install path: boots
# installer ISOs inside QEMU guests, serves Chainfire/Deployer and a Nix
# binary cache from the host, and asserts both nodes converge on their
# desired NixOS systems.  Every knob below is overridable via environment.
set -euo pipefail

# Repository root (override with ULTRACLOUD_REPO_ROOT); defaults to two
# directories above this script.
ROOT="${ULTRACLOUD_REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}"
# Scratch root; when empty it is resolved later by resolve_default_work_root.
DEFAULT_WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-}"
CLUSTER_ID="${ULTRACLOUD_BAREMETAL_CLUSTER_ID:-baremetal-iso-canonical}"
# Host-side endpoints the guests and deployer-ctl talk to.
# NOTE(review): DEPLOYER_ENDPOINT is not referenced below in this view (the
# health check at startup uses the literal URL) — confirm before removing.
CHAINFIRE_ENDPOINT="http://127.0.0.1:2379"
DEPLOYER_ENDPOINT="http://127.0.0.1:8088"
BINARY_CACHE_ENDPOINT="http://127.0.0.1:8090"
BOOTSTRAP_TOKEN="${ULTRACLOUD_BAREMETAL_BOOTSTRAP_TOKEN:-baremetal-iso-bootstrap-token}"
# Node classes, NixOS configurations and disko layouts for both roles.
CONTROL_NODE_CLASS="${ULTRACLOUD_BAREMETAL_CONTROL_NODE_CLASS:-iso-control-plane}"
WORKER_NODE_CLASS="${ULTRACLOUD_BAREMETAL_WORKER_NODE_CLASS:-iso-worker}"
CONTROL_NIXOS_CONFIGURATION="${ULTRACLOUD_BAREMETAL_CONTROL_CONFIGURATION:-baremetal-qemu-control-plane}"
WORKER_NIXOS_CONFIGURATION="${ULTRACLOUD_BAREMETAL_WORKER_CONFIGURATION:-baremetal-qemu-worker}"
CONTROL_DISKO_CONFIG_PATH="${ULTRACLOUD_BAREMETAL_CONTROL_DISKO_CONFIG_PATH:-nix/nodes/baremetal-qemu/control-plane/disko.nix}"
WORKER_DISKO_CONFIG_PATH="${ULTRACLOUD_BAREMETAL_WORKER_DISKO_CONFIG_PATH:-nix/nodes/baremetal-qemu/worker/disko.nix}"
# Stable by-id disk paths; the serials below are passed to QEMU so the
# virtio disks materialize under exactly these names in the guest.
CONTROL_TARGET_DISK_BY_ID="${ULTRACLOUD_BAREMETAL_CONTROL_TARGET_DISK_BY_ID:-/dev/disk/by-id/virtio-uc-control-root}"
WORKER_TARGET_DISK_BY_ID="${ULTRACLOUD_BAREMETAL_WORKER_TARGET_DISK_BY_ID:-/dev/disk/by-id/virtio-uc-worker-root}"
CONTROL_DISK_SERIAL="${ULTRACLOUD_BAREMETAL_CONTROL_DISK_SERIAL:-uc-control-root}"
WORKER_DISK_SERIAL="${ULTRACLOUD_BAREMETAL_WORKER_DISK_SERIAL:-uc-worker-root}"
# Marker files whose presence is the post-install health check per role.
CONTROL_HEALTH_CHECK_PATH="/etc/ultracloud-role-control-plane"
WORKER_HEALTH_CHECK_PATH="/etc/ultracloud-role-worker"
CONTROL_NODE_ID="${ULTRACLOUD_BAREMETAL_CONTROL_NODE_ID:-iso-control-plane-01}"
WORKER_NODE_ID="${ULTRACLOUD_BAREMETAL_WORKER_NODE_ID:-iso-worker-01}"
# Host port-forwards to each guest's sshd, and user-mode-net DHCP addresses.
CONTROL_SSH_PORT="${ULTRACLOUD_BAREMETAL_CONTROL_SSH_PORT:-22231}"
WORKER_SSH_PORT="${ULTRACLOUD_BAREMETAL_WORKER_SSH_PORT:-22232}"
CONTROL_DHCP_START="${ULTRACLOUD_BAREMETAL_CONTROL_DHCP_START:-10.0.2.15}"
WORKER_DHCP_START="${ULTRACLOUD_BAREMETAL_WORKER_DHCP_START:-10.0.2.16}"
# qcow2 sizes handed to qemu-img (size-suffix strings, e.g. "18G").
CONTROL_DISK_GIB="${ULTRACLOUD_BAREMETAL_CONTROL_DISK_GIB:-18G}"
WORKER_DISK_GIB="${ULTRACLOUD_BAREMETAL_WORKER_DISK_GIB:-18G}"
log() {
  # Print a progress message on stdout, prefixed with the e2e tag.
  local msg="$*"
  printf '%s %s\n' '[baremetal-iso-e2e]' "$msg"
}
marker() {
  # Emit a machine-parsable milestone line consumed by outer harnesses.
  local tag="$*"
  printf '%s %s\n' 'ULTRACLOUD_MARKER' "$tag"
}
die() {
  # Print an error on stderr and abort the whole script with status 1.
  # printf is used instead of echo so messages that begin with '-n'/'-e'
  # or contain backslashes are printed verbatim.
  printf '[baremetal-iso-e2e] ERROR: %s\n' "$*" >&2
  exit 1
}
require_cmd() {
  # Abort early unless the given command resolves on PATH.
  local tool="$1"
  if ! command -v "$tool" >/dev/null 2>&1; then
    die "required command not found: $tool"
  fi
}
host_cpu_count() {
  # Report the number of online CPUs; falls back to 1 when neither
  # getconf nor nproc can tell, or when the answer is not a positive int.
  local n
  n="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 1)"
  case "$n" in
    '' | *[!0-9]*) n=1 ;;
  esac
  if (( n < 1 )); then
    n=1
  fi
  printf '%s\n' "$n"
}
default_local_nix_max_jobs() {
  # Default Nix parallel-job count: half the CPUs rounded up, but a
  # single job on tiny hosts (<= 2 CPUs) to avoid thrashing.
  local cpus="$1"
  if (( cpus > 2 )); then
    printf '%s\n' "$(( (cpus + 1) / 2 ))"
  else
    printf '1\n'
  fi
}
default_local_nix_build_cores() {
  # Cores granted to each build job: cpu_count / max_jobs (integer
  # division), clamped so the result is never below 1.
  local cpus="$1"
  local jobs="$2"
  local cores=1
  if (( jobs > 0 )); then
    cores=$(( cpus / jobs ))
  fi
  if (( cores < 1 )); then
    cores=1
  fi
  printf '%s\n' "$cores"
}
default_baremetal_vm_vcpus() {
  # Scale guest vCPUs with host size: 4 on >=8 CPUs, 2 on >=4, else 1.
  local cpus="$1"
  local vcpus=1
  if (( cpus >= 8 )); then
    vcpus=4
  elif (( cpus >= 4 )); then
    vcpus=2
  fi
  printf '%s\n' "$vcpus"
}
default_baremetal_vm_memory_mib() {
  # Guest RAM in MiB: 3 GiB on large hosts (>= 8 CPUs), 2 GiB otherwise.
  local cpus="$1"
  local mib=2048
  if (( cpus >= 8 )); then
    mib=3072
  fi
  printf '%s\n' "$mib"
}
append_nix_config_line() {
  # Append one settings line to the NIX_CONFIG global, separating
  # successive entries with a newline.
  local entry="$1"
  if [[ -z "${NIX_CONFIG:-}" ]]; then
    NIX_CONFIG="$entry"
  else
    NIX_CONFIG="${NIX_CONFIG}"$'\n'"$entry"
  fi
}
configure_local_nix_execution() {
  # Pin all Nix work to this host (no remote builders) with the computed
  # job/core limits, enable flakes, and export the result via NIX_CONFIG
  # so every child nix process inherits it.
  local setting
  for setting in \
    "builders =" \
    "max-jobs = ${LOCAL_NIX_MAX_JOBS}" \
    "cores = ${LOCAL_NIX_BUILD_CORES}" \
    "experimental-features = nix-command flakes" \
    "warn-dirty = false"; do
    append_nix_config_line "$setting"
  done
  export NIX_CONFIG
}
host_kvm_access() {
  # True when /dev/kvm exists and this user can both read and write it.
  [[ -r /dev/kvm ]] && [[ -w /dev/kvm ]]
}
qemu_machine_args() {
  # Emit QEMU machine/accelerator flags one per line for the mode in
  # $BAREMETAL_VM_ACCELERATOR_MODE; callers word-split this output into
  # the qemu argv (none of the flag values contain whitespace).
  local -a flags
  if [[ "${BAREMETAL_VM_ACCELERATOR_MODE}" == "kvm" ]]; then
    flags=(-machine pc,accel=kvm:tcg -enable-kvm -cpu host)
  else
    flags=(-machine pc -accel tcg,thread=multi -cpu max)
  fi
  printf '%s\n' "${flags[@]}"
}
nix_build_local() {
  # Run `nix build` strictly on this host: remote builders disabled and
  # parallelism capped at the precomputed LOCAL_NIX_MAX_JOBS /
  # LOCAL_NIX_BUILD_CORES.  Extra arguments are forwarded verbatim.
  NIX_BUILD_CORES="${LOCAL_NIX_BUILD_CORES}" nix \
    --option builders '' \
    --option warn-dirty false \
    --max-jobs "${LOCAL_NIX_MAX_JOBS}" \
    build "$@"
}
resolve_default_work_root() {
  # Pick a writable work root, in preference order: explicit override
  # (DEFAULT_WORK_ROOT) > repo-local ./work > $TMPDIR/ultracloud >
  # /tmp/ultracloud.
  if [[ -n "${DEFAULT_WORK_ROOT}" ]]; then
    printf '%s\n' "${DEFAULT_WORK_ROOT}"
  elif [[ -w "${ROOT}" ]]; then
    printf '%s\n' "${ROOT}/work"
  elif [[ -n "${TMPDIR:-}" ]]; then
    printf '%s\n' "${TMPDIR}/ultracloud"
  else
    printf '%s\n' "/tmp/ultracloud"
  fi
}
resolve_store_path() {
  # Resolve an artifact's store path: honor the env override whose NAME
  # is $1 when set, otherwise build flake attribute $2 locally and print
  # its output path.
  local override_var="$1"
  local flake_attr="$2"
  local override="${!override_var:-}"
  if [[ -n "$override" ]]; then
    printf '%s\n' "$override"
    return 0
  fi
  nix_build_local "$ROOT#$flake_attr" --no-link --print-out-paths
}
resolve_binary() {
  # Locate an executable, in preference order: env override named by $1,
  # a $2 binary already on PATH, or a locally built flake attribute $3
  # (printing <out>/bin/<name>).
  local override_var="$1"
  local bin_name="$2"
  local flake_attr="$3"
  local override="${!override_var:-}"
  if [[ -n "$override" ]]; then
    printf '%s\n' "$override"
    return 0
  fi
  local found
  if found="$(command -v "$bin_name" 2>/dev/null)"; then
    printf '%s\n' "$found"
    return 0
  fi
  local store_out
  store_out="$(nix_build_local "$ROOT#$flake_attr" --no-link --print-out-paths)"
  printf '%s/bin/%s\n' "$store_out" "$bin_name"
}
resolve_iso_image() {
  # Accept either an ISO file directly or a nix output directory that
  # contains iso/*.iso; print the resolved ISO path, or die.
  local source="$1"
  if [[ -f "$source" ]]; then
    printf '%s\n' "$source"
    return 0
  fi
  local iso_dir="$source/iso"
  if [[ -d "$iso_dir" ]]; then
    local found
    found="$(find "$iso_dir" -maxdepth 1 -type f -name '*.iso' | head -n 1)"
    if [[ -n "$found" ]]; then
      printf '%s\n' "$found"
      return 0
    fi
  fi
  die "unable to resolve a bootable ISO file from $source"
}
resolve_ovmf_firmware() {
  # Resolve an OVMF firmware file: honor the env override named by $1,
  # otherwise build nixpkgs#OVMF.fd and append the relative path $2.
  local override_var="$1"
  local rel_path="$2"
  local override="${!override_var:-}"
  if [[ -n "$override" ]]; then
    printf '%s\n' "$override"
    return 0
  fi
  local fw_root
  fw_root="$(nix_build_local nixpkgs#OVMF.fd --no-link --print-out-paths)"
  printf '%s/%s\n' "$fw_root" "$rel_path"
}
capture_environment() {
  # Snapshot the effective configuration and host capabilities into
  # $TMP_DIR/environment.txt for post-mortem debugging.  cleanup() later
  # appends finished_at/exit_status to the same file.
  {
    printf 'started_at=%s\n' "$(date -Is)"
    printf 'pwd=%s\n' "$PWD"
    printf 'user=%s\n' "$(id -un)"
    printf 'uid=%s\n' "$(id -u)"
    printf 'gid=%s\n' "$(id -g)"
    printf 'work_root=%s\n' "${DEFAULT_WORK_ROOT}"
    printf 'state_dir=%s\n' "$TMP_DIR"
    printf 'iso_image=%s\n' "$ISO_IMAGE"
    printf 'flake_bundle=%s\n' "$FLAKE_BUNDLE"
    # Record only whether the token is set, never its value.
    printf 'bootstrap_token_set=%s\n' "$([[ -n "${BOOTSTRAP_TOKEN}" ]] && echo yes || echo no)"
    printf 'control_node_class=%s\n' "$CONTROL_NODE_CLASS"
    printf 'worker_node_class=%s\n' "$WORKER_NODE_CLASS"
    printf 'control_nixos_configuration=%s\n' "$CONTROL_NIXOS_CONFIGURATION"
    printf 'worker_nixos_configuration=%s\n' "$WORKER_NIXOS_CONFIGURATION"
    printf 'control_disko_config_path=%s\n' "$CONTROL_DISKO_CONFIG_PATH"
    printf 'worker_disko_config_path=%s\n' "$WORKER_DISKO_CONFIG_PATH"
    printf 'control_target_disk_by_id=%s\n' "$CONTROL_TARGET_DISK_BY_ID"
    printf 'worker_target_disk_by_id=%s\n' "$WORKER_TARGET_DISK_BY_ID"
    printf 'control_target=%s\n' "$CONTROL_TARGET_SYSTEM"
    printf 'worker_target=%s\n' "$WORKER_TARGET_SYSTEM"
    printf 'tmpdir=%s\n' "${TMPDIR:-}"
    printf 'host_cpu_count=%s\n' "${HOST_CPU_COUNT}"
    printf 'local_nix_max_jobs=%s\n' "${LOCAL_NIX_MAX_JOBS}"
    printf 'local_nix_build_cores=%s\n' "${LOCAL_NIX_BUILD_CORES}"
    printf 'vm_accelerator_mode=%s\n' "${BAREMETAL_VM_ACCELERATOR_MODE}"
    printf 'vm_vcpus=%s\n' "${BAREMETAL_VM_VCPUS}"
    printf 'vm_memory_mib=%s\n' "${BAREMETAL_VM_MEMORY_MIB}"
    printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)"
    printf 'kvm_access=%s\n' "$([[ -r /dev/kvm && -w /dev/kvm ]] && echo rw || echo no)"
    # First configured builders line, if any (empty means local-only).
    printf 'nix_builders=%s\n' "$(nix config show builders 2>/dev/null | awk -F' = ' 'NR==1 { print $2 }')"
  } >"$TMP_DIR/environment.txt"
}
wait_for_http() {
  # Poll a URL with curl once per second until it answers successfully or
  # the timeout (seconds) elapses.  Returns 0 when reachable, 1 on timeout.
  local url="$1"
  local limit="$2"
  local give_up_at=$(( SECONDS + limit ))
  while (( SECONDS < give_up_at )); do
    if curl -fsS "$url" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  return 1
}
wait_for_log_marker() {
  # Grep an ERE ($3) out of log file $2 every 2s until it appears or $4
  # seconds pass; $1 labels the success log line.
  local label="$1"
  local log_file="$2"
  local pattern="$3"
  local limit="$4"
  local give_up_at=$(( SECONDS + limit ))
  while (( SECONDS < give_up_at )); do
    if [[ -f "$log_file" ]] && grep -Eq "$pattern" "$log_file"; then
      log "${label}: observed ${pattern}"
      return 0
    fi
    sleep 2
  done
  return 1
}
ssh_base() {
  # Run ssh as root against the forwarded localhost port $1, using the
  # throwaway test key and with host-key checking fully disabled (guests
  # are recreated every run).  Remaining args form the remote command.
  local port="$1"
  shift
  local -a opts=(
    -F /dev/null
    -i "$SSH_KEY"
    -o BatchMode=yes
    -o ConnectTimeout=5
    -o ConnectionAttempts=1
    -o StrictHostKeyChecking=no
    -o UserKnownHostsFile=/dev/null
    -o LogLevel=ERROR
    -p "$port"
  )
  ssh "${opts[@]}" root@127.0.0.1 "$@"
}
wait_for_ssh() {
  # Retry a no-op command over SSH every 2s until the node answers or $3
  # seconds elapse.
  local label="$1"
  local port="$2"
  local limit="$3"
  local give_up_at=$(( SECONDS + limit ))
  while (( SECONDS < give_up_at )); do
    if ssh_base "$port" true >/dev/null 2>&1; then
      log "${label}: SSH is reachable on port ${port}"
      return 0
    fi
    sleep 2
  done
  return 1
}
ssh_shell() {
  # Execute snippet $2 through a remote login bash, %q-quoting it so it
  # survives the extra evaluation ssh applies to its argv.
  local port="$1"
  local snippet="$2"
  local escaped
  printf -v escaped '%q' "$snippet"
  ssh_base "$port" "bash -lc $escaped"
}
current_system_path() {
  # Print the resolved store path of the NixOS system the node on SSH
  # port $1 is currently running.
  local port="$1"
  ssh_shell "$port" 'readlink -f /run/current-system'
}
remote_boot_id() {
  # Print the kernel's random boot_id for the node on SSH port $1; the
  # value changes on every boot, so it can prove a reboot happened.
  local port="$1"
  ssh_shell "$port" 'cat /proc/sys/kernel/random/boot_id'
}
remote_journal_has_marker() {
  # Return 0 when the node's current-boot journal contains the literal
  # string $2.  Any further arguments are unit names appended as
  # `-u <unit>` filters.  The remote command line is assembled with
  # printf %q so unit names and the needle survive the remote `bash -lc`
  # evaluation performed by ssh_shell.
  local port="$1"
  local needle="$2"
  shift 2
  local remote_cmd="journalctl -b -o cat --no-pager"
  local unit
  for unit in "$@"; do
    printf -v remote_cmd '%s -u %q' "$remote_cmd" "$unit"
  done
  printf -v remote_cmd '%s | grep -Fq %q' "$remote_cmd" "$needle"
  ssh_shell "$port" "$remote_cmd"
}
wait_for_remote_journal_marker() {
  # Poll the node's current-boot journal (restricted to the unit names
  # passed after the timeout) every 2s for an exact marker string.
  local label="$1"
  local port="$2"
  local needle="$3"
  local limit="$4"
  shift 4
  local give_up_at=$(( SECONDS + limit ))
  while (( SECONDS < give_up_at )); do
    if remote_journal_has_marker "$port" "$needle" "$@" >/dev/null 2>&1; then
      log "${label}: observed ${needle} via remote journal"
      return 0
    fi
    sleep 2
  done
  return 1
}
wait_for_remote_unit_active() {
  # Poll `systemctl is-active` over SSH every 2s until the unit reports
  # active or $4 seconds elapse.
  local label="$1"
  local port="$2"
  local unit_name="$3"
  local limit="$4"
  local give_up_at=$(( SECONDS + limit ))
  while (( SECONDS < give_up_at )); do
    if ssh_shell "$port" "systemctl is-active ${unit_name} >/dev/null" >/dev/null 2>&1; then
      log "${label}: ${unit_name} is active"
      return 0
    fi
    sleep 2
  done
  return 1
}
wait_for_reboot_transition() {
  # Wait until the node reports a boot_id different from $3, which proves
  # a real reboot completed.  NOTE(review): not referenced by the main
  # flow in this file — confirm external callers before removing.
  local label="$1"
  local port="$2"
  local old_boot_id="$3"
  local limit="$4"
  local give_up_at=$(( SECONDS + limit ))
  local fresh_boot_id
  while (( SECONDS < give_up_at )); do
    if fresh_boot_id="$(remote_boot_id "$port" 2>/dev/null)"; then
      if [[ -n "$fresh_boot_id" && "$fresh_boot_id" != "$old_boot_id" ]]; then
        log "${label}: reboot completed with boot_id=${fresh_boot_id}"
        return 0
      fi
    fi
    sleep 2
  done
  return 1
}
observed_status() {
  # Print the node's observed-system status as reported by deployer-ctl,
  # or "missing" when the node cannot be inspected at all or carries no
  # observed_system.status field yet.  Always returns 0 so callers can
  # poll it safely under `set -e`.
  local node_id="$1"
  local payload
  if ! payload="$(
    "$DEPLOYER_CTL_BIN" \
      --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \
      --cluster-id "$CLUSTER_ID" \
      --cluster-namespace ultracloud \
      --deployer-namespace deployer \
      node inspect \
      --node-id "$node_id" \
      --include-observed-system \
      --format json 2>/dev/null
  )"; then
    printf 'missing\n'
    return 0
  fi
  jq -r '.observed_system.status // "missing"' <<<"$payload"
}
wait_for_observed_active() {
  # Poll deployer state every 5s until the node's observed system reaches
  # the "active" status or the timeout (seconds) elapses.
  local node_id="$1"
  local limit="$2"
  local give_up_at=$(( SECONDS + limit ))
  while (( SECONDS < give_up_at )); do
    if [[ "$(observed_status "$node_id")" == "active" ]]; then
      log "${node_id}: observed-system reached active"
      return 0
    fi
    sleep 5
  done
  return 1
}
inspect_node_payload() {
  # Dump a node's record, including its desired system, as JSON via
  # deployer-ctl; the exit status of deployer-ctl propagates to callers.
  local node_id="$1"
  "$DEPLOYER_CTL_BIN" \
    --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \
    --cluster-id "$CLUSTER_ID" \
    --cluster-namespace ultracloud \
    --deployer-namespace deployer \
    node inspect \
    --node-id "$node_id" \
    --include-desired-system \
    --format json
}
assert_node_contract() {
  # Assert the install contract the deployer resolved for a node matches
  # the expected class/profile defaults.  Args:
  #   1=node_id 2=node_class 3=nixos_configuration 4=disko_config_path
  #   5=target_disk_by_id 6=health_check_path 7=target_system store path
  # The jq program additionally pins: no raw target_disk set, switch
  # action defaulting to "switch", rollback_on_failure defaulting to
  # true, and the health check command ending in the marker-file path.
  local node_id="$1"
  local expected_node_class="$2"
  local expected_nixos_configuration="$3"
  local expected_disko_config_path="$4"
  local expected_target_disk_by_id="$5"
  local expected_health_check_path="$6"
  local expected_target_system="$7"
  local payload
  payload="$(inspect_node_payload "$node_id")" \
    || die "${node_id} install contract is not inspectable through deployer-ctl"
  jq -e \
    --arg node_id "$node_id" \
    --arg node_class "$expected_node_class" \
    --arg nixos_configuration "$expected_nixos_configuration" \
    --arg disko_config_path "$expected_disko_config_path" \
    --arg target_disk_by_id "$expected_target_disk_by_id" \
    --arg health_check_path "$expected_health_check_path" \
    --arg target_system "$expected_target_system" \
    '
    .node.node_id == $node_id
    and .node.node_class == $node_class
    and .node.install_plan.nixos_configuration == $nixos_configuration
    and .node.install_plan.disko_config_path == $disko_config_path
    and (.node.install_plan.target_disk_by_id // "") == $target_disk_by_id
    and (.node.install_plan.target_disk // "") == ""
    and .desired_system.nixos_configuration == $nixos_configuration
    and (.desired_system.target_system // "") == $target_system
    and (.desired_system.switch_action // "switch") == "switch"
    and (.desired_system.rollback_on_failure // true) == true
    and ((.desired_system.health_check_command | if length == 0 then "" else .[-1] end) == $health_check_path)
    ' <<<"$payload" >/dev/null \
    || die "${node_id} install contract did not resolve to the expected class/profile defaults"
  log "${node_id}: install contract resolved via node class ${expected_node_class}"
}
assert_port_free() {
  # Die when something is already listening on the given local TCP port;
  # the ss filter restricts the listing to that source port.
  local tcp_port="$1"
  if ss -ltn "( sport = :$tcp_port )" | grep -Fq ":$tcp_port"; then
    die "port $tcp_port is already in use"
  fi
}
start_host_services() {
  # Render host-side Chainfire and Deployer configs, launch both as
  # background processes (logs to $CHAINFIRE_LOG/$DEPLOYER_LOG, PIDs kept
  # for cleanup()), and wait for their health endpoints.
  # Single-node bootstrapped Chainfire: API :2379, HTTP health :8081,
  # raft :2380, gossip :2381.  Heredoc expands the host-side variables.
  cat >"$TMP_DIR/chainfire.toml" <<EOF
[node]
id = 1
name = "baremetal-iso-chainfire"
role = "control_plane"
[storage]
data_dir = "$TMP_DIR/chainfire-data"
[network]
api_addr = "0.0.0.0:2379"
http_addr = "0.0.0.0:8081"
raft_addr = "0.0.0.0:2380"
gossip_addr = "0.0.0.0:2381"
[cluster]
id = 1
initial_members = []
bootstrap = true
[raft]
role = "voter"
EOF
  # Deployer on :8088, strict about node identity (allow_unknown_nodes
  # false) but open for this test's unauthenticated API calls.
  cat >"$TMP_DIR/deployer.toml" <<EOF
bind_addr = "0.0.0.0:8088"
cluster_id = "${CLUSTER_ID}"
cluster_namespace = "ultracloud"
heartbeat_timeout_secs = 300
local_state_path = "$TMP_DIR/deployer-state"
bootstrap_flake_bundle_path = "$FLAKE_BUNDLE"
bootstrap_token = "${BOOTSTRAP_TOKEN}"
require_chainfire = true
allow_unknown_nodes = false
allow_unauthenticated = true
allow_test_mappings = false
tls_self_signed = false
[chainfire]
endpoints = ["${CHAINFIRE_ENDPOINT}"]
namespace = "deployer"
EOF
  log "Starting host-side Chainfire"
  # Color/style env vars are disabled so log-marker greps see plain text.
  NO_COLOR=1 CLICOLOR=0 RUST_LOG_STYLE=never \
    "$CHAINFIRE_BIN" --config "$TMP_DIR/chainfire.toml" >"$CHAINFIRE_LOG" 2>&1 &
  CHAINFIRE_PID="$!"
  wait_for_http "http://127.0.0.1:8081/health" 120 \
    || die "host Chainfire did not become healthy"
  log "Starting host-side Deployer"
  NO_COLOR=1 CLICOLOR=0 RUST_LOG_STYLE=never \
    "$DEPLOYER_SERVER_BIN" --config "$TMP_DIR/deployer.toml" >"$DEPLOYER_LOG" 2>&1 &
  DEPLOYER_PID="$!"
  wait_for_http "http://127.0.0.1:8088/health" 120 \
    || die "host Deployer did not become healthy"
}
seed_binary_cache() {
  # Build a static file-tree binary cache (nix-cache-info, one .narinfo
  # per store path, nar/*.nar) covering the full closure of both target
  # systems and both disko scripts, so the installers can substitute
  # everything from the host instead of building.
  local path
  local nar_rel
  local nar_path
  local store_base
  local store_hash
  local nar_hash
  local nar_size
  local refs
  local deriver
  mkdir -p "$NIX_CACHE_DIR/nar"
  cat >"$NIX_CACHE_DIR/nix-cache-info" <<'EOF'
StoreDir: /nix/store
WantMassQuery: 1
Priority: 30
EOF
  log "Seeding host-local Nix binary cache"
  # Optionally import a pre-built store registration dump (e.g. from CI)
  # so --query below has valid-path metadata for those paths.
  if [[ -n "${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION:-}" && -f "${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION}/registration" ]]; then
    nix-store --load-db <"${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION}/registration"
  fi
  while IFS= read -r path; do
    [[ -n "$path" ]] || continue
    store_base="$(basename "$path")"
    # narinfo files are named after the store-path hash prefix.
    store_hash="${store_base%%-*}"
    nar_rel="nar/${store_base}.nar"
    nar_path="$NIX_CACHE_DIR/$nar_rel"
    # NARs survive across runs (nix-cache dir is preserved by main());
    # only dump paths that have not been exported yet.
    if [[ ! -f "$nar_path" ]]; then
      nix-store --dump "$path" >"$nar_path"
    fi
    nar_size="$(stat -c%s "$nar_path")"
    nar_hash="$(nix hash file --type sha256 --base32 "$nar_path")"
    refs="$(nix-store --query --references "$path" | xargs -r -n1 basename | tr '\n' ' ' | sed 's/ $//')"
    deriver="$(nix-store --query --deriver "$path" 2>/dev/null || true)"
    deriver="$(basename "$deriver" 2>/dev/null || true)"
    # NARs are served uncompressed, so File* and Nar* hash/size coincide.
    {
      echo "StorePath: $path"
      echo "URL: $nar_rel"
      echo "Compression: none"
      echo "FileHash: sha256:$nar_hash"
      echo "FileSize: $nar_size"
      echo "NarHash: sha256:$nar_hash"
      echo "NarSize: $nar_size"
      echo "References: $refs"
      if [[ -n "$deriver" && "$deriver" != "unknown-deriver" ]]; then
        echo "Deriver: $deriver"
      fi
    } >"$NIX_CACHE_DIR/${store_hash}.narinfo"
  done < <(
    nix-store --query --requisites \
      "$CONTROL_TARGET_SYSTEM" \
      "$WORKER_TARGET_SYSTEM" \
      "$CONTROL_DISKO_SCRIPT" \
      "$WORKER_DISKO_SCRIPT" \
      | sort -u
  )
}
start_binary_cache() {
  # Populate the cache directory, then serve it over HTTP on :8090 with
  # Python's stdlib server; the PID is recorded for cleanup().
  seed_binary_cache
  log "Starting host-local Nix binary cache"
  python3 -m http.server 8090 --bind 0.0.0.0 --directory "$NIX_CACHE_DIR" \
    >"$NIX_CACHE_LOG" 2>&1 &
  NIX_CACHE_PID="$!"
  wait_for_http "${BINARY_CACHE_ENDPOINT}/nix-cache-info" 120 \
    || die "host-local Nix binary cache did not become reachable"
}
apply_cluster_state() {
  # Render the declarative cluster spec — node classes with install
  # plans, pools, both pre-registered nodes with desired systems, and
  # hostname-prefix enrollment rules carrying the test SSH key — and
  # apply it through deployer-ctl with --prune.
  # NOTE(review): the YAML below appears to have lost its leading
  # indentation in this copy of the file (flat YAML like this would not
  # parse); restore/verify the nesting against the original source
  # before editing the document structure.
  cat >"$TMP_DIR/cluster-state.yaml" <<EOF
cluster:
cluster_id: ${CLUSTER_ID}
environment: qemu
node_classes:
- name: ${CONTROL_NODE_CLASS}
description: Canonical ISO-installed QEMU control-plane target
roles:
- control-plane
labels:
tier: control-plane
canonical_install_path: iso
install.ultracloud.io/profile: ${CONTROL_NIXOS_CONFIGURATION}
install_plan:
nixos_configuration: ${CONTROL_NIXOS_CONFIGURATION}
disko_config_path: ${CONTROL_DISKO_CONFIG_PATH}
target_disk_by_id: ${CONTROL_TARGET_DISK_BY_ID}
- name: ${WORKER_NODE_CLASS}
description: Canonical ISO-installed QEMU worker target
roles:
- worker
labels:
tier: worker
canonical_install_path: iso
install.ultracloud.io/profile: ${WORKER_NIXOS_CONFIGURATION}
install_plan:
nixos_configuration: ${WORKER_NIXOS_CONFIGURATION}
disko_config_path: ${WORKER_DISKO_CONFIG_PATH}
target_disk_by_id: ${WORKER_TARGET_DISK_BY_ID}
pools:
- name: control
description: ISO bare-metal control-plane pool
node_class: ${CONTROL_NODE_CLASS}
labels:
pool.ultracloud.io/name: control
- name: workers
description: ISO bare-metal worker pool
node_class: ${WORKER_NODE_CLASS}
labels:
pool.ultracloud.io/name: workers
nodes:
- node_id: ${CONTROL_NODE_ID}
hostname: ${CONTROL_NODE_ID}
ip: ${CONTROL_DHCP_START}
pool: control
desired_system:
nixos_configuration: ${CONTROL_NIXOS_CONFIGURATION}
target_system: ${CONTROL_TARGET_SYSTEM}
health_check_command:
- test
- -f
- ${CONTROL_HEALTH_CHECK_PATH}
rollback_on_failure: true
state: pending
- node_id: ${WORKER_NODE_ID}
hostname: ${WORKER_NODE_ID}
ip: ${WORKER_DHCP_START}
pool: workers
desired_system:
nixos_configuration: ${WORKER_NIXOS_CONFIGURATION}
target_system: ${WORKER_TARGET_SYSTEM}
health_check_command:
- test
- -f
- ${WORKER_HEALTH_CHECK_PATH}
rollback_on_failure: true
state: pending
enrollment_rules:
- name: ${CONTROL_NODE_CLASS}
priority: 200
match_hostname_prefix: iso-control-plane
pool: control
ssh_authorized_keys:
- ${SSH_PUBKEY}
- name: ${WORKER_NODE_CLASS}
priority: 190
match_hostname_prefix: iso-worker
pool: workers
ssh_authorized_keys:
- ${SSH_PUBKEY}
EOF
  "$DEPLOYER_CTL_BIN" \
    --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \
    --cluster-id "$CLUSTER_ID" \
    --cluster-namespace ultracloud \
    --deployer-namespace deployer \
    apply --config "$TMP_DIR/cluster-state.yaml" --prune
}
launch_iso_vm() {
  # Create a fresh qcow2 system disk and boot the installer ISO in a
  # background QEMU guest.  Args: 1=label 2=node_id 3=ssh_port
  # 4=dhcp_start 5=mac 6=disk_serial 7=disk_size 8=disk_path 9=log_path.
  # The QEMU PID is written to <log_path>.pid for the later exit wait
  # and for cleanup().  -no-reboot makes the installer's reboot exit
  # QEMU, which verify_node detects before relaunching from disk.
  local label="$1"
  local node_id="$2"
  local ssh_port="$3"
  local dhcp_start="$4"
  local mac="$5"
  local disk_serial="$6"
  local disk_size="$7"
  local disk_path="$8"
  local log_path="$9"
  # Per-VM writable copy of the OVMF vars template (UEFI variable store),
  # reused on the post-install relaunch so boot entries persist.
  local ovmf_vars_path="${disk_path}.ovmf-vars.fd"
  "$QEMU_IMG_BIN" create -f qcow2 "$disk_path" "$disk_size" >/dev/null
  rm -f "$ovmf_vars_path"
  cp "$OVMF_VARS_TEMPLATE" "$ovmf_vars_path"
  chmod u+w "$ovmf_vars_path"
  # $(qemu_machine_args) is intentionally unquoted: it word-splits into
  # the accelerator flags (none contain whitespace).  The smbios serial
  # carries the node_id so the guest can identify itself when enrolling.
  nohup "$QEMU_BIN" \
    -name "$label" \
    -smp "${BAREMETAL_VM_VCPUS}" \
    -m "${BAREMETAL_VM_MEMORY_MIB}" \
    -nographic \
    -no-reboot \
    -boot order=dc,once=d,menu=off \
    $(qemu_machine_args) \
    -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE_FD" \
    -drive if=pflash,format=raw,file="$ovmf_vars_path" \
    -drive id=systemdisk,if=none,file="$disk_path",format=qcow2 \
    -device virtio-blk-pci,bootindex=1,drive=systemdisk,serial="$disk_serial" \
    -cdrom "$ISO_IMAGE" \
    -netdev user,id=user0,hostfwd=tcp:127.0.0.1:${ssh_port}-:22,dhcpstart=${dhcp_start} \
    -device virtio-net-pci,netdev=user0,mac="${mac}" \
    -smbios type=1,product=UltraCloudQEMUBaremetal,serial="${node_id}" \
    >"$log_path" 2>&1 &
  echo "$!" >"${log_path}.pid"
}
launch_installed_vm() {
  # Relaunch a node from its installed system disk (no ISO, no
  # -no-reboot), reusing the OVMF vars file written by launch_iso_vm.
  # Args: 1=label 2=ssh_port 3=dhcp_start 4=mac 5=disk_serial
  # 6=disk_path 7=log_path.  Serial output is appended to the existing
  # log; the new QEMU PID overwrites <log_path>.pid.
  local label="$1"
  local ssh_port="$2"
  local dhcp_start="$3"
  local mac="$4"
  local disk_serial="$5"
  local disk_path="$6"
  local log_path="$7"
  local ovmf_vars_path="${disk_path}.ovmf-vars.fd"
  [[ -f "$ovmf_vars_path" ]] || die "missing OVMF vars file for relaunch: $ovmf_vars_path"
  # $(qemu_machine_args) intentionally unquoted — see launch_iso_vm.
  nohup "$QEMU_BIN" \
    -name "$label" \
    -smp "${BAREMETAL_VM_VCPUS}" \
    -m "${BAREMETAL_VM_MEMORY_MIB}" \
    -nographic \
    $(qemu_machine_args) \
    -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE_FD" \
    -drive if=pflash,format=raw,file="$ovmf_vars_path" \
    -drive id=systemdisk,if=none,file="$disk_path",format=qcow2 \
    -device virtio-blk-pci,bootindex=1,drive=systemdisk,serial="$disk_serial" \
    -netdev user,id=user0,hostfwd=tcp:127.0.0.1:${ssh_port}-:22,dhcpstart=${dhcp_start} \
    -device virtio-net-pci,netdev=user0,mac="${mac}" \
    >>"$log_path" 2>&1 &
  echo "$!" >"${log_path}.pid"
}
wait_for_pid_exit() {
  # Block until the PID recorded in file $2 is gone (QEMU exited after
  # the installer-triggered reboot under -no-reboot) or $3 seconds pass.
  # Dies when the pid file itself is missing.
  local label="$1"
  local pid_file="$2"
  local limit="$3"
  [[ -f "$pid_file" ]] || die "${label} is missing pid file $pid_file"
  local tracked_pid
  tracked_pid="$(cat "$pid_file")"
  local give_up_at=$(( SECONDS + limit ))
  while (( SECONDS < give_up_at )); do
    if ! kill -0 "$tracked_pid" >/dev/null 2>&1; then
      log "${label}: QEMU exited after installer-triggered reboot"
      return 0
    fi
    sleep 2
  done
  return 1
}
verify_node() {
  # Drive one node through the full ISO install lifecycle and assert
  # every observable milestone: phone-home registration -> resolved
  # install contract -> installer-boot journal markers -> bundle/disko/
  # nixos-install markers -> reboot (QEMU exit) -> relaunch from disk ->
  # post-install system assertions -> deployer observed-system active.
  # Args: 1=node_id 2=ssh_port 3=disk_path 4=serial_log_path
  #       5=expected_role 6=expected_system (store path)
  #       7=expected_nixos_configuration 8=expected_node_class
  #       9=expected_disko_config_path 10=expected_target_disk_by_id
  #       11=expected_health_check_path 12=dhcp_start 13=mac
  #       14=disk_serial
  local node_id="$1"
  local ssh_port="$2"
  local disk_path="$3"
  local log_path="$4"
  local expected_role="$5"
  local expected_system="$6"
  local expected_nixos_configuration="$7"
  local expected_node_class="$8"
  local expected_disko_config_path="$9"
  local expected_target_disk_by_id="${10}"
  local expected_health_check_path="${11}"
  local dhcp_start="${12}"
  local mac="${13}"
  local disk_serial="${14}"
  # Phase 1: the installer must register itself with the host deployer.
  wait_for_log_marker "$node_id" "$TMP_DIR/deployer.log" "Node registered successfully.*node_id=${node_id}" 900 \
    || die "${node_id} never completed /api/v1/phone-home registration"
  assert_node_contract \
    "$node_id" \
    "$expected_node_class" \
    "$expected_nixos_configuration" \
    "$expected_disko_config_path" \
    "$expected_target_disk_by_id" \
    "$expected_health_check_path" \
    "$expected_system"
  # Phase 2: installer environment markers, observed via remote journal.
  wait_for_ssh "$node_id" "$ssh_port" 900 \
    || die "${node_id} never exposed SSH during the installer boot"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER pre-install.boot.${node_id}" 120 \
    ultracloud-bootstrap.service ultracloud-install.service \
    || die "${node_id} never recorded the pre-install boot marker"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER pre-install.phone-home.complete.${node_id}" 120 \
    ultracloud-bootstrap.service ultracloud-install.service \
    || die "${node_id} never recorded the phone-home completion marker"
  marker "pre-install.${node_id}"
  # Phase 3: install pipeline markers (bundle -> disko -> nixos-install).
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.bundle-downloaded.${node_id}" 1200 \
    ultracloud-install.service \
    || die "${node_id} never downloaded the flake bundle"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.disko.complete.${node_id}" 2400 \
    ultracloud-install.service \
    || die "${node_id} never completed disko"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.nixos-install.complete.${node_id}" 3600 \
    ultracloud-install.service \
    || die "${node_id} never finished nixos-install"
  marker "install.${node_id}"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER reboot.${node_id}" 3600 \
    ultracloud-install.service \
    || die "${node_id} never emitted reboot marker"
  marker "reboot.${node_id}"
  # Phase 4: the -no-reboot installer VM exits; relaunch from disk.
  wait_for_pid_exit "$node_id" "${log_path}.pid" 300 \
    || die "${node_id} installer VM did not exit after the reboot marker"
  launch_installed_vm \
    "ultracloud-baremetal-${node_id}-installed" \
    "$ssh_port" \
    "$dhcp_start" \
    "$mac" \
    "$disk_serial" \
    "$disk_path" \
    "$log_path"
  wait_for_ssh "$node_id" "$ssh_port" 1800 \
    || die "${node_id} did not come back over SSH after reboot"
  # Phase 5: assertions against the installed system.
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER post-install.boot.${node_id}.${expected_role}" 1800 \
    ultracloud-baremetal-postinstall-marker.service \
    || die "${node_id} never emitted post-install marker"
  marker "post-install.${node_id}"
  ssh_shell "$ssh_port" 'test -f /etc/ultracloud/node-config.json'
  ssh_shell "$ssh_port" 'test -d /var/lib/photon-src/.bundle-inputs/nixpkgs'
  wait_for_remote_unit_active "$node_id" "$ssh_port" "nix-agent.service" 180 \
    || die "${node_id} never started nix-agent.service after install"
  ssh_shell "$ssh_port" "grep -Fx '${expected_role}' /etc/ultracloud-role"
  # The virtio serial must have produced the stable by-id block device.
  ssh_shell "$ssh_port" "test -b '${expected_target_disk_by_id}'"
  if [[ "$expected_role" == "control-plane" ]]; then
    wait_for_remote_unit_active "$node_id" "$ssh_port" "chainfire.service" 180 \
      || die "${node_id} never started chainfire.service after install"
  fi
  # Phase 6: deployer convergence and exact system-generation match.
  wait_for_observed_active "$node_id" 1200 \
    || die "${node_id} never reached observed-system active"
  [[ "$(current_system_path "$ssh_port")" == "$expected_system" ]] \
    || die "${node_id} current system does not match expected target"
  marker "desired-system-active.${node_id}"
}
cleanup() {
  # EXIT-trap handler: record the outcome in environment.txt, stop every
  # background process (QEMU guests, Deployer, Chainfire, cache server),
  # dump log tails on failure, optionally delete the state dir, and exit
  # with the original status.  Installed after TMP_DIR and the *_LOG
  # variables are set, so the references below are defined under set -u.
  local status="$?"
  # Best-effort from here on: never mask the original exit status.
  set +e
  if [[ -n "${TMP_DIR:-}" && -d "${TMP_DIR}" ]]; then
    {
      printf 'finished_at=%s\n' "$(date -Is)"
      printf 'exit_status=%s\n' "$status"
    } >>"$TMP_DIR/environment.txt"
  fi
  # Stop the QEMU guests via the PID files the launch helpers wrote.
  for pid_file in "$CONTROL_LOG.pid" "$WORKER_LOG.pid"; do
    if [[ -f "$pid_file" ]]; then
      pid="$(cat "$pid_file")"
      kill "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
    fi
  done
  if [[ -n "${DEPLOYER_PID:-}" ]]; then
    kill "$DEPLOYER_PID" 2>/dev/null || true
    wait "$DEPLOYER_PID" 2>/dev/null || true
  fi
  if [[ -n "${CHAINFIRE_PID:-}" ]]; then
    kill "$CHAINFIRE_PID" 2>/dev/null || true
    wait "$CHAINFIRE_PID" 2>/dev/null || true
  fi
  if [[ -n "${NIX_CACHE_PID:-}" ]]; then
    kill "$NIX_CACHE_PID" 2>/dev/null || true
    wait "$NIX_CACHE_PID" 2>/dev/null || true
  fi
  # On failure, surface the most useful log tails for triage.
  if (( status != 0 )); then
    log "control-plane serial log tail:"
    tail -n 120 "$CONTROL_LOG" 2>/dev/null || true
    log "worker serial log tail:"
    tail -n 120 "$WORKER_LOG" 2>/dev/null || true
    log "deployer log tail:"
    tail -n 120 "$DEPLOYER_LOG" 2>/dev/null || true
    log "chainfire log tail:"
    tail -n 120 "$CHAINFIRE_LOG" 2>/dev/null || true
    log "binary cache log tail:"
    tail -n 120 "$NIX_CACHE_LOG" 2>/dev/null || true
  fi
  # main() sets KEEP_STATE_DIR=1 on both branches today, so removal only
  # happens when a caller explicitly overrides it.
  if [[ "${KEEP_STATE_DIR:-0}" != "1" ]]; then
    rm -rf "$TMP_DIR"
  fi
  exit "$status"
}
main() {
  # Orchestrates the whole run: compute sizing, resolve/build every
  # artifact, prepare state dirs, start host services, then install and
  # verify the control plane followed by the worker.
  DEFAULT_WORK_ROOT="$(resolve_default_work_root)"
  # Host sizing; every value is individually overridable via env.
  HOST_CPU_COUNT="$(host_cpu_count)"
  LOCAL_NIX_MAX_JOBS="${ULTRACLOUD_BAREMETAL_NIX_MAX_JOBS:-${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-$(default_local_nix_max_jobs "${HOST_CPU_COUNT}")}}"
  LOCAL_NIX_BUILD_CORES="${ULTRACLOUD_BAREMETAL_NIX_BUILD_CORES:-${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-$(default_local_nix_build_cores "${HOST_CPU_COUNT}" "${LOCAL_NIX_MAX_JOBS}")}}"
  BAREMETAL_VM_VCPUS="${ULTRACLOUD_BAREMETAL_VM_VCPUS:-$(default_baremetal_vm_vcpus "${HOST_CPU_COUNT}")}"
  BAREMETAL_VM_MEMORY_MIB="${ULTRACLOUD_BAREMETAL_VM_MEMORY_MIB:-$(default_baremetal_vm_memory_mib "${HOST_CPU_COUNT}")}"
  # Prefer KVM when /dev/kvm is usable; ULTRACLOUD_BAREMETAL_FORCE_TCG=1
  # forces software emulation regardless.
  if [[ "${ULTRACLOUD_BAREMETAL_FORCE_TCG:-0}" == "1" ]]; then
    BAREMETAL_VM_ACCELERATOR_MODE="tcg"
  elif host_kvm_access; then
    BAREMETAL_VM_ACCELERATOR_MODE="kvm"
  else
    BAREMETAL_VM_ACCELERATOR_MODE="tcg"
  fi
  configure_local_nix_execution
  # Host tool preflight — fail fast before any expensive builds.
  require_cmd curl
  require_cmd jq
  require_cmd nix
  require_cmd python3
  require_cmd qemu-img
  require_cmd qemu-system-x86_64
  require_cmd ssh
  require_cmd ssh-keygen
  require_cmd ss
  # Resolve (or build) every artifact and tool; each accepts an env
  # override so CI can inject prebuilt store paths.
  ISO_IMAGE="$(resolve_iso_image "$(resolve_store_path ULTRACLOUD_BAREMETAL_ISO_IMAGE 'nixosConfigurations.ultracloud-iso.config.system.build.isoImage')")"
  FLAKE_BUNDLE="$(resolve_store_path ULTRACLOUD_BAREMETAL_FLAKE_BUNDLE 'packages.x86_64-linux.ultracloudFlakeBundle')"
  CONTROL_TARGET_SYSTEM="$(resolve_store_path ULTRACLOUD_BAREMETAL_CONTROL_TARGET 'nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel')"
  WORKER_TARGET_SYSTEM="$(resolve_store_path ULTRACLOUD_BAREMETAL_WORKER_TARGET 'nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel')"
  CONTROL_DISKO_SCRIPT="$(resolve_store_path ULTRACLOUD_BAREMETAL_CONTROL_DISKO_SCRIPT 'nixosConfigurations.baremetal-qemu-control-plane.config.system.build.formatMount')"
  WORKER_DISKO_SCRIPT="$(resolve_store_path ULTRACLOUD_BAREMETAL_WORKER_DISKO_SCRIPT 'nixosConfigurations.baremetal-qemu-worker.config.system.build.formatMount')"
  CHAINFIRE_BIN="$(resolve_binary ULTRACLOUD_CHAINFIRE_SERVER_BIN chainfire 'packages.x86_64-linux.chainfire-server')"
  DEPLOYER_SERVER_BIN="$(resolve_binary ULTRACLOUD_DEPLOYER_SERVER_BIN deployer-server 'packages.x86_64-linux.deployer-server')"
  DEPLOYER_CTL_BIN="$(resolve_binary ULTRACLOUD_DEPLOYER_CTL_BIN deployer-ctl 'packages.x86_64-linux.deployer-ctl')"
  OVMF_CODE_FD="$(resolve_ovmf_firmware ULTRACLOUD_OVMF_CODE 'FV/OVMF_CODE.fd')"
  OVMF_VARS_TEMPLATE="$(resolve_ovmf_firmware ULTRACLOUD_OVMF_VARS 'FV/OVMF_VARS.fd')"
  QEMU_BIN="${ULTRACLOUD_QEMU_BIN:-$(command -v qemu-system-x86_64)}"
  QEMU_IMG_BIN="${ULTRACLOUD_QEMU_IMG_BIN:-$(command -v qemu-img)}"
  # State dir: wipe everything from previous runs except nix-cache, whose
  # NARs are expensive to re-dump.  KEEP_STATE_DIR=1 on both branches, so
  # cleanup() preserves it for post-mortems.
  if [[ -n "${ULTRACLOUD_BAREMETAL_STATE_DIR:-}" ]]; then
    TMP_DIR="$ULTRACLOUD_BAREMETAL_STATE_DIR"
    KEEP_STATE_DIR=1
    mkdir -p "$TMP_DIR"
    find "$TMP_DIR" -mindepth 1 -maxdepth 1 \
      ! -name nix-cache \
      -exec rm -rf {} +
  else
    TMP_DIR="${DEFAULT_WORK_ROOT}/baremetal-iso"
    KEEP_STATE_DIR=1
    mkdir -p "$TMP_DIR"
    find "$TMP_DIR" -mindepth 1 -maxdepth 1 \
      ! -name nix-cache \
      -exec rm -rf {} +
  fi
  # Keep temp/cache traffic inside the work root unless already set.
  export TMPDIR="${TMPDIR:-${DEFAULT_WORK_ROOT}/tmp}"
  export XDG_CACHE_HOME="${XDG_CACHE_HOME:-${DEFAULT_WORK_ROOT}/xdg-cache}"
  mkdir -p "$TMPDIR"
  mkdir -p "$XDG_CACHE_HOME"
  NIX_CACHE_DIR="$TMP_DIR/nix-cache"
  CONTROL_LOG="$TMP_DIR/control-plane.serial.log"
  WORKER_LOG="$TMP_DIR/worker.serial.log"
  DEPLOYER_LOG="$TMP_DIR/deployer.log"
  CHAINFIRE_LOG="$TMP_DIR/chainfire.log"
  NIX_CACHE_LOG="$TMP_DIR/nix-cache.log"
  # All variables cleanup() reads exist from this point on.
  trap cleanup EXIT
  # Throwaway SSH keypair, injected into guests via enrollment rules.
  SSH_KEY="$TMP_DIR/id_ed25519"
  ssh-keygen -q -t ed25519 -N "" -f "$SSH_KEY" >/dev/null
  SSH_PUBKEY="$(tr -d '\n' <"$SSH_KEY.pub")"
  capture_environment
  # Refuse to start over half-dead leftovers from a previous run.
  assert_port_free 2379
  assert_port_free 8081
  assert_port_free 8088
  assert_port_free 8090
  assert_port_free "$CONTROL_SSH_PORT"
  assert_port_free "$WORKER_SSH_PORT"
  start_binary_cache
  start_host_services
  apply_cluster_state
  # Control plane first (the worker's chainfire assertions depend on it),
  # then the worker; MACs here must match the verify_node arguments.
  launch_iso_vm \
    "ultracloud-baremetal-control-plane" \
    "$CONTROL_NODE_ID" \
    "$CONTROL_SSH_PORT" \
    "$CONTROL_DHCP_START" \
    "52:54:00:11:22:31" \
    "$CONTROL_DISK_SERIAL" \
    "$CONTROL_DISK_GIB" \
    "$TMP_DIR/control-plane.qcow2" \
    "$CONTROL_LOG"
  verify_node \
    "$CONTROL_NODE_ID" \
    "$CONTROL_SSH_PORT" \
    "$TMP_DIR/control-plane.qcow2" \
    "$CONTROL_LOG" \
    "control-plane" \
    "$CONTROL_TARGET_SYSTEM" \
    "$CONTROL_NIXOS_CONFIGURATION" \
    "$CONTROL_NODE_CLASS" \
    "$CONTROL_DISKO_CONFIG_PATH" \
    "$CONTROL_TARGET_DISK_BY_ID" \
    "$CONTROL_HEALTH_CHECK_PATH" \
    "$CONTROL_DHCP_START" \
    "52:54:00:11:22:31" \
    "$CONTROL_DISK_SERIAL"
  launch_iso_vm \
    "ultracloud-baremetal-worker" \
    "$WORKER_NODE_ID" \
    "$WORKER_SSH_PORT" \
    "$WORKER_DHCP_START" \
    "52:54:00:11:22:32" \
    "$WORKER_DISK_SERIAL" \
    "$WORKER_DISK_GIB" \
    "$TMP_DIR/worker.qcow2" \
    "$WORKER_LOG"
  verify_node \
    "$WORKER_NODE_ID" \
    "$WORKER_SSH_PORT" \
    "$TMP_DIR/worker.qcow2" \
    "$WORKER_LOG" \
    "worker" \
    "$WORKER_TARGET_SYSTEM" \
    "$WORKER_NIXOS_CONFIGURATION" \
    "$WORKER_NODE_CLASS" \
    "$WORKER_DISKO_CONFIG_PATH" \
    "$WORKER_TARGET_DISK_BY_ID" \
    "$WORKER_HEALTH_CHECK_PATH" \
    "$WORKER_DHCP_START" \
    "52:54:00:11:22:32" \
    "$WORKER_DISK_SERIAL"
  log "Canonical ISO bare-metal QEMU verification succeeded"
}
# Entry point.
main "$@"